DAM-3B-Video / trainer_state.json
richardaecn's picture
Upload folder using huggingface_hub
8479381 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999393851926685,
"eval_steps": 500,
"global_step": 1082,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.0303030303030305e-06,
"loss": 1.4111,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 6.060606060606061e-06,
"loss": 1.4088,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 9.090909090909091e-06,
"loss": 1.4259,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 1.2121212121212122e-05,
"loss": 1.2825,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 1.5151515151515153e-05,
"loss": 1.1687,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.8181818181818182e-05,
"loss": 1.1495,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.1212121212121215e-05,
"loss": 1.1025,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 2.4242424242424244e-05,
"loss": 1.0862,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.0954,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 3.0303030303030306e-05,
"loss": 1.0793,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.0228,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 3.6363636363636364e-05,
"loss": 1.0025,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 3.939393939393939e-05,
"loss": 1.0066,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 4.242424242424243e-05,
"loss": 0.9937,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 4.545454545454546e-05,
"loss": 0.9832,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 4.848484848484849e-05,
"loss": 0.9644,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 5.151515151515152e-05,
"loss": 0.9457,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 5.4545454545454546e-05,
"loss": 0.9393,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 5.757575757575758e-05,
"loss": 0.9225,
"step": 19
},
{
"epoch": 0.02,
"learning_rate": 6.060606060606061e-05,
"loss": 0.9185,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 6.363636363636364e-05,
"loss": 0.8952,
"step": 21
},
{
"epoch": 0.02,
"learning_rate": 6.666666666666667e-05,
"loss": 0.8983,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 6.96969696969697e-05,
"loss": 0.8719,
"step": 23
},
{
"epoch": 0.02,
"learning_rate": 7.272727272727273e-05,
"loss": 0.8739,
"step": 24
},
{
"epoch": 0.02,
"learning_rate": 7.575757575757576e-05,
"loss": 0.8676,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 7.878787878787879e-05,
"loss": 0.8802,
"step": 26
},
{
"epoch": 0.02,
"learning_rate": 8.181818181818183e-05,
"loss": 0.844,
"step": 27
},
{
"epoch": 0.03,
"learning_rate": 8.484848484848486e-05,
"loss": 0.8388,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 8.787878787878789e-05,
"loss": 0.8302,
"step": 29
},
{
"epoch": 0.03,
"learning_rate": 9.090909090909092e-05,
"loss": 0.8346,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 9.393939393939395e-05,
"loss": 0.8229,
"step": 31
},
{
"epoch": 0.03,
"learning_rate": 9.696969696969698e-05,
"loss": 0.8326,
"step": 32
},
{
"epoch": 0.03,
"learning_rate": 0.0001,
"loss": 0.8119,
"step": 33
},
{
"epoch": 0.03,
"learning_rate": 9.999977577271777e-05,
"loss": 0.8319,
"step": 34
},
{
"epoch": 0.03,
"learning_rate": 9.999910309288213e-05,
"loss": 0.8102,
"step": 35
},
{
"epoch": 0.03,
"learning_rate": 9.999798196652648e-05,
"loss": 0.8123,
"step": 36
},
{
"epoch": 0.03,
"learning_rate": 9.999641240370627e-05,
"loss": 0.7999,
"step": 37
},
{
"epoch": 0.04,
"learning_rate": 9.999439441849905e-05,
"loss": 0.7996,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 9.99919280290043e-05,
"loss": 0.7882,
"step": 39
},
{
"epoch": 0.04,
"learning_rate": 9.998901325734332e-05,
"loss": 0.8064,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 9.998565012965894e-05,
"loss": 0.8031,
"step": 41
},
{
"epoch": 0.04,
"learning_rate": 9.998183867611539e-05,
"loss": 0.7951,
"step": 42
},
{
"epoch": 0.04,
"learning_rate": 9.997757893089791e-05,
"loss": 0.7767,
"step": 43
},
{
"epoch": 0.04,
"learning_rate": 9.997287093221256e-05,
"loss": 0.7852,
"step": 44
},
{
"epoch": 0.04,
"learning_rate": 9.996771472228583e-05,
"loss": 0.7871,
"step": 45
},
{
"epoch": 0.04,
"learning_rate": 9.99621103473642e-05,
"loss": 0.7844,
"step": 46
},
{
"epoch": 0.04,
"learning_rate": 9.995605785771385e-05,
"loss": 0.763,
"step": 47
},
{
"epoch": 0.04,
"learning_rate": 9.994955730762008e-05,
"loss": 0.7542,
"step": 48
},
{
"epoch": 0.05,
"learning_rate": 9.994260875538695e-05,
"loss": 0.7798,
"step": 49
},
{
"epoch": 0.05,
"learning_rate": 9.993521226333664e-05,
"loss": 0.7791,
"step": 50
},
{
"epoch": 0.05,
"learning_rate": 9.992736789780897e-05,
"loss": 0.7589,
"step": 51
},
{
"epoch": 0.05,
"learning_rate": 9.991907572916077e-05,
"loss": 0.7758,
"step": 52
},
{
"epoch": 0.05,
"learning_rate": 9.991033583176525e-05,
"loss": 0.7522,
"step": 53
},
{
"epoch": 0.05,
"learning_rate": 9.990114828401136e-05,
"loss": 0.7723,
"step": 54
},
{
"epoch": 0.05,
"learning_rate": 9.989151316830305e-05,
"loss": 0.7639,
"step": 55
},
{
"epoch": 0.05,
"learning_rate": 9.988143057105854e-05,
"loss": 0.736,
"step": 56
},
{
"epoch": 0.05,
"learning_rate": 9.98709005827096e-05,
"loss": 0.7204,
"step": 57
},
{
"epoch": 0.05,
"learning_rate": 9.98599232977006e-05,
"loss": 0.7051,
"step": 58
},
{
"epoch": 0.05,
"learning_rate": 9.984849881448788e-05,
"loss": 0.6943,
"step": 59
},
{
"epoch": 0.06,
"learning_rate": 9.983662723553861e-05,
"loss": 0.7135,
"step": 60
},
{
"epoch": 0.06,
"learning_rate": 9.98243086673301e-05,
"loss": 0.7104,
"step": 61
},
{
"epoch": 0.06,
"learning_rate": 9.981154322034872e-05,
"loss": 0.7186,
"step": 62
},
{
"epoch": 0.06,
"learning_rate": 9.979833100908888e-05,
"loss": 0.7205,
"step": 63
},
{
"epoch": 0.06,
"learning_rate": 9.978467215205219e-05,
"loss": 0.7158,
"step": 64
},
{
"epoch": 0.06,
"learning_rate": 9.97705667717461e-05,
"loss": 0.7136,
"step": 65
},
{
"epoch": 0.06,
"learning_rate": 9.97560149946831e-05,
"loss": 0.7118,
"step": 66
},
{
"epoch": 0.06,
"learning_rate": 9.974101695137942e-05,
"loss": 0.7124,
"step": 67
},
{
"epoch": 0.06,
"learning_rate": 9.972557277635384e-05,
"loss": 0.7115,
"step": 68
},
{
"epoch": 0.06,
"learning_rate": 9.97096826081266e-05,
"loss": 0.6996,
"step": 69
},
{
"epoch": 0.06,
"learning_rate": 9.969334658921807e-05,
"loss": 0.7074,
"step": 70
},
{
"epoch": 0.07,
"learning_rate": 9.96765648661475e-05,
"loss": 0.689,
"step": 71
},
{
"epoch": 0.07,
"learning_rate": 9.965933758943166e-05,
"loss": 0.6996,
"step": 72
},
{
"epoch": 0.07,
"learning_rate": 9.96416649135836e-05,
"loss": 0.6892,
"step": 73
},
{
"epoch": 0.07,
"learning_rate": 9.962354699711116e-05,
"loss": 0.6806,
"step": 74
},
{
"epoch": 0.07,
"learning_rate": 9.96049840025156e-05,
"loss": 0.6776,
"step": 75
},
{
"epoch": 0.07,
"learning_rate": 9.958597609629006e-05,
"loss": 0.6646,
"step": 76
},
{
"epoch": 0.07,
"learning_rate": 9.956652344891823e-05,
"loss": 0.6716,
"step": 77
},
{
"epoch": 0.07,
"learning_rate": 9.95466262348727e-05,
"loss": 0.679,
"step": 78
},
{
"epoch": 0.07,
"learning_rate": 9.952628463261336e-05,
"loss": 0.6714,
"step": 79
},
{
"epoch": 0.07,
"learning_rate": 9.95054988245859e-05,
"loss": 0.6647,
"step": 80
},
{
"epoch": 0.07,
"learning_rate": 9.948426899722014e-05,
"loss": 0.6598,
"step": 81
},
{
"epoch": 0.08,
"learning_rate": 9.946259534092835e-05,
"loss": 0.6815,
"step": 82
},
{
"epoch": 0.08,
"learning_rate": 9.94404780501035e-05,
"loss": 0.6503,
"step": 83
},
{
"epoch": 0.08,
"learning_rate": 9.941791732311763e-05,
"loss": 0.6716,
"step": 84
},
{
"epoch": 0.08,
"learning_rate": 9.939491336231994e-05,
"loss": 0.6556,
"step": 85
},
{
"epoch": 0.08,
"learning_rate": 9.937146637403505e-05,
"loss": 0.665,
"step": 86
},
{
"epoch": 0.08,
"learning_rate": 9.934757656856115e-05,
"loss": 0.6678,
"step": 87
},
{
"epoch": 0.08,
"learning_rate": 9.932324416016806e-05,
"loss": 0.6756,
"step": 88
},
{
"epoch": 0.08,
"learning_rate": 9.929846936709541e-05,
"loss": 0.6437,
"step": 89
},
{
"epoch": 0.08,
"learning_rate": 9.927325241155056e-05,
"loss": 0.6337,
"step": 90
},
{
"epoch": 0.08,
"learning_rate": 9.924759351970668e-05,
"loss": 0.626,
"step": 91
},
{
"epoch": 0.08,
"learning_rate": 9.922149292170073e-05,
"loss": 0.621,
"step": 92
},
{
"epoch": 0.09,
"learning_rate": 9.919495085163136e-05,
"loss": 0.6306,
"step": 93
},
{
"epoch": 0.09,
"learning_rate": 9.91679675475568e-05,
"loss": 0.6447,
"step": 94
},
{
"epoch": 0.09,
"learning_rate": 9.914054325149277e-05,
"loss": 0.6375,
"step": 95
},
{
"epoch": 0.09,
"learning_rate": 9.911267820941029e-05,
"loss": 0.6367,
"step": 96
},
{
"epoch": 0.09,
"learning_rate": 9.908437267123347e-05,
"loss": 0.6308,
"step": 97
},
{
"epoch": 0.09,
"learning_rate": 9.905562689083726e-05,
"loss": 0.6227,
"step": 98
},
{
"epoch": 0.09,
"learning_rate": 9.90264411260452e-05,
"loss": 0.6529,
"step": 99
},
{
"epoch": 0.09,
"learning_rate": 9.899681563862706e-05,
"loss": 0.6274,
"step": 100
},
{
"epoch": 0.09,
"learning_rate": 9.896675069429656e-05,
"loss": 0.6419,
"step": 101
},
{
"epoch": 0.09,
"learning_rate": 9.893624656270892e-05,
"loss": 0.6284,
"step": 102
},
{
"epoch": 0.1,
"learning_rate": 9.890530351745848e-05,
"loss": 0.6392,
"step": 103
},
{
"epoch": 0.1,
"learning_rate": 9.887392183607625e-05,
"loss": 0.6323,
"step": 104
},
{
"epoch": 0.1,
"learning_rate": 9.884210180002737e-05,
"loss": 0.6418,
"step": 105
},
{
"epoch": 0.1,
"learning_rate": 9.880984369470867e-05,
"loss": 0.6336,
"step": 106
},
{
"epoch": 0.1,
"learning_rate": 9.877714780944602e-05,
"loss": 0.6291,
"step": 107
},
{
"epoch": 0.1,
"learning_rate": 9.874401443749184e-05,
"loss": 0.6339,
"step": 108
},
{
"epoch": 0.1,
"learning_rate": 9.871044387602233e-05,
"loss": 0.6252,
"step": 109
},
{
"epoch": 0.1,
"learning_rate": 9.867643642613494e-05,
"loss": 0.6406,
"step": 110
},
{
"epoch": 0.1,
"learning_rate": 9.864199239284557e-05,
"loss": 0.6196,
"step": 111
},
{
"epoch": 0.1,
"learning_rate": 9.860711208508592e-05,
"loss": 0.6122,
"step": 112
},
{
"epoch": 0.1,
"learning_rate": 9.857179581570067e-05,
"loss": 0.6302,
"step": 113
},
{
"epoch": 0.11,
"learning_rate": 9.853604390144462e-05,
"loss": 0.6285,
"step": 114
},
{
"epoch": 0.11,
"learning_rate": 9.849985666297998e-05,
"loss": 0.6336,
"step": 115
},
{
"epoch": 0.11,
"learning_rate": 9.84632344248734e-05,
"loss": 0.644,
"step": 116
},
{
"epoch": 0.11,
"learning_rate": 9.842617751559307e-05,
"loss": 0.6198,
"step": 117
},
{
"epoch": 0.11,
"learning_rate": 9.838868626750578e-05,
"loss": 0.6061,
"step": 118
},
{
"epoch": 0.11,
"learning_rate": 9.835076101687397e-05,
"loss": 0.6307,
"step": 119
},
{
"epoch": 0.11,
"learning_rate": 9.831240210385266e-05,
"loss": 0.6213,
"step": 120
},
{
"epoch": 0.11,
"learning_rate": 9.827360987248647e-05,
"loss": 0.635,
"step": 121
},
{
"epoch": 0.11,
"learning_rate": 9.823438467070644e-05,
"loss": 0.6314,
"step": 122
},
{
"epoch": 0.11,
"learning_rate": 9.8194726850327e-05,
"loss": 0.6183,
"step": 123
},
{
"epoch": 0.11,
"learning_rate": 9.815463676704276e-05,
"loss": 0.6222,
"step": 124
},
{
"epoch": 0.12,
"learning_rate": 9.811411478042533e-05,
"loss": 0.6257,
"step": 125
},
{
"epoch": 0.12,
"learning_rate": 9.807316125392011e-05,
"loss": 0.6114,
"step": 126
},
{
"epoch": 0.12,
"learning_rate": 9.803177655484301e-05,
"loss": 0.6194,
"step": 127
},
{
"epoch": 0.12,
"learning_rate": 9.798996105437719e-05,
"loss": 0.6277,
"step": 128
},
{
"epoch": 0.12,
"learning_rate": 9.794771512756969e-05,
"loss": 0.6101,
"step": 129
},
{
"epoch": 0.12,
"learning_rate": 9.790503915332807e-05,
"loss": 0.5987,
"step": 130
},
{
"epoch": 0.12,
"learning_rate": 9.786193351441704e-05,
"loss": 0.6072,
"step": 131
},
{
"epoch": 0.12,
"learning_rate": 9.781839859745501e-05,
"loss": 0.6165,
"step": 132
},
{
"epoch": 0.12,
"learning_rate": 9.777443479291065e-05,
"loss": 0.6119,
"step": 133
},
{
"epoch": 0.12,
"learning_rate": 9.773004249509932e-05,
"loss": 0.6143,
"step": 134
},
{
"epoch": 0.12,
"learning_rate": 9.768522210217957e-05,
"loss": 0.6274,
"step": 135
},
{
"epoch": 0.13,
"learning_rate": 9.763997401614963e-05,
"loss": 0.6118,
"step": 136
},
{
"epoch": 0.13,
"learning_rate": 9.759429864284368e-05,
"loss": 0.615,
"step": 137
},
{
"epoch": 0.13,
"learning_rate": 9.754819639192836e-05,
"loss": 0.6213,
"step": 138
},
{
"epoch": 0.13,
"learning_rate": 9.750166767689892e-05,
"loss": 0.6069,
"step": 139
},
{
"epoch": 0.13,
"learning_rate": 9.745471291507567e-05,
"loss": 0.6065,
"step": 140
},
{
"epoch": 0.13,
"learning_rate": 9.740733252760017e-05,
"loss": 0.618,
"step": 141
},
{
"epoch": 0.13,
"learning_rate": 9.735952693943141e-05,
"loss": 0.6047,
"step": 142
},
{
"epoch": 0.13,
"learning_rate": 9.73112965793421e-05,
"loss": 0.6107,
"step": 143
},
{
"epoch": 0.13,
"learning_rate": 9.726264187991474e-05,
"loss": 0.6158,
"step": 144
},
{
"epoch": 0.13,
"learning_rate": 9.721356327753776e-05,
"loss": 0.6255,
"step": 145
},
{
"epoch": 0.13,
"learning_rate": 9.716406121240164e-05,
"loss": 0.6428,
"step": 146
},
{
"epoch": 0.14,
"learning_rate": 9.711413612849489e-05,
"loss": 0.6373,
"step": 147
},
{
"epoch": 0.14,
"learning_rate": 9.706378847360019e-05,
"loss": 0.6498,
"step": 148
},
{
"epoch": 0.14,
"learning_rate": 9.701301869929023e-05,
"loss": 0.6317,
"step": 149
},
{
"epoch": 0.14,
"learning_rate": 9.696182726092373e-05,
"loss": 0.6424,
"step": 150
},
{
"epoch": 0.14,
"learning_rate": 9.69102146176414e-05,
"loss": 0.6433,
"step": 151
},
{
"epoch": 0.14,
"learning_rate": 9.685818123236177e-05,
"loss": 0.6484,
"step": 152
},
{
"epoch": 0.14,
"learning_rate": 9.680572757177697e-05,
"loss": 0.6384,
"step": 153
},
{
"epoch": 0.14,
"learning_rate": 9.67528541063487e-05,
"loss": 0.6393,
"step": 154
},
{
"epoch": 0.14,
"learning_rate": 9.66995613103039e-05,
"loss": 0.6379,
"step": 155
},
{
"epoch": 0.14,
"learning_rate": 9.664584966163052e-05,
"loss": 0.6328,
"step": 156
},
{
"epoch": 0.15,
"learning_rate": 9.659171964207322e-05,
"loss": 0.63,
"step": 157
},
{
"epoch": 0.15,
"learning_rate": 9.653717173712913e-05,
"loss": 0.6499,
"step": 158
},
{
"epoch": 0.15,
"learning_rate": 9.648220643604335e-05,
"loss": 0.633,
"step": 159
},
{
"epoch": 0.15,
"learning_rate": 9.642682423180472e-05,
"loss": 0.6368,
"step": 160
},
{
"epoch": 0.15,
"learning_rate": 9.637102562114122e-05,
"loss": 0.6327,
"step": 161
},
{
"epoch": 0.15,
"learning_rate": 9.631481110451576e-05,
"loss": 0.6505,
"step": 162
},
{
"epoch": 0.15,
"learning_rate": 9.625818118612142e-05,
"loss": 0.6724,
"step": 163
},
{
"epoch": 0.15,
"learning_rate": 9.620113637387713e-05,
"loss": 0.6653,
"step": 164
},
{
"epoch": 0.15,
"learning_rate": 9.614367717942303e-05,
"loss": 0.6532,
"step": 165
},
{
"epoch": 0.15,
"learning_rate": 9.608580411811584e-05,
"loss": 0.6593,
"step": 166
},
{
"epoch": 0.15,
"learning_rate": 9.602751770902437e-05,
"loss": 0.6688,
"step": 167
},
{
"epoch": 0.16,
"learning_rate": 9.596881847492472e-05,
"loss": 0.6568,
"step": 168
},
{
"epoch": 0.16,
"learning_rate": 9.590970694229568e-05,
"loss": 0.674,
"step": 169
},
{
"epoch": 0.16,
"learning_rate": 9.585018364131401e-05,
"loss": 0.6752,
"step": 170
},
{
"epoch": 0.16,
"learning_rate": 9.57902491058496e-05,
"loss": 0.6634,
"step": 171
},
{
"epoch": 0.16,
"learning_rate": 9.572990387346078e-05,
"loss": 0.655,
"step": 172
},
{
"epoch": 0.16,
"learning_rate": 9.566914848538945e-05,
"loss": 0.6588,
"step": 173
},
{
"epoch": 0.16,
"learning_rate": 9.560798348655623e-05,
"loss": 0.6537,
"step": 174
},
{
"epoch": 0.16,
"learning_rate": 9.554640942555558e-05,
"loss": 0.6639,
"step": 175
},
{
"epoch": 0.16,
"learning_rate": 9.548442685465086e-05,
"loss": 0.6665,
"step": 176
},
{
"epoch": 0.16,
"learning_rate": 9.542203632976944e-05,
"loss": 0.6636,
"step": 177
},
{
"epoch": 0.16,
"learning_rate": 9.535923841049761e-05,
"loss": 0.6582,
"step": 178
},
{
"epoch": 0.17,
"learning_rate": 9.529603366007564e-05,
"loss": 0.664,
"step": 179
},
{
"epoch": 0.17,
"learning_rate": 9.523242264539271e-05,
"loss": 0.6828,
"step": 180
},
{
"epoch": 0.17,
"learning_rate": 9.516840593698182e-05,
"loss": 0.6801,
"step": 181
},
{
"epoch": 0.17,
"learning_rate": 9.510398410901467e-05,
"loss": 0.6727,
"step": 182
},
{
"epoch": 0.17,
"learning_rate": 9.50391577392965e-05,
"loss": 0.6786,
"step": 183
},
{
"epoch": 0.17,
"learning_rate": 9.497392740926096e-05,
"loss": 0.6708,
"step": 184
},
{
"epoch": 0.17,
"learning_rate": 9.490829370396484e-05,
"loss": 0.6707,
"step": 185
},
{
"epoch": 0.17,
"learning_rate": 9.484225721208282e-05,
"loss": 0.6789,
"step": 186
},
{
"epoch": 0.17,
"learning_rate": 9.477581852590222e-05,
"loss": 0.6813,
"step": 187
},
{
"epoch": 0.17,
"learning_rate": 9.47089782413177e-05,
"loss": 0.672,
"step": 188
},
{
"epoch": 0.17,
"learning_rate": 9.464173695782586e-05,
"loss": 0.6745,
"step": 189
},
{
"epoch": 0.18,
"learning_rate": 9.457409527851993e-05,
"loss": 0.6569,
"step": 190
},
{
"epoch": 0.18,
"learning_rate": 9.450605381008426e-05,
"loss": 0.6736,
"step": 191
},
{
"epoch": 0.18,
"learning_rate": 9.443761316278903e-05,
"loss": 0.6636,
"step": 192
},
{
"epoch": 0.18,
"learning_rate": 9.436877395048466e-05,
"loss": 0.6583,
"step": 193
},
{
"epoch": 0.18,
"learning_rate": 9.429953679059629e-05,
"loss": 0.6934,
"step": 194
},
{
"epoch": 0.18,
"learning_rate": 9.422990230411838e-05,
"loss": 0.6681,
"step": 195
},
{
"epoch": 0.18,
"learning_rate": 9.415987111560896e-05,
"loss": 0.6742,
"step": 196
},
{
"epoch": 0.18,
"learning_rate": 9.408944385318415e-05,
"loss": 0.6823,
"step": 197
},
{
"epoch": 0.18,
"learning_rate": 9.401862114851251e-05,
"loss": 0.6752,
"step": 198
},
{
"epoch": 0.18,
"learning_rate": 9.394740363680936e-05,
"loss": 0.6696,
"step": 199
},
{
"epoch": 0.18,
"learning_rate": 9.387579195683102e-05,
"loss": 0.67,
"step": 200
},
{
"epoch": 0.19,
"learning_rate": 9.380378675086923e-05,
"loss": 0.659,
"step": 201
},
{
"epoch": 0.19,
"learning_rate": 9.373138866474522e-05,
"loss": 0.6805,
"step": 202
},
{
"epoch": 0.19,
"learning_rate": 9.365859834780406e-05,
"loss": 0.6995,
"step": 203
},
{
"epoch": 0.19,
"learning_rate": 9.358541645290873e-05,
"loss": 0.6751,
"step": 204
},
{
"epoch": 0.19,
"learning_rate": 9.351184363643434e-05,
"loss": 0.6714,
"step": 205
},
{
"epoch": 0.19,
"learning_rate": 9.343788055826219e-05,
"loss": 0.6618,
"step": 206
},
{
"epoch": 0.19,
"learning_rate": 9.336352788177388e-05,
"loss": 0.6749,
"step": 207
},
{
"epoch": 0.19,
"learning_rate": 9.328878627384537e-05,
"loss": 0.6693,
"step": 208
},
{
"epoch": 0.19,
"learning_rate": 9.321365640484093e-05,
"loss": 0.6671,
"step": 209
},
{
"epoch": 0.19,
"learning_rate": 9.313813894860722e-05,
"loss": 0.6637,
"step": 210
},
{
"epoch": 0.19,
"learning_rate": 9.306223458246722e-05,
"loss": 0.6533,
"step": 211
},
{
"epoch": 0.2,
"learning_rate": 9.298594398721411e-05,
"loss": 0.671,
"step": 212
},
{
"epoch": 0.2,
"learning_rate": 9.290926784710521e-05,
"loss": 0.6727,
"step": 213
},
{
"epoch": 0.2,
"learning_rate": 9.283220684985583e-05,
"loss": 0.6527,
"step": 214
},
{
"epoch": 0.2,
"learning_rate": 9.275476168663304e-05,
"loss": 0.6466,
"step": 215
},
{
"epoch": 0.2,
"learning_rate": 9.267693305204964e-05,
"loss": 0.6728,
"step": 216
},
{
"epoch": 0.2,
"learning_rate": 9.259872164415772e-05,
"loss": 0.6576,
"step": 217
},
{
"epoch": 0.2,
"learning_rate": 9.252012816444255e-05,
"loss": 0.6572,
"step": 218
},
{
"epoch": 0.2,
"learning_rate": 9.244115331781621e-05,
"loss": 0.6757,
"step": 219
},
{
"epoch": 0.2,
"learning_rate": 9.236179781261134e-05,
"loss": 0.6478,
"step": 220
},
{
"epoch": 0.2,
"learning_rate": 9.228206236057468e-05,
"loss": 0.6457,
"step": 221
},
{
"epoch": 0.21,
"learning_rate": 9.22019476768608e-05,
"loss": 0.6497,
"step": 222
},
{
"epoch": 0.21,
"learning_rate": 9.212145448002559e-05,
"loss": 0.6591,
"step": 223
},
{
"epoch": 0.21,
"learning_rate": 9.20405834920199e-05,
"loss": 0.6543,
"step": 224
},
{
"epoch": 0.21,
"learning_rate": 9.195933543818299e-05,
"loss": 0.6424,
"step": 225
},
{
"epoch": 0.21,
"learning_rate": 9.187771104723608e-05,
"loss": 0.6433,
"step": 226
},
{
"epoch": 0.21,
"learning_rate": 9.179571105127577e-05,
"loss": 0.6407,
"step": 227
},
{
"epoch": 0.21,
"learning_rate": 9.171333618576755e-05,
"loss": 0.6592,
"step": 228
},
{
"epoch": 0.21,
"learning_rate": 9.163058718953907e-05,
"loss": 0.6607,
"step": 229
},
{
"epoch": 0.21,
"learning_rate": 9.154746480477365e-05,
"loss": 0.6421,
"step": 230
},
{
"epoch": 0.21,
"learning_rate": 9.146396977700351e-05,
"loss": 0.6528,
"step": 231
},
{
"epoch": 0.21,
"learning_rate": 9.138010285510323e-05,
"loss": 0.6537,
"step": 232
},
{
"epoch": 0.22,
"learning_rate": 9.129586479128287e-05,
"loss": 0.6534,
"step": 233
},
{
"epoch": 0.22,
"learning_rate": 9.12112563410813e-05,
"loss": 0.6429,
"step": 234
},
{
"epoch": 0.22,
"learning_rate": 9.112627826335944e-05,
"loss": 0.6499,
"step": 235
},
{
"epoch": 0.22,
"learning_rate": 9.104093132029344e-05,
"loss": 0.6343,
"step": 236
},
{
"epoch": 0.22,
"learning_rate": 9.09552162773678e-05,
"loss": 0.6423,
"step": 237
},
{
"epoch": 0.22,
"learning_rate": 9.086913390336858e-05,
"loss": 0.6444,
"step": 238
},
{
"epoch": 0.22,
"learning_rate": 9.078268497037644e-05,
"loss": 0.6569,
"step": 239
},
{
"epoch": 0.22,
"learning_rate": 9.069587025375979e-05,
"loss": 0.6413,
"step": 240
},
{
"epoch": 0.22,
"learning_rate": 9.060869053216768e-05,
"loss": 0.6299,
"step": 241
},
{
"epoch": 0.22,
"learning_rate": 9.052114658752307e-05,
"loss": 0.6429,
"step": 242
},
{
"epoch": 0.22,
"learning_rate": 9.043323920501552e-05,
"loss": 0.6266,
"step": 243
},
{
"epoch": 0.23,
"learning_rate": 9.034496917309441e-05,
"loss": 0.6308,
"step": 244
},
{
"epoch": 0.23,
"learning_rate": 9.025633728346172e-05,
"loss": 0.6365,
"step": 245
},
{
"epoch": 0.23,
"learning_rate": 9.016734433106493e-05,
"loss": 0.6367,
"step": 246
},
{
"epoch": 0.23,
"learning_rate": 9.007799111408998e-05,
"loss": 0.6464,
"step": 247
},
{
"epoch": 0.23,
"learning_rate": 8.998827843395401e-05,
"loss": 0.6452,
"step": 248
},
{
"epoch": 0.23,
"learning_rate": 8.989820709529824e-05,
"loss": 0.6459,
"step": 249
},
{
"epoch": 0.23,
"learning_rate": 8.980777790598075e-05,
"loss": 0.6437,
"step": 250
},
{
"epoch": 0.23,
"learning_rate": 8.971699167706918e-05,
"loss": 0.6417,
"step": 251
},
{
"epoch": 0.23,
"learning_rate": 8.96258492228335e-05,
"loss": 0.6664,
"step": 252
},
{
"epoch": 0.23,
"learning_rate": 8.953435136073871e-05,
"loss": 0.6706,
"step": 253
},
{
"epoch": 0.23,
"learning_rate": 8.944249891143747e-05,
"loss": 0.6718,
"step": 254
},
{
"epoch": 0.24,
"learning_rate": 8.935029269876283e-05,
"loss": 0.6442,
"step": 255
},
{
"epoch": 0.24,
"learning_rate": 8.925773354972067e-05,
"loss": 0.6582,
"step": 256
},
{
"epoch": 0.24,
"learning_rate": 8.916482229448248e-05,
"loss": 0.6679,
"step": 257
},
{
"epoch": 0.24,
"learning_rate": 8.90715597663778e-05,
"loss": 0.6734,
"step": 258
},
{
"epoch": 0.24,
"learning_rate": 8.897794680188673e-05,
"loss": 0.6712,
"step": 259
},
{
"epoch": 0.24,
"learning_rate": 8.888398424063251e-05,
"loss": 0.6541,
"step": 260
},
{
"epoch": 0.24,
"learning_rate": 8.878967292537392e-05,
"loss": 0.6546,
"step": 261
},
{
"epoch": 0.24,
"learning_rate": 8.869501370199778e-05,
"loss": 0.6629,
"step": 262
},
{
"epoch": 0.24,
"learning_rate": 8.860000741951128e-05,
"loss": 0.6616,
"step": 263
},
{
"epoch": 0.24,
"learning_rate": 8.850465493003445e-05,
"loss": 0.6514,
"step": 264
},
{
"epoch": 0.24,
"learning_rate": 8.840895708879248e-05,
"loss": 0.6472,
"step": 265
},
{
"epoch": 0.25,
"learning_rate": 8.831291475410804e-05,
"loss": 0.6556,
"step": 266
},
{
"epoch": 0.25,
"learning_rate": 8.821652878739358e-05,
"loss": 0.6594,
"step": 267
},
{
"epoch": 0.25,
"learning_rate": 8.811980005314365e-05,
"loss": 0.6737,
"step": 268
},
{
"epoch": 0.25,
"learning_rate": 8.80227294189271e-05,
"loss": 0.6902,
"step": 269
},
{
"epoch": 0.25,
"learning_rate": 8.792531775537931e-05,
"loss": 0.6981,
"step": 270
},
{
"epoch": 0.25,
"learning_rate": 8.782756593619439e-05,
"loss": 0.6835,
"step": 271
},
{
"epoch": 0.25,
"learning_rate": 8.772947483811731e-05,
"loss": 0.6888,
"step": 272
},
{
"epoch": 0.25,
"learning_rate": 8.763104534093609e-05,
"loss": 0.6964,
"step": 273
},
{
"epoch": 0.25,
"learning_rate": 8.753227832747388e-05,
"loss": 0.6817,
"step": 274
},
{
"epoch": 0.25,
"learning_rate": 8.743317468358105e-05,
"loss": 0.696,
"step": 275
},
{
"epoch": 0.25,
"learning_rate": 8.733373529812722e-05,
"loss": 0.6844,
"step": 276
},
{
"epoch": 0.26,
"learning_rate": 8.72339610629933e-05,
"loss": 0.6964,
"step": 277
},
{
"epoch": 0.26,
"learning_rate": 8.713385287306355e-05,
"loss": 0.6932,
"step": 278
},
{
"epoch": 0.26,
"learning_rate": 8.703341162621742e-05,
"loss": 0.6869,
"step": 279
},
{
"epoch": 0.26,
"learning_rate": 8.693263822332165e-05,
"loss": 0.6806,
"step": 280
},
{
"epoch": 0.26,
"learning_rate": 8.683153356822209e-05,
"loss": 0.6953,
"step": 281
},
{
"epoch": 0.26,
"learning_rate": 8.673009856773562e-05,
"loss": 0.678,
"step": 282
},
{
"epoch": 0.26,
"learning_rate": 8.6628334131642e-05,
"loss": 0.685,
"step": 283
},
{
"epoch": 0.26,
"learning_rate": 8.652624117267579e-05,
"loss": 0.6822,
"step": 284
},
{
"epoch": 0.26,
"learning_rate": 8.642382060651801e-05,
"loss": 0.7075,
"step": 285
},
{
"epoch": 0.26,
"learning_rate": 8.632107335178808e-05,
"loss": 0.7538,
"step": 286
},
{
"epoch": 0.27,
"learning_rate": 8.621800033003553e-05,
"loss": 0.7471,
"step": 287
},
{
"epoch": 0.27,
"learning_rate": 8.611460246573169e-05,
"loss": 0.7436,
"step": 288
},
{
"epoch": 0.27,
"learning_rate": 8.601088068626144e-05,
"loss": 0.7571,
"step": 289
},
{
"epoch": 0.27,
"learning_rate": 8.590683592191489e-05,
"loss": 0.7464,
"step": 290
},
{
"epoch": 0.27,
"learning_rate": 8.580246910587904e-05,
"loss": 0.7489,
"step": 291
},
{
"epoch": 0.27,
"learning_rate": 8.569778117422938e-05,
"loss": 0.7465,
"step": 292
},
{
"epoch": 0.27,
"learning_rate": 8.559277306592152e-05,
"loss": 0.7496,
"step": 293
},
{
"epoch": 0.27,
"learning_rate": 8.548744572278278e-05,
"loss": 0.7501,
"step": 294
},
{
"epoch": 0.27,
"learning_rate": 8.538180008950371e-05,
"loss": 0.7422,
"step": 295
},
{
"epoch": 0.27,
"learning_rate": 8.527583711362965e-05,
"loss": 0.7527,
"step": 296
},
{
"epoch": 0.27,
"learning_rate": 8.51695577455522e-05,
"loss": 0.739,
"step": 297
},
{
"epoch": 0.28,
"learning_rate": 8.506296293850069e-05,
"loss": 0.7356,
"step": 298
},
{
"epoch": 0.28,
"learning_rate": 8.495605364853371e-05,
"loss": 0.7484,
"step": 299
},
{
"epoch": 0.28,
"learning_rate": 8.484883083453043e-05,
"loss": 0.7342,
"step": 300
},
{
"epoch": 0.28,
"learning_rate": 8.474129545818206e-05,
"loss": 0.7429,
"step": 301
},
{
"epoch": 0.28,
"learning_rate": 8.463344848398319e-05,
"loss": 0.7447,
"step": 302
},
{
"epoch": 0.28,
"learning_rate": 8.45252908792232e-05,
"loss": 0.7442,
"step": 303
},
{
"epoch": 0.28,
"learning_rate": 8.44168236139775e-05,
"loss": 0.7616,
"step": 304
},
{
"epoch": 0.28,
"learning_rate": 8.430804766109891e-05,
"loss": 0.7298,
"step": 305
},
{
"epoch": 0.28,
"learning_rate": 8.419896399620886e-05,
"loss": 0.734,
"step": 306
},
{
"epoch": 0.28,
"learning_rate": 8.408957359768874e-05,
"loss": 0.7494,
"step": 307
},
{
"epoch": 0.28,
"learning_rate": 8.397987744667098e-05,
"loss": 0.7343,
"step": 308
},
{
"epoch": 0.29,
"learning_rate": 8.38698765270304e-05,
"loss": 0.7425,
"step": 309
},
{
"epoch": 0.29,
"learning_rate": 8.375957182537525e-05,
"loss": 0.7395,
"step": 310
},
{
"epoch": 0.29,
"learning_rate": 8.364896433103852e-05,
"loss": 0.732,
"step": 311
},
{
"epoch": 0.29,
"learning_rate": 8.35380550360689e-05,
"loss": 0.7375,
"step": 312
},
{
"epoch": 0.29,
"learning_rate": 8.342684493522196e-05,
"loss": 0.7411,
"step": 313
},
{
"epoch": 0.29,
"learning_rate": 8.331533502595127e-05,
"loss": 0.7355,
"step": 314
},
{
"epoch": 0.29,
"learning_rate": 8.320352630839943e-05,
"loss": 0.7292,
"step": 315
},
{
"epoch": 0.29,
"learning_rate": 8.309141978538895e-05,
"loss": 0.7395,
"step": 316
},
{
"epoch": 0.29,
"learning_rate": 8.297901646241351e-05,
"loss": 0.7338,
"step": 317
},
{
"epoch": 0.29,
"learning_rate": 8.286631734762877e-05,
"loss": 0.735,
"step": 318
},
{
"epoch": 0.29,
"learning_rate": 8.27533234518434e-05,
"loss": 0.736,
"step": 319
},
{
"epoch": 0.3,
"learning_rate": 8.264003578850994e-05,
"loss": 0.723,
"step": 320
},
{
"epoch": 0.3,
"learning_rate": 8.252645537371579e-05,
"loss": 0.7357,
"step": 321
},
{
"epoch": 0.3,
"learning_rate": 8.241258322617407e-05,
"loss": 0.7316,
"step": 322
},
{
"epoch": 0.3,
"learning_rate": 8.229842036721444e-05,
"loss": 0.7175,
"step": 323
},
{
"epoch": 0.3,
"learning_rate": 8.218396782077404e-05,
"loss": 0.7426,
"step": 324
},
{
"epoch": 0.3,
"learning_rate": 8.206922661338819e-05,
"loss": 0.7367,
"step": 325
},
{
"epoch": 0.3,
"learning_rate": 8.195419777418125e-05,
"loss": 0.7383,
"step": 326
},
{
"epoch": 0.3,
"learning_rate": 8.183888233485738e-05,
"loss": 0.7364,
"step": 327
},
{
"epoch": 0.3,
"learning_rate": 8.172328132969129e-05,
"loss": 0.7366,
"step": 328
},
{
"epoch": 0.3,
"learning_rate": 8.160739579551894e-05,
"loss": 0.7129,
"step": 329
},
{
"epoch": 0.3,
"learning_rate": 8.149122677172828e-05,
"loss": 0.7361,
"step": 330
},
{
"epoch": 0.31,
"learning_rate": 8.137477530024986e-05,
"loss": 0.7423,
"step": 331
},
{
"epoch": 0.31,
"learning_rate": 8.12580424255476e-05,
"loss": 0.7302,
"step": 332
},
{
"epoch": 0.31,
"learning_rate": 8.114102919460927e-05,
"loss": 0.7277,
"step": 333
},
{
"epoch": 0.31,
"learning_rate": 8.102373665693725e-05,
"loss": 0.712,
"step": 334
},
{
"epoch": 0.31,
"learning_rate": 8.090616586453898e-05,
"loss": 0.7244,
"step": 335
},
{
"epoch": 0.31,
"learning_rate": 8.07883178719177e-05,
"loss": 0.7163,
"step": 336
},
{
"epoch": 0.31,
"learning_rate": 8.067019373606273e-05,
"loss": 0.7227,
"step": 337
},
{
"epoch": 0.31,
"learning_rate": 8.055179451644028e-05,
"loss": 0.7146,
"step": 338
},
{
"epoch": 0.31,
"learning_rate": 8.043312127498374e-05,
"loss": 0.7256,
"step": 339
},
{
"epoch": 0.31,
"learning_rate": 8.031417507608428e-05,
"loss": 0.7208,
"step": 340
},
{
"epoch": 0.31,
"learning_rate": 8.019495698658116e-05,
"loss": 0.724,
"step": 341
},
{
"epoch": 0.32,
"learning_rate": 8.007546807575236e-05,
"loss": 0.7394,
"step": 342
},
{
"epoch": 0.32,
"learning_rate": 7.99557094153048e-05,
"loss": 0.7209,
"step": 343
},
{
"epoch": 0.32,
"learning_rate": 7.983568207936484e-05,
"loss": 0.7108,
"step": 344
},
{
"epoch": 0.32,
"learning_rate": 7.971538714446863e-05,
"loss": 0.724,
"step": 345
},
{
"epoch": 0.32,
"learning_rate": 7.95948256895524e-05,
"loss": 0.7216,
"step": 346
},
{
"epoch": 0.32,
"learning_rate": 7.947399879594286e-05,
"loss": 0.7129,
"step": 347
},
{
"epoch": 0.32,
"learning_rate": 7.935290754734744e-05,
"loss": 0.7128,
"step": 348
},
{
"epoch": 0.32,
"learning_rate": 7.923155302984462e-05,
"loss": 0.7274,
"step": 349
},
{
"epoch": 0.32,
"learning_rate": 7.910993633187414e-05,
"loss": 0.7157,
"step": 350
},
{
"epoch": 0.32,
"learning_rate": 7.898805854422724e-05,
"loss": 0.7205,
"step": 351
},
{
"epoch": 0.33,
"learning_rate": 7.886592076003697e-05,
"loss": 0.7207,
"step": 352
},
{
"epoch": 0.33,
"learning_rate": 7.874352407476822e-05,
"loss": 0.7163,
"step": 353
},
{
"epoch": 0.33,
"learning_rate": 7.862086958620807e-05,
"loss": 0.7101,
"step": 354
},
{
"epoch": 0.33,
"learning_rate": 7.84979583944558e-05,
"loss": 0.7207,
"step": 355
},
{
"epoch": 0.33,
"learning_rate": 7.837479160191312e-05,
"loss": 0.7296,
"step": 356
},
{
"epoch": 0.33,
"learning_rate": 7.825137031327423e-05,
"loss": 0.7285,
"step": 357
},
{
"epoch": 0.33,
"learning_rate": 7.812769563551595e-05,
"loss": 0.6757,
"step": 358
},
{
"epoch": 0.33,
"learning_rate": 7.800376867788774e-05,
"loss": 0.6795,
"step": 359
},
{
"epoch": 0.33,
"learning_rate": 7.787959055190182e-05,
"loss": 0.6776,
"step": 360
},
{
"epoch": 0.33,
"learning_rate": 7.77551623713231e-05,
"loss": 0.6775,
"step": 361
},
{
"epoch": 0.33,
"learning_rate": 7.763048525215932e-05,
"loss": 0.6797,
"step": 362
},
{
"epoch": 0.34,
"learning_rate": 7.750556031265093e-05,
"loss": 0.676,
"step": 363
},
{
"epoch": 0.34,
"learning_rate": 7.738038867326112e-05,
"loss": 0.6711,
"step": 364
},
{
"epoch": 0.34,
"learning_rate": 7.725497145666576e-05,
"loss": 0.6819,
"step": 365
},
{
"epoch": 0.34,
"learning_rate": 7.712930978774328e-05,
"loss": 0.6657,
"step": 366
},
{
"epoch": 0.34,
"learning_rate": 7.700340479356471e-05,
"loss": 0.6712,
"step": 367
},
{
"epoch": 0.34,
"learning_rate": 7.687725760338342e-05,
"loss": 0.6684,
"step": 368
},
{
"epoch": 0.34,
"learning_rate": 7.675086934862505e-05,
"loss": 0.675,
"step": 369
},
{
"epoch": 0.34,
"learning_rate": 7.662424116287742e-05,
"loss": 0.6763,
"step": 370
},
{
"epoch": 0.34,
"learning_rate": 7.649737418188027e-05,
"loss": 0.6813,
"step": 371
},
{
"epoch": 0.34,
"learning_rate": 7.637026954351517e-05,
"loss": 0.6845,
"step": 372
},
{
"epoch": 0.34,
"learning_rate": 7.624292838779517e-05,
"loss": 0.6701,
"step": 373
},
{
"epoch": 0.35,
"learning_rate": 7.611535185685477e-05,
"loss": 0.6889,
"step": 374
},
{
"epoch": 0.35,
"learning_rate": 7.59875410949395e-05,
"loss": 0.671,
"step": 375
},
{
"epoch": 0.35,
"learning_rate": 7.585949724839575e-05,
"loss": 0.664,
"step": 376
},
{
"epoch": 0.35,
"learning_rate": 7.573122146566048e-05,
"loss": 0.6797,
"step": 377
},
{
"epoch": 0.35,
"learning_rate": 7.560271489725087e-05,
"loss": 0.6849,
"step": 378
},
{
"epoch": 0.35,
"learning_rate": 7.54739786957541e-05,
"loss": 0.6627,
"step": 379
},
{
"epoch": 0.35,
"learning_rate": 7.534501401581689e-05,
"loss": 0.6712,
"step": 380
},
{
"epoch": 0.35,
"learning_rate": 7.521582201413522e-05,
"loss": 0.6678,
"step": 381
},
{
"epoch": 0.35,
"learning_rate": 7.508640384944396e-05,
"loss": 0.6713,
"step": 382
},
{
"epoch": 0.35,
"learning_rate": 7.495676068250648e-05,
"loss": 0.6788,
"step": 383
},
{
"epoch": 0.35,
"learning_rate": 7.482689367610411e-05,
"loss": 0.6761,
"step": 384
},
{
"epoch": 0.36,
"learning_rate": 7.469680399502591e-05,
"loss": 0.6758,
"step": 385
},
{
"epoch": 0.36,
"learning_rate": 7.456649280605814e-05,
"loss": 0.6706,
"step": 386
},
{
"epoch": 0.36,
"learning_rate": 7.443596127797373e-05,
"loss": 0.6715,
"step": 387
},
{
"epoch": 0.36,
"learning_rate": 7.430521058152184e-05,
"loss": 0.6707,
"step": 388
},
{
"epoch": 0.36,
"learning_rate": 7.417424188941744e-05,
"loss": 0.6831,
"step": 389
},
{
"epoch": 0.36,
"learning_rate": 7.404305637633067e-05,
"loss": 0.6565,
"step": 390
},
{
"epoch": 0.36,
"learning_rate": 7.391165521887638e-05,
"loss": 0.6677,
"step": 391
},
{
"epoch": 0.36,
"learning_rate": 7.378003959560354e-05,
"loss": 0.6542,
"step": 392
},
{
"epoch": 0.36,
"learning_rate": 7.364821068698471e-05,
"loss": 0.661,
"step": 393
},
{
"epoch": 0.36,
"learning_rate": 7.351616967540536e-05,
"loss": 0.6734,
"step": 394
},
{
"epoch": 0.36,
"learning_rate": 7.338391774515343e-05,
"loss": 0.6838,
"step": 395
},
{
"epoch": 0.37,
"learning_rate": 7.325145608240853e-05,
"loss": 0.6657,
"step": 396
},
{
"epoch": 0.37,
"learning_rate": 7.31187858752314e-05,
"loss": 0.6735,
"step": 397
},
{
"epoch": 0.37,
"learning_rate": 7.298590831355326e-05,
"loss": 0.6663,
"step": 398
},
{
"epoch": 0.37,
"learning_rate": 7.285282458916506e-05,
"loss": 0.6571,
"step": 399
},
{
"epoch": 0.37,
"learning_rate": 7.27195358957069e-05,
"loss": 0.694,
"step": 400
},
{
"epoch": 0.37,
"learning_rate": 7.258604342865724e-05,
"loss": 0.698,
"step": 401
},
{
"epoch": 0.37,
"learning_rate": 7.245234838532218e-05,
"loss": 0.6896,
"step": 402
},
{
"epoch": 0.37,
"learning_rate": 7.231845196482479e-05,
"loss": 0.6811,
"step": 403
},
{
"epoch": 0.37,
"learning_rate": 7.21843553680943e-05,
"loss": 0.67,
"step": 404
},
{
"epoch": 0.37,
"learning_rate": 7.205005979785527e-05,
"loss": 0.6817,
"step": 405
},
{
"epoch": 0.38,
"learning_rate": 7.1915566458617e-05,
"loss": 0.6651,
"step": 406
},
{
"epoch": 0.38,
"learning_rate": 7.178087655666248e-05,
"loss": 0.6789,
"step": 407
},
{
"epoch": 0.38,
"learning_rate": 7.164599130003777e-05,
"loss": 0.672,
"step": 408
},
{
"epoch": 0.38,
"learning_rate": 7.151091189854102e-05,
"loss": 0.6641,
"step": 409
},
{
"epoch": 0.38,
"learning_rate": 7.137563956371172e-05,
"loss": 0.6544,
"step": 410
},
{
"epoch": 0.38,
"learning_rate": 7.124017550881981e-05,
"loss": 0.6581,
"step": 411
},
{
"epoch": 0.38,
"learning_rate": 7.110452094885476e-05,
"loss": 0.6748,
"step": 412
},
{
"epoch": 0.38,
"learning_rate": 7.096867710051467e-05,
"loss": 0.6823,
"step": 413
},
{
"epoch": 0.38,
"learning_rate": 7.083264518219547e-05,
"loss": 0.6719,
"step": 414
},
{
"epoch": 0.38,
"learning_rate": 7.06964264139798e-05,
"loss": 0.6708,
"step": 415
},
{
"epoch": 0.38,
"learning_rate": 7.056002201762626e-05,
"loss": 0.6801,
"step": 416
},
{
"epoch": 0.39,
"learning_rate": 7.042343321655833e-05,
"loss": 0.6702,
"step": 417
},
{
"epoch": 0.39,
"learning_rate": 7.028666123585342e-05,
"loss": 0.667,
"step": 418
},
{
"epoch": 0.39,
"learning_rate": 7.014970730223194e-05,
"loss": 0.6634,
"step": 419
},
{
"epoch": 0.39,
"learning_rate": 7.001257264404617e-05,
"loss": 0.6634,
"step": 420
},
{
"epoch": 0.39,
"learning_rate": 6.987525849126945e-05,
"loss": 0.6691,
"step": 421
},
{
"epoch": 0.39,
"learning_rate": 6.973776607548491e-05,
"loss": 0.6691,
"step": 422
},
{
"epoch": 0.39,
"learning_rate": 6.960009662987458e-05,
"loss": 0.6665,
"step": 423
},
{
"epoch": 0.39,
"learning_rate": 6.946225138920827e-05,
"loss": 0.6686,
"step": 424
},
{
"epoch": 0.39,
"learning_rate": 6.932423158983257e-05,
"loss": 0.6462,
"step": 425
},
{
"epoch": 0.39,
"learning_rate": 6.918603846965963e-05,
"loss": 0.6561,
"step": 426
},
{
"epoch": 0.39,
"learning_rate": 6.904767326815618e-05,
"loss": 0.6671,
"step": 427
},
{
"epoch": 0.4,
"learning_rate": 6.89091372263323e-05,
"loss": 0.6701,
"step": 428
},
{
"epoch": 0.4,
"learning_rate": 6.877043158673045e-05,
"loss": 0.6696,
"step": 429
},
{
"epoch": 0.4,
"learning_rate": 6.863155759341414e-05,
"loss": 0.6792,
"step": 430
},
{
"epoch": 0.4,
"learning_rate": 6.849251649195692e-05,
"loss": 0.6707,
"step": 431
},
{
"epoch": 0.4,
"learning_rate": 6.835330952943109e-05,
"loss": 0.645,
"step": 432
},
{
"epoch": 0.4,
"learning_rate": 6.821393795439663e-05,
"loss": 0.6575,
"step": 433
},
{
"epoch": 0.4,
"learning_rate": 6.807440301688989e-05,
"loss": 0.6561,
"step": 434
},
{
"epoch": 0.4,
"learning_rate": 6.793470596841253e-05,
"loss": 0.6735,
"step": 435
},
{
"epoch": 0.4,
"learning_rate": 6.779484806192004e-05,
"loss": 0.6478,
"step": 436
},
{
"epoch": 0.4,
"learning_rate": 6.765483055181083e-05,
"loss": 0.6781,
"step": 437
},
{
"epoch": 0.4,
"learning_rate": 6.751465469391469e-05,
"loss": 0.6573,
"step": 438
},
{
"epoch": 0.41,
"learning_rate": 6.737432174548167e-05,
"loss": 0.6579,
"step": 439
},
{
"epoch": 0.41,
"learning_rate": 6.723383296517084e-05,
"loss": 0.6595,
"step": 440
},
{
"epoch": 0.41,
"learning_rate": 6.709318961303886e-05,
"loss": 0.6646,
"step": 441
},
{
"epoch": 0.41,
"learning_rate": 6.695239295052881e-05,
"loss": 0.6593,
"step": 442
},
{
"epoch": 0.41,
"learning_rate": 6.681144424045883e-05,
"loss": 0.6714,
"step": 443
},
{
"epoch": 0.41,
"learning_rate": 6.667034474701071e-05,
"loss": 0.6644,
"step": 444
},
{
"epoch": 0.41,
"learning_rate": 6.652909573571874e-05,
"loss": 0.6705,
"step": 445
},
{
"epoch": 0.41,
"learning_rate": 6.638769847345818e-05,
"loss": 0.6705,
"step": 446
},
{
"epoch": 0.41,
"learning_rate": 6.624615422843398e-05,
"loss": 0.662,
"step": 447
},
{
"epoch": 0.41,
"learning_rate": 6.61044642701694e-05,
"loss": 0.6556,
"step": 448
},
{
"epoch": 0.41,
"learning_rate": 6.59626298694946e-05,
"loss": 0.6516,
"step": 449
},
{
"epoch": 0.42,
"learning_rate": 6.582065229853529e-05,
"loss": 0.6624,
"step": 450
},
{
"epoch": 0.42,
"learning_rate": 6.567853283070125e-05,
"loss": 0.6565,
"step": 451
},
{
"epoch": 0.42,
"learning_rate": 6.553627274067495e-05,
"loss": 0.6523,
"step": 452
},
{
"epoch": 0.42,
"learning_rate": 6.539387330440017e-05,
"loss": 0.6399,
"step": 453
},
{
"epoch": 0.42,
"learning_rate": 6.52513357990704e-05,
"loss": 0.6627,
"step": 454
},
{
"epoch": 0.42,
"learning_rate": 6.510866150311754e-05,
"loss": 0.6673,
"step": 455
},
{
"epoch": 0.42,
"learning_rate": 6.496585169620041e-05,
"loss": 0.6579,
"step": 456
},
{
"epoch": 0.42,
"learning_rate": 6.482290765919318e-05,
"loss": 0.6531,
"step": 457
},
{
"epoch": 0.42,
"learning_rate": 6.467983067417399e-05,
"loss": 0.6692,
"step": 458
},
{
"epoch": 0.42,
"learning_rate": 6.453662202441335e-05,
"loss": 0.6636,
"step": 459
},
{
"epoch": 0.42,
"learning_rate": 6.439328299436273e-05,
"loss": 0.6654,
"step": 460
},
{
"epoch": 0.43,
"learning_rate": 6.424981486964298e-05,
"loss": 0.6441,
"step": 461
},
{
"epoch": 0.43,
"learning_rate": 6.410621893703281e-05,
"loss": 0.6567,
"step": 462
},
{
"epoch": 0.43,
"learning_rate": 6.396249648445723e-05,
"loss": 0.6549,
"step": 463
},
{
"epoch": 0.43,
"learning_rate": 6.381864880097606e-05,
"loss": 0.6517,
"step": 464
},
{
"epoch": 0.43,
"learning_rate": 6.367467717677229e-05,
"loss": 0.6748,
"step": 465
},
{
"epoch": 0.43,
"learning_rate": 6.353058290314056e-05,
"loss": 0.6562,
"step": 466
},
{
"epoch": 0.43,
"learning_rate": 6.338636727247557e-05,
"loss": 0.6536,
"step": 467
},
{
"epoch": 0.43,
"learning_rate": 6.324203157826045e-05,
"loss": 0.6544,
"step": 468
},
{
"epoch": 0.43,
"learning_rate": 6.309757711505528e-05,
"loss": 0.6632,
"step": 469
},
{
"epoch": 0.43,
"learning_rate": 6.295300517848528e-05,
"loss": 0.6495,
"step": 470
},
{
"epoch": 0.44,
"learning_rate": 6.280831706522935e-05,
"loss": 0.6577,
"step": 471
},
{
"epoch": 0.44,
"learning_rate": 6.26635140730084e-05,
"loss": 0.65,
"step": 472
},
{
"epoch": 0.44,
"learning_rate": 6.251859750057368e-05,
"loss": 0.6622,
"step": 473
},
{
"epoch": 0.44,
"learning_rate": 6.237356864769517e-05,
"loss": 0.6665,
"step": 474
},
{
"epoch": 0.44,
"learning_rate": 6.222842881514985e-05,
"loss": 0.659,
"step": 475
},
{
"epoch": 0.44,
"learning_rate": 6.208317930471017e-05,
"loss": 0.6498,
"step": 476
},
{
"epoch": 0.44,
"learning_rate": 6.193782141913226e-05,
"loss": 0.6495,
"step": 477
},
{
"epoch": 0.44,
"learning_rate": 6.179235646214422e-05,
"loss": 0.6594,
"step": 478
},
{
"epoch": 0.44,
"learning_rate": 6.164678573843455e-05,
"loss": 0.6483,
"step": 479
},
{
"epoch": 0.44,
"learning_rate": 6.150111055364035e-05,
"loss": 0.6663,
"step": 480
},
{
"epoch": 0.44,
"learning_rate": 6.135533221433568e-05,
"loss": 0.6421,
"step": 481
},
{
"epoch": 0.45,
"learning_rate": 6.120945202801975e-05,
"loss": 0.6434,
"step": 482
},
{
"epoch": 0.45,
"learning_rate": 6.106347130310526e-05,
"loss": 0.6403,
"step": 483
},
{
"epoch": 0.45,
"learning_rate": 6.0917391348906684e-05,
"loss": 0.658,
"step": 484
},
{
"epoch": 0.45,
"learning_rate": 6.077121347562846e-05,
"loss": 0.6356,
"step": 485
},
{
"epoch": 0.45,
"learning_rate": 6.062493899435326e-05,
"loss": 0.6501,
"step": 486
},
{
"epoch": 0.45,
"learning_rate": 6.047856921703028e-05,
"loss": 0.6378,
"step": 487
},
{
"epoch": 0.45,
"learning_rate": 6.03321054564634e-05,
"loss": 0.627,
"step": 488
},
{
"epoch": 0.45,
"learning_rate": 6.018554902629946e-05,
"loss": 0.6444,
"step": 489
},
{
"epoch": 0.45,
"learning_rate": 6.0038901241016484e-05,
"loss": 0.6588,
"step": 490
},
{
"epoch": 0.45,
"learning_rate": 5.989216341591182e-05,
"loss": 0.647,
"step": 491
},
{
"epoch": 0.45,
"learning_rate": 5.974533686709042e-05,
"loss": 0.6385,
"step": 492
},
{
"epoch": 0.46,
"learning_rate": 5.9598422911453024e-05,
"loss": 0.6345,
"step": 493
},
{
"epoch": 0.46,
"learning_rate": 5.945142286668428e-05,
"loss": 0.6428,
"step": 494
},
{
"epoch": 0.46,
"learning_rate": 5.930433805124104e-05,
"loss": 0.6511,
"step": 495
},
{
"epoch": 0.46,
"learning_rate": 5.9157169784340426e-05,
"loss": 0.649,
"step": 496
},
{
"epoch": 0.46,
"learning_rate": 5.900991938594805e-05,
"loss": 0.6375,
"step": 497
},
{
"epoch": 0.46,
"learning_rate": 5.886258817676621e-05,
"loss": 0.6387,
"step": 498
},
{
"epoch": 0.46,
"learning_rate": 5.871517747822193e-05,
"loss": 0.6418,
"step": 499
},
{
"epoch": 0.46,
"learning_rate": 5.856768861245525e-05,
"loss": 0.639,
"step": 500
},
{
"epoch": 0.46,
"learning_rate": 5.8420122902307275e-05,
"loss": 0.6512,
"step": 501
},
{
"epoch": 0.46,
"learning_rate": 5.82724816713083e-05,
"loss": 0.6404,
"step": 502
},
{
"epoch": 0.46,
"learning_rate": 5.812476624366602e-05,
"loss": 0.641,
"step": 503
},
{
"epoch": 0.47,
"learning_rate": 5.7976977944253594e-05,
"loss": 0.6367,
"step": 504
},
{
"epoch": 0.47,
"learning_rate": 5.782911809859778e-05,
"loss": 0.6383,
"step": 505
},
{
"epoch": 0.47,
"learning_rate": 5.7681188032867026e-05,
"loss": 0.6335,
"step": 506
},
{
"epoch": 0.47,
"learning_rate": 5.7533189073859575e-05,
"loss": 0.6255,
"step": 507
},
{
"epoch": 0.47,
"learning_rate": 5.738512254899163e-05,
"loss": 0.6221,
"step": 508
},
{
"epoch": 0.47,
"learning_rate": 5.7236989786285355e-05,
"loss": 0.6121,
"step": 509
},
{
"epoch": 0.47,
"learning_rate": 5.708879211435702e-05,
"loss": 0.6142,
"step": 510
},
{
"epoch": 0.47,
"learning_rate": 5.694053086240508e-05,
"loss": 0.6185,
"step": 511
},
{
"epoch": 0.47,
"learning_rate": 5.6792207360198234e-05,
"loss": 0.6128,
"step": 512
},
{
"epoch": 0.47,
"learning_rate": 5.664382293806352e-05,
"loss": 0.6413,
"step": 513
},
{
"epoch": 0.47,
"learning_rate": 5.6495378926874376e-05,
"loss": 0.6181,
"step": 514
},
{
"epoch": 0.48,
"learning_rate": 5.634687665803864e-05,
"loss": 0.6306,
"step": 515
},
{
"epoch": 0.48,
"learning_rate": 5.619831746348678e-05,
"loss": 0.6146,
"step": 516
},
{
"epoch": 0.48,
"learning_rate": 5.604970267565974e-05,
"loss": 0.6122,
"step": 517
},
{
"epoch": 0.48,
"learning_rate": 5.590103362749712e-05,
"loss": 0.6243,
"step": 518
},
{
"epoch": 0.48,
"learning_rate": 5.5752311652425205e-05,
"loss": 0.6239,
"step": 519
},
{
"epoch": 0.48,
"learning_rate": 5.560353808434494e-05,
"loss": 0.6163,
"step": 520
},
{
"epoch": 0.48,
"learning_rate": 5.5454714257620064e-05,
"loss": 0.6154,
"step": 521
},
{
"epoch": 0.48,
"learning_rate": 5.5305841507065046e-05,
"loss": 0.6336,
"step": 522
},
{
"epoch": 0.48,
"learning_rate": 5.515692116793316e-05,
"loss": 0.6193,
"step": 523
},
{
"epoch": 0.48,
"learning_rate": 5.5007954575904586e-05,
"loss": 0.6212,
"step": 524
},
{
"epoch": 0.48,
"learning_rate": 5.485894306707422e-05,
"loss": 0.6259,
"step": 525
},
{
"epoch": 0.49,
"learning_rate": 5.4709887977939925e-05,
"loss": 0.6131,
"step": 526
},
{
"epoch": 0.49,
"learning_rate": 5.456079064539037e-05,
"loss": 0.6195,
"step": 527
},
{
"epoch": 0.49,
"learning_rate": 5.441165240669318e-05,
"loss": 0.6241,
"step": 528
},
{
"epoch": 0.49,
"learning_rate": 5.4262474599482825e-05,
"loss": 0.6177,
"step": 529
},
{
"epoch": 0.49,
"learning_rate": 5.4113258561748647e-05,
"loss": 0.6193,
"step": 530
},
{
"epoch": 0.49,
"learning_rate": 5.396400563182294e-05,
"loss": 0.6197,
"step": 531
},
{
"epoch": 0.49,
"learning_rate": 5.381471714836885e-05,
"loss": 0.6255,
"step": 532
},
{
"epoch": 0.49,
"learning_rate": 5.3665394450368414e-05,
"loss": 0.6072,
"step": 533
},
{
"epoch": 0.49,
"learning_rate": 5.351603887711053e-05,
"loss": 0.6109,
"step": 534
},
{
"epoch": 0.49,
"learning_rate": 5.336665176817899e-05,
"loss": 0.6218,
"step": 535
},
{
"epoch": 0.5,
"learning_rate": 5.321723446344039e-05,
"loss": 0.6084,
"step": 536
},
{
"epoch": 0.5,
"learning_rate": 5.3067788303032204e-05,
"loss": 0.6209,
"step": 537
},
{
"epoch": 0.5,
"learning_rate": 5.291831462735064e-05,
"loss": 0.6273,
"step": 538
},
{
"epoch": 0.5,
"learning_rate": 5.2768814777038787e-05,
"loss": 0.6218,
"step": 539
},
{
"epoch": 0.5,
"learning_rate": 5.261929009297445e-05,
"loss": 0.6149,
"step": 540
},
{
"epoch": 0.5,
"learning_rate": 5.246974191625814e-05,
"loss": 0.6198,
"step": 541
},
{
"epoch": 0.5,
"learning_rate": 5.232017158820113e-05,
"loss": 0.6122,
"step": 542
},
{
"epoch": 0.5,
"learning_rate": 5.217058045031332e-05,
"loss": 0.6168,
"step": 543
},
{
"epoch": 0.5,
"learning_rate": 5.202096984429131e-05,
"loss": 0.6221,
"step": 544
},
{
"epoch": 0.5,
"learning_rate": 5.187134111200629e-05,
"loss": 0.6228,
"step": 545
},
{
"epoch": 0.5,
"learning_rate": 5.172169559549197e-05,
"loss": 0.6154,
"step": 546
},
{
"epoch": 0.51,
"learning_rate": 5.15720346369327e-05,
"loss": 0.6129,
"step": 547
},
{
"epoch": 0.51,
"learning_rate": 5.142235957865126e-05,
"loss": 0.6141,
"step": 548
},
{
"epoch": 0.51,
"learning_rate": 5.1272671763096904e-05,
"loss": 0.6412,
"step": 549
},
{
"epoch": 0.51,
"learning_rate": 5.1122972532833335e-05,
"loss": 0.6298,
"step": 550
},
{
"epoch": 0.51,
"learning_rate": 5.097326323052659e-05,
"loss": 0.6171,
"step": 551
},
{
"epoch": 0.51,
"learning_rate": 5.082354519893309e-05,
"loss": 0.6107,
"step": 552
},
{
"epoch": 0.51,
"learning_rate": 5.067381978088752e-05,
"loss": 0.6197,
"step": 553
},
{
"epoch": 0.51,
"learning_rate": 5.0524088319290804e-05,
"loss": 0.6471,
"step": 554
},
{
"epoch": 0.51,
"learning_rate": 5.0374352157098136e-05,
"loss": 0.6338,
"step": 555
},
{
"epoch": 0.51,
"learning_rate": 5.022461263730679e-05,
"loss": 0.6319,
"step": 556
},
{
"epoch": 0.51,
"learning_rate": 5.007487110294419e-05,
"loss": 0.6254,
"step": 557
},
{
"epoch": 0.52,
"learning_rate": 4.992512889705583e-05,
"loss": 0.632,
"step": 558
},
{
"epoch": 0.52,
"learning_rate": 4.9775387362693236e-05,
"loss": 0.6441,
"step": 559
},
{
"epoch": 0.52,
"learning_rate": 4.9625647842901876e-05,
"loss": 0.6293,
"step": 560
},
{
"epoch": 0.52,
"learning_rate": 4.94759116807092e-05,
"loss": 0.6243,
"step": 561
},
{
"epoch": 0.52,
"learning_rate": 4.93261802191125e-05,
"loss": 0.6319,
"step": 562
},
{
"epoch": 0.52,
"learning_rate": 4.9176454801066915e-05,
"loss": 0.6207,
"step": 563
},
{
"epoch": 0.52,
"learning_rate": 4.902673676947342e-05,
"loss": 0.6265,
"step": 564
},
{
"epoch": 0.52,
"learning_rate": 4.8877027467166684e-05,
"loss": 0.6298,
"step": 565
},
{
"epoch": 0.52,
"learning_rate": 4.8727328236903094e-05,
"loss": 0.603,
"step": 566
},
{
"epoch": 0.52,
"learning_rate": 4.857764042134875e-05,
"loss": 0.6183,
"step": 567
},
{
"epoch": 0.52,
"learning_rate": 4.842796536306732e-05,
"loss": 0.6221,
"step": 568
},
{
"epoch": 0.53,
"learning_rate": 4.827830440450803e-05,
"loss": 0.6204,
"step": 569
},
{
"epoch": 0.53,
"learning_rate": 4.812865888799373e-05,
"loss": 0.6483,
"step": 570
},
{
"epoch": 0.53,
"learning_rate": 4.7979030155708696e-05,
"loss": 0.6588,
"step": 571
},
{
"epoch": 0.53,
"learning_rate": 4.78294195496867e-05,
"loss": 0.6608,
"step": 572
},
{
"epoch": 0.53,
"learning_rate": 4.767982841179889e-05,
"loss": 0.6559,
"step": 573
},
{
"epoch": 0.53,
"learning_rate": 4.753025808374187e-05,
"loss": 0.659,
"step": 574
},
{
"epoch": 0.53,
"learning_rate": 4.738070990702556e-05,
"loss": 0.6616,
"step": 575
},
{
"epoch": 0.53,
"learning_rate": 4.7231185222961205e-05,
"loss": 0.6465,
"step": 576
},
{
"epoch": 0.53,
"learning_rate": 4.7081685372649366e-05,
"loss": 0.6729,
"step": 577
},
{
"epoch": 0.53,
"learning_rate": 4.693221169696782e-05,
"loss": 0.6495,
"step": 578
},
{
"epoch": 0.53,
"learning_rate": 4.678276553655961e-05,
"loss": 0.6581,
"step": 579
},
{
"epoch": 0.54,
"learning_rate": 4.663334823182102e-05,
"loss": 0.6616,
"step": 580
},
{
"epoch": 0.54,
"learning_rate": 4.6483961122889475e-05,
"loss": 0.6602,
"step": 581
},
{
"epoch": 0.54,
"learning_rate": 4.633460554963159e-05,
"loss": 0.6571,
"step": 582
},
{
"epoch": 0.54,
"learning_rate": 4.6185282851631154e-05,
"loss": 0.6743,
"step": 583
},
{
"epoch": 0.54,
"learning_rate": 4.603599436817707e-05,
"loss": 0.6627,
"step": 584
},
{
"epoch": 0.54,
"learning_rate": 4.5886741438251365e-05,
"loss": 0.6676,
"step": 585
},
{
"epoch": 0.54,
"learning_rate": 4.573752540051719e-05,
"loss": 0.6503,
"step": 586
},
{
"epoch": 0.54,
"learning_rate": 4.558834759330683e-05,
"loss": 0.6826,
"step": 587
},
{
"epoch": 0.54,
"learning_rate": 4.543920935460965e-05,
"loss": 0.6901,
"step": 588
},
{
"epoch": 0.54,
"learning_rate": 4.5290112022060094e-05,
"loss": 0.682,
"step": 589
},
{
"epoch": 0.54,
"learning_rate": 4.514105693292579e-05,
"loss": 0.6928,
"step": 590
},
{
"epoch": 0.55,
"learning_rate": 4.499204542409544e-05,
"loss": 0.6868,
"step": 591
},
{
"epoch": 0.55,
"learning_rate": 4.4843078832066835e-05,
"loss": 0.6803,
"step": 592
},
{
"epoch": 0.55,
"learning_rate": 4.4694158492934966e-05,
"loss": 0.7012,
"step": 593
},
{
"epoch": 0.55,
"learning_rate": 4.4545285742379954e-05,
"loss": 0.6951,
"step": 594
},
{
"epoch": 0.55,
"learning_rate": 4.439646191565506e-05,
"loss": 0.6728,
"step": 595
},
{
"epoch": 0.55,
"learning_rate": 4.4247688347574814e-05,
"loss": 0.6799,
"step": 596
},
{
"epoch": 0.55,
"learning_rate": 4.4098966372502884e-05,
"loss": 0.6762,
"step": 597
},
{
"epoch": 0.55,
"learning_rate": 4.395029732434026e-05,
"loss": 0.6602,
"step": 598
},
{
"epoch": 0.55,
"learning_rate": 4.3801682536513225e-05,
"loss": 0.6772,
"step": 599
},
{
"epoch": 0.55,
"learning_rate": 4.365312334196137e-05,
"loss": 0.6686,
"step": 600
},
{
"epoch": 0.56,
"learning_rate": 4.350462107312565e-05,
"loss": 0.6639,
"step": 601
},
{
"epoch": 0.56,
"learning_rate": 4.335617706193648e-05,
"loss": 0.6753,
"step": 602
},
{
"epoch": 0.56,
"learning_rate": 4.320779263980177e-05,
"loss": 0.6691,
"step": 603
},
{
"epoch": 0.56,
"learning_rate": 4.305946913759494e-05,
"loss": 0.678,
"step": 604
},
{
"epoch": 0.56,
"learning_rate": 4.291120788564298e-05,
"loss": 0.659,
"step": 605
},
{
"epoch": 0.56,
"learning_rate": 4.276301021371465e-05,
"loss": 0.6694,
"step": 606
},
{
"epoch": 0.56,
"learning_rate": 4.2614877451008386e-05,
"loss": 0.6503,
"step": 607
},
{
"epoch": 0.56,
"learning_rate": 4.246681092614043e-05,
"loss": 0.6664,
"step": 608
},
{
"epoch": 0.56,
"learning_rate": 4.231881196713298e-05,
"loss": 0.6741,
"step": 609
},
{
"epoch": 0.56,
"learning_rate": 4.2170881901402225e-05,
"loss": 0.6665,
"step": 610
},
{
"epoch": 0.56,
"learning_rate": 4.20230220557464e-05,
"loss": 0.6761,
"step": 611
},
{
"epoch": 0.57,
"learning_rate": 4.1875233756333986e-05,
"loss": 0.6596,
"step": 612
},
{
"epoch": 0.57,
"learning_rate": 4.172751832869172e-05,
"loss": 0.668,
"step": 613
},
{
"epoch": 0.57,
"learning_rate": 4.157987709769276e-05,
"loss": 0.6618,
"step": 614
},
{
"epoch": 0.57,
"learning_rate": 4.1432311387544756e-05,
"loss": 0.6607,
"step": 615
},
{
"epoch": 0.57,
"learning_rate": 4.1284822521778084e-05,
"loss": 0.664,
"step": 616
},
{
"epoch": 0.57,
"learning_rate": 4.113741182323381e-05,
"loss": 0.6529,
"step": 617
},
{
"epoch": 0.57,
"learning_rate": 4.099008061405194e-05,
"loss": 0.66,
"step": 618
},
{
"epoch": 0.57,
"learning_rate": 4.0842830215659586e-05,
"loss": 0.6739,
"step": 619
},
{
"epoch": 0.57,
"learning_rate": 4.0695661948758966e-05,
"loss": 0.6659,
"step": 620
},
{
"epoch": 0.57,
"learning_rate": 4.0548577133315726e-05,
"loss": 0.6724,
"step": 621
},
{
"epoch": 0.57,
"learning_rate": 4.040157708854699e-05,
"loss": 0.6515,
"step": 622
},
{
"epoch": 0.58,
"learning_rate": 4.025466313290959e-05,
"loss": 0.662,
"step": 623
},
{
"epoch": 0.58,
"learning_rate": 4.0107836584088185e-05,
"loss": 0.6674,
"step": 624
},
{
"epoch": 0.58,
"learning_rate": 3.996109875898352e-05,
"loss": 0.6587,
"step": 625
},
{
"epoch": 0.58,
"learning_rate": 3.9814450973700544e-05,
"loss": 0.6665,
"step": 626
},
{
"epoch": 0.58,
"learning_rate": 3.966789454353663e-05,
"loss": 0.6539,
"step": 627
},
{
"epoch": 0.58,
"learning_rate": 3.952143078296974e-05,
"loss": 0.6594,
"step": 628
},
{
"epoch": 0.58,
"learning_rate": 3.937506100564675e-05,
"loss": 0.6478,
"step": 629
},
{
"epoch": 0.58,
"learning_rate": 3.922878652437156e-05,
"loss": 0.6713,
"step": 630
},
{
"epoch": 0.58,
"learning_rate": 3.9082608651093314e-05,
"loss": 0.6541,
"step": 631
},
{
"epoch": 0.58,
"learning_rate": 3.893652869689475e-05,
"loss": 0.6632,
"step": 632
},
{
"epoch": 0.58,
"learning_rate": 3.879054797198027e-05,
"loss": 0.6525,
"step": 633
},
{
"epoch": 0.59,
"learning_rate": 3.864466778566432e-05,
"loss": 0.6606,
"step": 634
},
{
"epoch": 0.59,
"learning_rate": 3.849888944635965e-05,
"loss": 0.6776,
"step": 635
},
{
"epoch": 0.59,
"learning_rate": 3.835321426156547e-05,
"loss": 0.6637,
"step": 636
},
{
"epoch": 0.59,
"learning_rate": 3.8207643537855785e-05,
"loss": 0.647,
"step": 637
},
{
"epoch": 0.59,
"learning_rate": 3.806217858086776e-05,
"loss": 0.6559,
"step": 638
},
{
"epoch": 0.59,
"learning_rate": 3.791682069528983e-05,
"loss": 0.6696,
"step": 639
},
{
"epoch": 0.59,
"learning_rate": 3.777157118485016e-05,
"loss": 0.6479,
"step": 640
},
{
"epoch": 0.59,
"learning_rate": 3.762643135230485e-05,
"loss": 0.6637,
"step": 641
},
{
"epoch": 0.59,
"learning_rate": 3.7481402499426335e-05,
"loss": 0.6426,
"step": 642
},
{
"epoch": 0.59,
"learning_rate": 3.733648592699162e-05,
"loss": 0.6719,
"step": 643
},
{
"epoch": 0.59,
"learning_rate": 3.7191682934770655e-05,
"loss": 0.6634,
"step": 644
},
{
"epoch": 0.6,
"learning_rate": 3.7046994821514734e-05,
"loss": 0.6665,
"step": 645
},
{
"epoch": 0.6,
"learning_rate": 3.690242288494474e-05,
"loss": 0.6567,
"step": 646
},
{
"epoch": 0.6,
"learning_rate": 3.675796842173954e-05,
"loss": 0.6568,
"step": 647
},
{
"epoch": 0.6,
"learning_rate": 3.661363272752445e-05,
"loss": 0.6777,
"step": 648
},
{
"epoch": 0.6,
"learning_rate": 3.6469417096859464e-05,
"loss": 0.6722,
"step": 649
},
{
"epoch": 0.6,
"learning_rate": 3.6325322823227726e-05,
"loss": 0.6714,
"step": 650
},
{
"epoch": 0.6,
"learning_rate": 3.618135119902396e-05,
"loss": 0.6631,
"step": 651
},
{
"epoch": 0.6,
"learning_rate": 3.603750351554278e-05,
"loss": 0.673,
"step": 652
},
{
"epoch": 0.6,
"learning_rate": 3.5893781062967216e-05,
"loss": 0.6594,
"step": 653
},
{
"epoch": 0.6,
"learning_rate": 3.575018513035702e-05,
"loss": 0.6572,
"step": 654
},
{
"epoch": 0.6,
"learning_rate": 3.560671700563728e-05,
"loss": 0.6548,
"step": 655
},
{
"epoch": 0.61,
"learning_rate": 3.5463377975586664e-05,
"loss": 0.6559,
"step": 656
},
{
"epoch": 0.61,
"learning_rate": 3.532016932582602e-05,
"loss": 0.6641,
"step": 657
},
{
"epoch": 0.61,
"learning_rate": 3.517709234080683e-05,
"loss": 0.6503,
"step": 658
},
{
"epoch": 0.61,
"learning_rate": 3.503414830379962e-05,
"loss": 0.6553,
"step": 659
},
{
"epoch": 0.61,
"learning_rate": 3.489133849688247e-05,
"loss": 0.6524,
"step": 660
},
{
"epoch": 0.61,
"learning_rate": 3.474866420092963e-05,
"loss": 0.6452,
"step": 661
},
{
"epoch": 0.61,
"learning_rate": 3.460612669559986e-05,
"loss": 0.6465,
"step": 662
},
{
"epoch": 0.61,
"learning_rate": 3.446372725932504e-05,
"loss": 0.6516,
"step": 663
},
{
"epoch": 0.61,
"learning_rate": 3.432146716929875e-05,
"loss": 0.6547,
"step": 664
},
{
"epoch": 0.61,
"learning_rate": 3.417934770146471e-05,
"loss": 0.6548,
"step": 665
},
{
"epoch": 0.62,
"learning_rate": 3.4037370130505393e-05,
"loss": 0.6405,
"step": 666
},
{
"epoch": 0.62,
"learning_rate": 3.389553572983061e-05,
"loss": 0.6447,
"step": 667
},
{
"epoch": 0.62,
"learning_rate": 3.375384577156603e-05,
"loss": 0.6493,
"step": 668
},
{
"epoch": 0.62,
"learning_rate": 3.361230152654184e-05,
"loss": 0.6561,
"step": 669
},
{
"epoch": 0.62,
"learning_rate": 3.347090426428127e-05,
"loss": 0.6582,
"step": 670
},
{
"epoch": 0.62,
"learning_rate": 3.33296552529893e-05,
"loss": 0.6432,
"step": 671
},
{
"epoch": 0.62,
"learning_rate": 3.318855575954119e-05,
"loss": 0.6621,
"step": 672
},
{
"epoch": 0.62,
"learning_rate": 3.304760704947119e-05,
"loss": 0.6585,
"step": 673
},
{
"epoch": 0.62,
"learning_rate": 3.290681038696115e-05,
"loss": 0.6396,
"step": 674
},
{
"epoch": 0.62,
"learning_rate": 3.2766167034829176e-05,
"loss": 0.6657,
"step": 675
},
{
"epoch": 0.62,
"learning_rate": 3.262567825451833e-05,
"loss": 0.6299,
"step": 676
},
{
"epoch": 0.63,
"learning_rate": 3.2485345306085325e-05,
"loss": 0.6315,
"step": 677
},
{
"epoch": 0.63,
"learning_rate": 3.2345169448189187e-05,
"loss": 0.6173,
"step": 678
},
{
"epoch": 0.63,
"learning_rate": 3.220515193807995e-05,
"loss": 0.6025,
"step": 679
},
{
"epoch": 0.63,
"learning_rate": 3.206529403158748e-05,
"loss": 0.6256,
"step": 680
},
{
"epoch": 0.63,
"learning_rate": 3.1925596983110105e-05,
"loss": 0.6201,
"step": 681
},
{
"epoch": 0.63,
"learning_rate": 3.178606204560339e-05,
"loss": 0.6348,
"step": 682
},
{
"epoch": 0.63,
"learning_rate": 3.1646690470568926e-05,
"loss": 0.623,
"step": 683
},
{
"epoch": 0.63,
"learning_rate": 3.15074835080431e-05,
"loss": 0.6218,
"step": 684
},
{
"epoch": 0.63,
"learning_rate": 3.1368442406585875e-05,
"loss": 0.6123,
"step": 685
},
{
"epoch": 0.63,
"learning_rate": 3.122956841326956e-05,
"loss": 0.6218,
"step": 686
},
{
"epoch": 0.63,
"learning_rate": 3.109086277366772e-05,
"loss": 0.6264,
"step": 687
},
{
"epoch": 0.64,
"learning_rate": 3.095232673184385e-05,
"loss": 0.6242,
"step": 688
},
{
"epoch": 0.64,
"learning_rate": 3.081396153034037e-05,
"loss": 0.6197,
"step": 689
},
{
"epoch": 0.64,
"learning_rate": 3.0675768410167446e-05,
"loss": 0.6271,
"step": 690
},
{
"epoch": 0.64,
"learning_rate": 3.053774861079173e-05,
"loss": 0.6273,
"step": 691
},
{
"epoch": 0.64,
"learning_rate": 3.0399903370125427e-05,
"loss": 0.6268,
"step": 692
},
{
"epoch": 0.64,
"learning_rate": 3.0262233924515103e-05,
"loss": 0.6199,
"step": 693
},
{
"epoch": 0.64,
"learning_rate": 3.0124741508730558e-05,
"loss": 0.6217,
"step": 694
},
{
"epoch": 0.64,
"learning_rate": 2.9987427355953823e-05,
"loss": 0.6184,
"step": 695
},
{
"epoch": 0.64,
"learning_rate": 2.985029269776807e-05,
"loss": 0.6262,
"step": 696
},
{
"epoch": 0.64,
"learning_rate": 2.9713338764146585e-05,
"loss": 0.6202,
"step": 697
},
{
"epoch": 0.64,
"learning_rate": 2.9576566783441696e-05,
"loss": 0.6264,
"step": 698
},
{
"epoch": 0.65,
"learning_rate": 2.9439977982373745e-05,
"loss": 0.6008,
"step": 699
},
{
"epoch": 0.65,
"learning_rate": 2.9303573586020207e-05,
"loss": 0.6208,
"step": 700
},
{
"epoch": 0.65,
"learning_rate": 2.9167354817804553e-05,
"loss": 0.6217,
"step": 701
},
{
"epoch": 0.65,
"learning_rate": 2.9031322899485315e-05,
"loss": 0.6302,
"step": 702
},
{
"epoch": 0.65,
"learning_rate": 2.889547905114526e-05,
"loss": 0.6328,
"step": 703
},
{
"epoch": 0.65,
"learning_rate": 2.8759824491180197e-05,
"loss": 0.6337,
"step": 704
},
{
"epoch": 0.65,
"learning_rate": 2.8624360436288284e-05,
"loss": 0.6302,
"step": 705
},
{
"epoch": 0.65,
"learning_rate": 2.8489088101458994e-05,
"loss": 0.6318,
"step": 706
},
{
"epoch": 0.65,
"learning_rate": 2.8354008699962242e-05,
"loss": 0.6246,
"step": 707
},
{
"epoch": 0.65,
"learning_rate": 2.8219123443337524e-05,
"loss": 0.6282,
"step": 708
},
{
"epoch": 0.65,
"learning_rate": 2.8084433541383014e-05,
"loss": 0.6188,
"step": 709
},
{
"epoch": 0.66,
"learning_rate": 2.7949940202144732e-05,
"loss": 0.6276,
"step": 710
},
{
"epoch": 0.66,
"learning_rate": 2.7815644631905748e-05,
"loss": 0.6372,
"step": 711
},
{
"epoch": 0.66,
"learning_rate": 2.7681548035175215e-05,
"loss": 0.6188,
"step": 712
},
{
"epoch": 0.66,
"learning_rate": 2.7547651614677823e-05,
"loss": 0.6239,
"step": 713
},
{
"epoch": 0.66,
"learning_rate": 2.7413956571342793e-05,
"loss": 0.6285,
"step": 714
},
{
"epoch": 0.66,
"learning_rate": 2.7280464104293102e-05,
"loss": 0.6111,
"step": 715
},
{
"epoch": 0.66,
"learning_rate": 2.714717541083496e-05,
"loss": 0.6192,
"step": 716
},
{
"epoch": 0.66,
"learning_rate": 2.7014091686446764e-05,
"loss": 0.6265,
"step": 717
},
{
"epoch": 0.66,
"learning_rate": 2.6881214124768595e-05,
"loss": 0.6218,
"step": 718
},
{
"epoch": 0.66,
"learning_rate": 2.6748543917591484e-05,
"loss": 0.6237,
"step": 719
},
{
"epoch": 0.67,
"learning_rate": 2.661608225484658e-05,
"loss": 0.6131,
"step": 720
},
{
"epoch": 0.67,
"learning_rate": 2.6483830324594644e-05,
"loss": 0.6265,
"step": 721
},
{
"epoch": 0.67,
"learning_rate": 2.6351789313015307e-05,
"loss": 0.6304,
"step": 722
},
{
"epoch": 0.67,
"learning_rate": 2.621996040439646e-05,
"loss": 0.6274,
"step": 723
},
{
"epoch": 0.67,
"learning_rate": 2.6088344781123626e-05,
"loss": 0.6265,
"step": 724
},
{
"epoch": 0.67,
"learning_rate": 2.5956943623669332e-05,
"loss": 0.6254,
"step": 725
},
{
"epoch": 0.67,
"learning_rate": 2.5825758110582565e-05,
"loss": 0.6255,
"step": 726
},
{
"epoch": 0.67,
"learning_rate": 2.5694789418478183e-05,
"loss": 0.6028,
"step": 727
},
{
"epoch": 0.67,
"learning_rate": 2.5564038722026285e-05,
"loss": 0.6235,
"step": 728
},
{
"epoch": 0.67,
"learning_rate": 2.5433507193941853e-05,
"loss": 0.6165,
"step": 729
},
{
"epoch": 0.67,
"learning_rate": 2.5303196004974094e-05,
"loss": 0.618,
"step": 730
},
{
"epoch": 0.68,
"learning_rate": 2.517310632389589e-05,
"loss": 0.6224,
"step": 731
},
{
"epoch": 0.68,
"learning_rate": 2.5043239317493544e-05,
"loss": 0.6203,
"step": 732
},
{
"epoch": 0.68,
"learning_rate": 2.491359615055604e-05,
"loss": 0.6241,
"step": 733
},
{
"epoch": 0.68,
"learning_rate": 2.478417798586477e-05,
"loss": 0.627,
"step": 734
},
{
"epoch": 0.68,
"learning_rate": 2.4654985984183125e-05,
"loss": 0.6232,
"step": 735
},
{
"epoch": 0.68,
"learning_rate": 2.4526021304245916e-05,
"loss": 0.6145,
"step": 736
},
{
"epoch": 0.68,
"learning_rate": 2.439728510274914e-05,
"loss": 0.6213,
"step": 737
},
{
"epoch": 0.68,
"learning_rate": 2.4268778534339537e-05,
"loss": 0.6173,
"step": 738
},
{
"epoch": 0.68,
"learning_rate": 2.414050275160426e-05,
"loss": 0.6113,
"step": 739
},
{
"epoch": 0.68,
"learning_rate": 2.4012458905060507e-05,
"loss": 0.6099,
"step": 740
},
{
"epoch": 0.68,
"learning_rate": 2.3884648143145234e-05,
"loss": 0.6257,
"step": 741
},
{
"epoch": 0.69,
"learning_rate": 2.375707161220483e-05,
"loss": 0.6184,
"step": 742
},
{
"epoch": 0.69,
"learning_rate": 2.362973045648486e-05,
"loss": 0.6147,
"step": 743
},
{
"epoch": 0.69,
"learning_rate": 2.350262581811972e-05,
"loss": 0.6093,
"step": 744
},
{
"epoch": 0.69,
"learning_rate": 2.337575883712258e-05,
"loss": 0.6163,
"step": 745
},
{
"epoch": 0.69,
"learning_rate": 2.3249130651374966e-05,
"loss": 0.6042,
"step": 746
},
{
"epoch": 0.69,
"learning_rate": 2.3122742396616587e-05,
"loss": 0.6115,
"step": 747
},
{
"epoch": 0.69,
"learning_rate": 2.29965952064353e-05,
"loss": 0.6057,
"step": 748
},
{
"epoch": 0.69,
"learning_rate": 2.2870690212256723e-05,
"loss": 0.6043,
"step": 749
},
{
"epoch": 0.69,
"learning_rate": 2.274502854333426e-05,
"loss": 0.6074,
"step": 750
},
{
"epoch": 0.69,
"learning_rate": 2.2619611326738893e-05,
"loss": 0.5923,
"step": 751
},
{
"epoch": 0.69,
"learning_rate": 2.2494439687349082e-05,
"loss": 0.6032,
"step": 752
},
{
"epoch": 0.7,
"learning_rate": 2.236951474784069e-05,
"loss": 0.5809,
"step": 753
},
{
"epoch": 0.7,
"learning_rate": 2.2244837628676905e-05,
"loss": 0.6004,
"step": 754
},
{
"epoch": 0.7,
"learning_rate": 2.2120409448098194e-05,
"loss": 0.606,
"step": 755
},
{
"epoch": 0.7,
"learning_rate": 2.199623132211226e-05,
"loss": 0.5892,
"step": 756
},
{
"epoch": 0.7,
"learning_rate": 2.1872304364484052e-05,
"loss": 0.601,
"step": 757
},
{
"epoch": 0.7,
"learning_rate": 2.1748629686725768e-05,
"loss": 0.5953,
"step": 758
},
{
"epoch": 0.7,
"learning_rate": 2.1625208398086904e-05,
"loss": 0.5896,
"step": 759
},
{
"epoch": 0.7,
"learning_rate": 2.150204160554421e-05,
"loss": 0.5979,
"step": 760
},
{
"epoch": 0.7,
"learning_rate": 2.137913041379194e-05,
"loss": 0.5973,
"step": 761
},
{
"epoch": 0.7,
"learning_rate": 2.12564759252318e-05,
"loss": 0.5977,
"step": 762
},
{
"epoch": 0.7,
"learning_rate": 2.1134079239963056e-05,
"loss": 0.5844,
"step": 763
},
{
"epoch": 0.71,
"learning_rate": 2.1011941455772776e-05,
"loss": 0.5914,
"step": 764
},
{
"epoch": 0.71,
"learning_rate": 2.0890063668125888e-05,
"loss": 0.5738,
"step": 765
},
{
"epoch": 0.71,
"learning_rate": 2.07684469701554e-05,
"loss": 0.5498,
"step": 766
},
{
"epoch": 0.71,
"learning_rate": 2.0647092452652573e-05,
"loss": 0.5436,
"step": 767
},
{
"epoch": 0.71,
"learning_rate": 2.0526001204057155e-05,
"loss": 0.5546,
"step": 768
},
{
"epoch": 0.71,
"learning_rate": 2.0405174310447615e-05,
"loss": 0.5484,
"step": 769
},
{
"epoch": 0.71,
"learning_rate": 2.0284612855531386e-05,
"loss": 0.5533,
"step": 770
},
{
"epoch": 0.71,
"learning_rate": 2.0164317920635173e-05,
"loss": 0.5544,
"step": 771
},
{
"epoch": 0.71,
"learning_rate": 2.0044290584695212e-05,
"loss": 0.5536,
"step": 772
},
{
"epoch": 0.71,
"learning_rate": 1.9924531924247648e-05,
"loss": 0.555,
"step": 773
},
{
"epoch": 0.71,
"learning_rate": 1.980504301341884e-05,
"loss": 0.56,
"step": 774
},
{
"epoch": 0.72,
"learning_rate": 1.9685824923915745e-05,
"loss": 0.5617,
"step": 775
},
{
"epoch": 0.72,
"learning_rate": 1.956687872501627e-05,
"loss": 0.5525,
"step": 776
},
{
"epoch": 0.72,
"learning_rate": 1.9448205483559717e-05,
"loss": 0.549,
"step": 777
},
{
"epoch": 0.72,
"learning_rate": 1.9329806263937288e-05,
"loss": 0.5481,
"step": 778
},
{
"epoch": 0.72,
"learning_rate": 1.921168212808233e-05,
"loss": 0.5626,
"step": 779
},
{
"epoch": 0.72,
"learning_rate": 1.909383413546101e-05,
"loss": 0.5547,
"step": 780
},
{
"epoch": 0.72,
"learning_rate": 1.8976263343062772e-05,
"loss": 0.5446,
"step": 781
},
{
"epoch": 0.72,
"learning_rate": 1.8858970805390742e-05,
"loss": 0.5299,
"step": 782
},
{
"epoch": 0.72,
"learning_rate": 1.874195757445242e-05,
"loss": 0.526,
"step": 783
},
{
"epoch": 0.72,
"learning_rate": 1.8625224699750145e-05,
"loss": 0.523,
"step": 784
},
{
"epoch": 0.73,
"learning_rate": 1.8508773228271738e-05,
"loss": 0.5373,
"step": 785
},
{
"epoch": 0.73,
"learning_rate": 1.8392604204481067e-05,
"loss": 0.5261,
"step": 786
},
{
"epoch": 0.73,
"learning_rate": 1.8276718670308718e-05,
"loss": 0.5195,
"step": 787
},
{
"epoch": 0.73,
"learning_rate": 1.8161117665142625e-05,
"loss": 0.5474,
"step": 788
},
{
"epoch": 0.73,
"learning_rate": 1.8045802225818758e-05,
"loss": 0.5346,
"step": 789
},
{
"epoch": 0.73,
"learning_rate": 1.7930773386611815e-05,
"loss": 0.5366,
"step": 790
},
{
"epoch": 0.73,
"learning_rate": 1.781603217922598e-05,
"loss": 0.5434,
"step": 791
},
{
"epoch": 0.73,
"learning_rate": 1.770157963278557e-05,
"loss": 0.5461,
"step": 792
},
{
"epoch": 0.73,
"learning_rate": 1.7587416773825933e-05,
"loss": 0.5427,
"step": 793
},
{
"epoch": 0.73,
"learning_rate": 1.7473544626284223e-05,
"loss": 0.5489,
"step": 794
},
{
"epoch": 0.73,
"learning_rate": 1.7359964211490078e-05,
"loss": 0.5451,
"step": 795
},
{
"epoch": 0.74,
"learning_rate": 1.72466765481566e-05,
"loss": 0.5405,
"step": 796
},
{
"epoch": 0.74,
"learning_rate": 1.7133682652371236e-05,
"loss": 0.5396,
"step": 797
},
{
"epoch": 0.74,
"learning_rate": 1.7020983537586498e-05,
"loss": 0.549,
"step": 798
},
{
"epoch": 0.74,
"learning_rate": 1.6908580214611065e-05,
"loss": 0.5518,
"step": 799
},
{
"epoch": 0.74,
"learning_rate": 1.679647369160059e-05,
"loss": 0.5347,
"step": 800
},
{
"epoch": 0.74,
"learning_rate": 1.668466497404872e-05,
"loss": 0.5426,
"step": 801
},
{
"epoch": 0.74,
"learning_rate": 1.657315506477804e-05,
"loss": 0.5423,
"step": 802
},
{
"epoch": 0.74,
"learning_rate": 1.6461944963931113e-05,
"loss": 0.5406,
"step": 803
},
{
"epoch": 0.74,
"learning_rate": 1.6351035668961484e-05,
"loss": 0.5368,
"step": 804
},
{
"epoch": 0.74,
"learning_rate": 1.624042817462476e-05,
"loss": 0.5344,
"step": 805
},
{
"epoch": 0.74,
"learning_rate": 1.613012347296961e-05,
"loss": 0.5432,
"step": 806
},
{
"epoch": 0.75,
"learning_rate": 1.602012255332902e-05,
"loss": 0.5459,
"step": 807
},
{
"epoch": 0.75,
"learning_rate": 1.591042640231128e-05,
"loss": 0.5342,
"step": 808
},
{
"epoch": 0.75,
"learning_rate": 1.580103600379113e-05,
"loss": 0.537,
"step": 809
},
{
"epoch": 0.75,
"learning_rate": 1.5691952338901105e-05,
"loss": 0.5387,
"step": 810
},
{
"epoch": 0.75,
"learning_rate": 1.5583176386022514e-05,
"loss": 0.5376,
"step": 811
},
{
"epoch": 0.75,
"learning_rate": 1.5474709120776802e-05,
"loss": 0.545,
"step": 812
},
{
"epoch": 0.75,
"learning_rate": 1.536655151601682e-05,
"loss": 0.5323,
"step": 813
},
{
"epoch": 0.75,
"learning_rate": 1.5258704541817953e-05,
"loss": 0.5329,
"step": 814
},
{
"epoch": 0.75,
"learning_rate": 1.5151169165469575e-05,
"loss": 0.5449,
"step": 815
},
{
"epoch": 0.75,
"learning_rate": 1.5043946351466292e-05,
"loss": 0.5561,
"step": 816
},
{
"epoch": 0.75,
"learning_rate": 1.4937037061499316e-05,
"loss": 0.5385,
"step": 817
},
{
"epoch": 0.76,
"learning_rate": 1.4830442254447818e-05,
"loss": 0.5425,
"step": 818
},
{
"epoch": 0.76,
"learning_rate": 1.4724162886370357e-05,
"loss": 0.5441,
"step": 819
},
{
"epoch": 0.76,
"learning_rate": 1.4618199910496294e-05,
"loss": 0.5312,
"step": 820
},
{
"epoch": 0.76,
"learning_rate": 1.4512554277217244e-05,
"loss": 0.5377,
"step": 821
},
{
"epoch": 0.76,
"learning_rate": 1.4407226934078489e-05,
"loss": 0.5393,
"step": 822
},
{
"epoch": 0.76,
"learning_rate": 1.430221882577063e-05,
"loss": 0.5375,
"step": 823
},
{
"epoch": 0.76,
"learning_rate": 1.419753089412098e-05,
"loss": 0.541,
"step": 824
},
{
"epoch": 0.76,
"learning_rate": 1.4093164078085109e-05,
"loss": 0.5262,
"step": 825
},
{
"epoch": 0.76,
"learning_rate": 1.3989119313738574e-05,
"loss": 0.5414,
"step": 826
},
{
"epoch": 0.76,
"learning_rate": 1.3885397534268325e-05,
"loss": 0.5427,
"step": 827
},
{
"epoch": 0.76,
"learning_rate": 1.3781999669964469e-05,
"loss": 0.5352,
"step": 828
},
{
"epoch": 0.77,
"learning_rate": 1.3678926648211932e-05,
"loss": 0.5378,
"step": 829
},
{
"epoch": 0.77,
"learning_rate": 1.3576179393482013e-05,
"loss": 0.529,
"step": 830
},
{
"epoch": 0.77,
"learning_rate": 1.3473758827324234e-05,
"loss": 0.5343,
"step": 831
},
{
"epoch": 0.77,
"learning_rate": 1.3371665868358008e-05,
"loss": 0.5351,
"step": 832
},
{
"epoch": 0.77,
"learning_rate": 1.3269901432264392e-05,
"loss": 0.5453,
"step": 833
},
{
"epoch": 0.77,
"learning_rate": 1.3168466431777914e-05,
"loss": 0.5362,
"step": 834
},
{
"epoch": 0.77,
"learning_rate": 1.306736177667835e-05,
"loss": 0.5498,
"step": 835
},
{
"epoch": 0.77,
"learning_rate": 1.2966588373782584e-05,
"loss": 0.525,
"step": 836
},
{
"epoch": 0.77,
"learning_rate": 1.2866147126936473e-05,
"loss": 0.532,
"step": 837
},
{
"epoch": 0.77,
"learning_rate": 1.2766038937006696e-05,
"loss": 0.5444,
"step": 838
},
{
"epoch": 0.77,
"learning_rate": 1.2666264701872782e-05,
"loss": 0.5452,
"step": 839
},
{
"epoch": 0.78,
"learning_rate": 1.2566825316418956e-05,
"loss": 0.5537,
"step": 840
},
{
"epoch": 0.78,
"learning_rate": 1.2467721672526106e-05,
"loss": 0.5349,
"step": 841
},
{
"epoch": 0.78,
"learning_rate": 1.2368954659063914e-05,
"loss": 0.5474,
"step": 842
},
{
"epoch": 0.78,
"learning_rate": 1.2270525161882701e-05,
"loss": 0.5447,
"step": 843
},
{
"epoch": 0.78,
"learning_rate": 1.2172434063805622e-05,
"loss": 0.5481,
"step": 844
},
{
"epoch": 0.78,
"learning_rate": 1.2074682244620695e-05,
"loss": 0.5463,
"step": 845
},
{
"epoch": 0.78,
"learning_rate": 1.1977270581072902e-05,
"loss": 0.5536,
"step": 846
},
{
"epoch": 0.78,
"learning_rate": 1.1880199946856357e-05,
"loss": 0.5421,
"step": 847
},
{
"epoch": 0.78,
"learning_rate": 1.1783471212606434e-05,
"loss": 0.5335,
"step": 848
},
{
"epoch": 0.78,
"learning_rate": 1.1687085245891977e-05,
"loss": 0.5528,
"step": 849
},
{
"epoch": 0.79,
"learning_rate": 1.1591042911207527e-05,
"loss": 0.5371,
"step": 850
},
{
"epoch": 0.79,
"learning_rate": 1.1495345069965558e-05,
"loss": 0.5329,
"step": 851
},
{
"epoch": 0.79,
"learning_rate": 1.1399992580488727e-05,
"loss": 0.5476,
"step": 852
},
{
"epoch": 0.79,
"learning_rate": 1.130498629800224e-05,
"loss": 0.5415,
"step": 853
},
{
"epoch": 0.79,
"learning_rate": 1.1210327074626077e-05,
"loss": 0.5354,
"step": 854
},
{
"epoch": 0.79,
"learning_rate": 1.1116015759367493e-05,
"loss": 0.5244,
"step": 855
},
{
"epoch": 0.79,
"learning_rate": 1.1022053198113285e-05,
"loss": 0.5413,
"step": 856
},
{
"epoch": 0.79,
"learning_rate": 1.0928440233622205e-05,
"loss": 0.5261,
"step": 857
},
{
"epoch": 0.79,
"learning_rate": 1.083517770551753e-05,
"loss": 0.5339,
"step": 858
},
{
"epoch": 0.79,
"learning_rate": 1.0742266450279343e-05,
"loss": 0.5494,
"step": 859
},
{
"epoch": 0.79,
"learning_rate": 1.0649707301237188e-05,
"loss": 0.5187,
"step": 860
},
{
"epoch": 0.8,
"learning_rate": 1.0557501088562532e-05,
"loss": 0.5309,
"step": 861
},
{
"epoch": 0.8,
"learning_rate": 1.0465648639261305e-05,
"loss": 0.5426,
"step": 862
},
{
"epoch": 0.8,
"learning_rate": 1.0374150777166513e-05,
"loss": 0.5378,
"step": 863
},
{
"epoch": 0.8,
"learning_rate": 1.0283008322930832e-05,
"loss": 0.5203,
"step": 864
},
{
"epoch": 0.8,
"learning_rate": 1.0192222094019255e-05,
"loss": 0.5346,
"step": 865
},
{
"epoch": 0.8,
"learning_rate": 1.0101792904701763e-05,
"loss": 0.5487,
"step": 866
},
{
"epoch": 0.8,
"learning_rate": 1.0011721566046001e-05,
"loss": 0.5413,
"step": 867
},
{
"epoch": 0.8,
"learning_rate": 9.922008885910028e-06,
"loss": 0.529,
"step": 868
},
{
"epoch": 0.8,
"learning_rate": 9.832655668935087e-06,
"loss": 0.5381,
"step": 869
},
{
"epoch": 0.8,
"learning_rate": 9.743662716538288e-06,
"loss": 0.5397,
"step": 870
},
{
"epoch": 0.8,
"learning_rate": 9.655030826905586e-06,
"loss": 0.5406,
"step": 871
},
{
"epoch": 0.81,
"learning_rate": 9.566760794984492e-06,
"loss": 0.5436,
"step": 872
},
{
"epoch": 0.81,
"learning_rate": 9.478853412476957e-06,
"loss": 0.5222,
"step": 873
},
{
"epoch": 0.81,
"learning_rate": 9.391309467832327e-06,
"loss": 0.5432,
"step": 874
},
{
"epoch": 0.81,
"learning_rate": 9.304129746240237e-06,
"loss": 0.5366,
"step": 875
},
{
"epoch": 0.81,
"learning_rate": 9.217315029623563e-06,
"loss": 0.5356,
"step": 876
},
{
"epoch": 0.81,
"learning_rate": 9.130866096631436e-06,
"loss": 0.5347,
"step": 877
},
{
"epoch": 0.81,
"learning_rate": 9.044783722632215e-06,
"loss": 0.5344,
"step": 878
},
{
"epoch": 0.81,
"learning_rate": 8.959068679706579e-06,
"loss": 0.5201,
"step": 879
},
{
"epoch": 0.81,
"learning_rate": 8.873721736640566e-06,
"loss": 0.5351,
"step": 880
},
{
"epoch": 0.81,
"learning_rate": 8.788743658918707e-06,
"loss": 0.5387,
"step": 881
},
{
"epoch": 0.81,
"learning_rate": 8.70413520871714e-06,
"loss": 0.5346,
"step": 882
},
{
"epoch": 0.82,
"learning_rate": 8.619897144896767e-06,
"loss": 0.5452,
"step": 883
},
{
"epoch": 0.82,
"learning_rate": 8.536030222996483e-06,
"loss": 0.5242,
"step": 884
},
{
"epoch": 0.82,
"learning_rate": 8.452535195226374e-06,
"loss": 0.5205,
"step": 885
},
{
"epoch": 0.82,
"learning_rate": 8.369412810460947e-06,
"loss": 0.5382,
"step": 886
},
{
"epoch": 0.82,
"learning_rate": 8.286663814232449e-06,
"loss": 0.5435,
"step": 887
},
{
"epoch": 0.82,
"learning_rate": 8.20428894872423e-06,
"loss": 0.5242,
"step": 888
},
{
"epoch": 0.82,
"learning_rate": 8.122288952763934e-06,
"loss": 0.5372,
"step": 889
},
{
"epoch": 0.82,
"learning_rate": 8.040664561817014e-06,
"loss": 0.5336,
"step": 890
},
{
"epoch": 0.82,
"learning_rate": 7.959416507980122e-06,
"loss": 0.5354,
"step": 891
},
{
"epoch": 0.82,
"learning_rate": 7.878545519974428e-06,
"loss": 0.5358,
"step": 892
},
{
"epoch": 0.82,
"learning_rate": 7.798052323139222e-06,
"loss": 0.5241,
"step": 893
},
{
"epoch": 0.83,
"learning_rate": 7.717937639425332e-06,
"loss": 0.5343,
"step": 894
},
{
"epoch": 0.83,
"learning_rate": 7.638202187388677e-06,
"loss": 0.542,
"step": 895
},
{
"epoch": 0.83,
"learning_rate": 7.558846682183795e-06,
"loss": 0.5376,
"step": 896
},
{
"epoch": 0.83,
"learning_rate": 7.479871835557462e-06,
"loss": 0.5276,
"step": 897
},
{
"epoch": 0.83,
"learning_rate": 7.40127835584229e-06,
"loss": 0.5276,
"step": 898
},
{
"epoch": 0.83,
"learning_rate": 7.32306694795038e-06,
"loss": 0.5376,
"step": 899
},
{
"epoch": 0.83,
"learning_rate": 7.245238313366953e-06,
"loss": 0.5239,
"step": 900
},
{
"epoch": 0.83,
"learning_rate": 7.167793150144192e-06,
"loss": 0.5389,
"step": 901
},
{
"epoch": 0.83,
"learning_rate": 7.0907321528947915e-06,
"loss": 0.5363,
"step": 902
},
{
"epoch": 0.83,
"learning_rate": 7.01405601278588e-06,
"loss": 0.5338,
"step": 903
},
{
"epoch": 0.83,
"learning_rate": 6.937765417532788e-06,
"loss": 0.5192,
"step": 904
},
{
"epoch": 0.84,
"learning_rate": 6.8618610513927876e-06,
"loss": 0.5153,
"step": 905
},
{
"epoch": 0.84,
"learning_rate": 6.786343595159083e-06,
"loss": 0.5408,
"step": 906
},
{
"epoch": 0.84,
"learning_rate": 6.711213726154652e-06,
"loss": 0.5485,
"step": 907
},
{
"epoch": 0.84,
"learning_rate": 6.636472118226128e-06,
"loss": 0.5399,
"step": 908
},
{
"epoch": 0.84,
"learning_rate": 6.562119441737813e-06,
"loss": 0.5414,
"step": 909
},
{
"epoch": 0.84,
"learning_rate": 6.4881563635656615e-06,
"loss": 0.5304,
"step": 910
},
{
"epoch": 0.84,
"learning_rate": 6.414583547091269e-06,
"loss": 0.5302,
"step": 911
},
{
"epoch": 0.84,
"learning_rate": 6.341401652195955e-06,
"loss": 0.5341,
"step": 912
},
{
"epoch": 0.84,
"learning_rate": 6.268611335254781e-06,
"loss": 0.5296,
"step": 913
},
{
"epoch": 0.84,
"learning_rate": 6.196213249130778e-06,
"loss": 0.5339,
"step": 914
},
{
"epoch": 0.85,
"learning_rate": 6.124208043168988e-06,
"loss": 0.5324,
"step": 915
},
{
"epoch": 0.85,
"learning_rate": 6.052596363190649e-06,
"loss": 0.544,
"step": 916
},
{
"epoch": 0.85,
"learning_rate": 5.981378851487485e-06,
"loss": 0.5452,
"step": 917
},
{
"epoch": 0.85,
"learning_rate": 5.91055614681586e-06,
"loss": 0.525,
"step": 918
},
{
"epoch": 0.85,
"learning_rate": 5.840128884391049e-06,
"loss": 0.532,
"step": 919
},
{
"epoch": 0.85,
"learning_rate": 5.770097695881632e-06,
"loss": 0.521,
"step": 920
},
{
"epoch": 0.85,
"learning_rate": 5.700463209403712e-06,
"loss": 0.5419,
"step": 921
},
{
"epoch": 0.85,
"learning_rate": 5.631226049515342e-06,
"loss": 0.5317,
"step": 922
},
{
"epoch": 0.85,
"learning_rate": 5.5623868372109755e-06,
"loss": 0.5386,
"step": 923
},
{
"epoch": 0.85,
"learning_rate": 5.49394618991575e-06,
"loss": 0.5361,
"step": 924
},
{
"epoch": 0.85,
"learning_rate": 5.42590472148009e-06,
"loss": 0.5339,
"step": 925
},
{
"epoch": 0.86,
"learning_rate": 5.3582630421741385e-06,
"loss": 0.5255,
"step": 926
},
{
"epoch": 0.86,
"learning_rate": 5.291021758682302e-06,
"loss": 0.5228,
"step": 927
},
{
"epoch": 0.86,
"learning_rate": 5.224181474097783e-06,
"loss": 0.5406,
"step": 928
},
{
"epoch": 0.86,
"learning_rate": 5.15774278791718e-06,
"loss": 0.5237,
"step": 929
},
{
"epoch": 0.86,
"learning_rate": 5.091706296035159e-06,
"loss": 0.5354,
"step": 930
},
{
"epoch": 0.86,
"learning_rate": 5.026072590739045e-06,
"loss": 0.5188,
"step": 931
},
{
"epoch": 0.86,
"learning_rate": 4.960842260703502e-06,
"loss": 0.519,
"step": 932
},
{
"epoch": 0.86,
"learning_rate": 4.896015890985345e-06,
"loss": 0.5288,
"step": 933
},
{
"epoch": 0.86,
"learning_rate": 4.831594063018197e-06,
"loss": 0.5286,
"step": 934
},
{
"epoch": 0.86,
"learning_rate": 4.767577354607294e-06,
"loss": 0.534,
"step": 935
},
{
"epoch": 0.86,
"learning_rate": 4.703966339924376e-06,
"loss": 0.5233,
"step": 936
},
{
"epoch": 0.87,
"learning_rate": 4.6407615895024015e-06,
"loss": 0.5371,
"step": 937
},
{
"epoch": 0.87,
"learning_rate": 4.577963670230556e-06,
"loss": 0.5385,
"step": 938
},
{
"epoch": 0.87,
"learning_rate": 4.515573145349139e-06,
"loss": 0.5282,
"step": 939
},
{
"epoch": 0.87,
"learning_rate": 4.453590574444427e-06,
"loss": 0.526,
"step": 940
},
{
"epoch": 0.87,
"learning_rate": 4.3920165134437755e-06,
"loss": 0.5363,
"step": 941
},
{
"epoch": 0.87,
"learning_rate": 4.330851514610557e-06,
"loss": 0.5274,
"step": 942
},
{
"epoch": 0.87,
"learning_rate": 4.270096126539225e-06,
"loss": 0.5411,
"step": 943
},
{
"epoch": 0.87,
"learning_rate": 4.209750894150405e-06,
"loss": 0.5594,
"step": 944
},
{
"epoch": 0.87,
"learning_rate": 4.149816358685998e-06,
"loss": 0.5738,
"step": 945
},
{
"epoch": 0.87,
"learning_rate": 4.090293057704314e-06,
"loss": 0.56,
"step": 946
},
{
"epoch": 0.87,
"learning_rate": 4.031181525075295e-06,
"loss": 0.5553,
"step": 947
},
{
"epoch": 0.88,
"learning_rate": 3.9724822909756375e-06,
"loss": 0.569,
"step": 948
},
{
"epoch": 0.88,
"learning_rate": 3.914195881884159e-06,
"loss": 0.575,
"step": 949
},
{
"epoch": 0.88,
"learning_rate": 3.856322820576985e-06,
"loss": 0.5633,
"step": 950
},
{
"epoch": 0.88,
"learning_rate": 3.7988636261228614e-06,
"loss": 0.5663,
"step": 951
},
{
"epoch": 0.88,
"learning_rate": 3.7418188138785835e-06,
"loss": 0.5537,
"step": 952
},
{
"epoch": 0.88,
"learning_rate": 3.6851888954842483e-06,
"loss": 0.5525,
"step": 953
},
{
"epoch": 0.88,
"learning_rate": 3.6289743788587795e-06,
"loss": 0.5548,
"step": 954
},
{
"epoch": 0.88,
"learning_rate": 3.5731757681953026e-06,
"loss": 0.5719,
"step": 955
},
{
"epoch": 0.88,
"learning_rate": 3.5177935639566505e-06,
"loss": 0.5424,
"step": 956
},
{
"epoch": 0.88,
"learning_rate": 3.4628282628708764e-06,
"loss": 0.5694,
"step": 957
},
{
"epoch": 0.88,
"learning_rate": 3.408280357926774e-06,
"loss": 0.5709,
"step": 958
},
{
"epoch": 0.89,
"learning_rate": 3.3541503383694885e-06,
"loss": 0.5544,
"step": 959
},
{
"epoch": 0.89,
"learning_rate": 3.300438689696106e-06,
"loss": 0.5684,
"step": 960
},
{
"epoch": 0.89,
"learning_rate": 3.2471458936513045e-06,
"loss": 0.5764,
"step": 961
},
{
"epoch": 0.89,
"learning_rate": 3.194272428223033e-06,
"loss": 0.561,
"step": 962
},
{
"epoch": 0.89,
"learning_rate": 3.141818767638249e-06,
"loss": 0.5563,
"step": 963
},
{
"epoch": 0.89,
"learning_rate": 3.08978538235859e-06,
"loss": 0.5712,
"step": 964
},
{
"epoch": 0.89,
"learning_rate": 3.038172739076267e-06,
"loss": 0.5648,
"step": 965
},
{
"epoch": 0.89,
"learning_rate": 2.9869813007097847e-06,
"loss": 0.5597,
"step": 966
},
{
"epoch": 0.89,
"learning_rate": 2.936211526399818e-06,
"loss": 0.5669,
"step": 967
},
{
"epoch": 0.89,
"learning_rate": 2.8858638715051066e-06,
"loss": 0.5676,
"step": 968
},
{
"epoch": 0.9,
"learning_rate": 2.8359387875983746e-06,
"loss": 0.5567,
"step": 969
},
{
"epoch": 0.9,
"learning_rate": 2.7864367224622434e-06,
"loss": 0.5591,
"step": 970
},
{
"epoch": 0.9,
"learning_rate": 2.737358120085265e-06,
"loss": 0.5578,
"step": 971
},
{
"epoch": 0.9,
"learning_rate": 2.688703420657901e-06,
"loss": 0.5723,
"step": 972
},
{
"epoch": 0.9,
"learning_rate": 2.6404730605685934e-06,
"loss": 0.5718,
"step": 973
},
{
"epoch": 0.9,
"learning_rate": 2.5926674723998456e-06,
"loss": 0.5755,
"step": 974
},
{
"epoch": 0.9,
"learning_rate": 2.545287084924336e-06,
"loss": 0.5543,
"step": 975
},
{
"epoch": 0.9,
"learning_rate": 2.498332323101088e-06,
"loss": 0.5748,
"step": 976
},
{
"epoch": 0.9,
"learning_rate": 2.4518036080716513e-06,
"loss": 0.5789,
"step": 977
},
{
"epoch": 0.9,
"learning_rate": 2.4057013571563147e-06,
"loss": 0.5799,
"step": 978
},
{
"epoch": 0.9,
"learning_rate": 2.3600259838503836e-06,
"loss": 0.582,
"step": 979
},
{
"epoch": 0.91,
"learning_rate": 2.314777897820436e-06,
"loss": 0.5782,
"step": 980
},
{
"epoch": 0.91,
"learning_rate": 2.269957504900688e-06,
"loss": 0.58,
"step": 981
},
{
"epoch": 0.91,
"learning_rate": 2.225565207089353e-06,
"loss": 0.5841,
"step": 982
},
{
"epoch": 0.91,
"learning_rate": 2.181601402544986e-06,
"loss": 0.5753,
"step": 983
},
{
"epoch": 0.91,
"learning_rate": 2.138066485582968e-06,
"loss": 0.5692,
"step": 984
},
{
"epoch": 0.91,
"learning_rate": 2.0949608466719452e-06,
"loss": 0.5869,
"step": 985
},
{
"epoch": 0.91,
"learning_rate": 2.052284872430321e-06,
"loss": 0.5549,
"step": 986
},
{
"epoch": 0.91,
"learning_rate": 2.010038945622811e-06,
"loss": 0.5539,
"step": 987
},
{
"epoch": 0.91,
"learning_rate": 1.9682234451569915e-06,
"loss": 0.5628,
"step": 988
},
{
"epoch": 0.91,
"learning_rate": 1.926838746079895e-06,
"loss": 0.5613,
"step": 989
},
{
"epoch": 0.91,
"learning_rate": 1.8858852195746734e-06,
"loss": 0.558,
"step": 990
},
{
"epoch": 0.92,
"learning_rate": 1.8453632329572423e-06,
"loss": 0.56,
"step": 991
},
{
"epoch": 0.92,
"learning_rate": 1.805273149672998e-06,
"loss": 0.5536,
"step": 992
},
{
"epoch": 0.92,
"learning_rate": 1.7656153292935574e-06,
"loss": 0.5542,
"step": 993
},
{
"epoch": 0.92,
"learning_rate": 1.726390127513533e-06,
"loss": 0.5593,
"step": 994
},
{
"epoch": 0.92,
"learning_rate": 1.687597896147347e-06,
"loss": 0.5501,
"step": 995
},
{
"epoch": 0.92,
"learning_rate": 1.6492389831260447e-06,
"loss": 0.5569,
"step": 996
},
{
"epoch": 0.92,
"learning_rate": 1.6113137324942295e-06,
"loss": 0.5659,
"step": 997
},
{
"epoch": 0.92,
"learning_rate": 1.5738224844069448e-06,
"loss": 0.5658,
"step": 998
},
{
"epoch": 0.92,
"learning_rate": 1.5367655751266086e-06,
"loss": 0.5687,
"step": 999
},
{
"epoch": 0.92,
"learning_rate": 1.5001433370200213e-06,
"loss": 0.5528,
"step": 1000
},
{
"epoch": 0.92,
"learning_rate": 1.4639560985553858e-06,
"loss": 0.5602,
"step": 1001
},
{
"epoch": 0.93,
"learning_rate": 1.4282041842993422e-06,
"loss": 0.5493,
"step": 1002
},
{
"epoch": 0.93,
"learning_rate": 1.392887914914076e-06,
"loss": 0.5587,
"step": 1003
},
{
"epoch": 0.93,
"learning_rate": 1.3580076071544267e-06,
"loss": 0.5585,
"step": 1004
},
{
"epoch": 0.93,
"learning_rate": 1.3235635738650665e-06,
"loss": 0.5514,
"step": 1005
},
{
"epoch": 0.93,
"learning_rate": 1.2895561239776754e-06,
"loss": 0.5684,
"step": 1006
},
{
"epoch": 0.93,
"learning_rate": 1.2559855625081663e-06,
"loss": 0.5661,
"step": 1007
},
{
"epoch": 0.93,
"learning_rate": 1.2228521905539748e-06,
"loss": 0.5476,
"step": 1008
},
{
"epoch": 0.93,
"learning_rate": 1.1901563052913455e-06,
"loss": 0.5614,
"step": 1009
},
{
"epoch": 0.93,
"learning_rate": 1.1578981999726402e-06,
"loss": 0.5515,
"step": 1010
},
{
"epoch": 0.93,
"learning_rate": 1.1260781639237716e-06,
"loss": 0.5619,
"step": 1011
},
{
"epoch": 0.93,
"learning_rate": 1.0946964825415296e-06,
"loss": 0.562,
"step": 1012
},
{
"epoch": 0.94,
"learning_rate": 1.0637534372910874e-06,
"loss": 0.5423,
"step": 1013
},
{
"epoch": 0.94,
"learning_rate": 1.0332493057034487e-06,
"loss": 0.5596,
"step": 1014
},
{
"epoch": 0.94,
"learning_rate": 1.003184361372944e-06,
"loss": 0.5562,
"step": 1015
},
{
"epoch": 0.94,
"learning_rate": 9.73558873954805e-07,
"loss": 0.5625,
"step": 1016
},
{
"epoch": 0.94,
"learning_rate": 9.443731091627439e-07,
"loss": 0.5543,
"step": 1017
},
{
"epoch": 0.94,
"learning_rate": 9.156273287665384e-07,
"loss": 0.5571,
"step": 1018
},
{
"epoch": 0.94,
"learning_rate": 8.87321790589718e-07,
"loss": 0.5547,
"step": 1019
},
{
"epoch": 0.94,
"learning_rate": 8.594567485072424e-07,
"loss": 0.5495,
"step": 1020
},
{
"epoch": 0.94,
"learning_rate": 8.320324524432155e-07,
"loss": 0.5577,
"step": 1021
},
{
"epoch": 0.94,
"learning_rate": 8.050491483686529e-07,
"loss": 0.5474,
"step": 1022
},
{
"epoch": 0.94,
"learning_rate": 7.785070782992676e-07,
"loss": 0.5526,
"step": 1023
},
{
"epoch": 0.95,
"learning_rate": 7.524064802933217e-07,
"loss": 0.5603,
"step": 1024
},
{
"epoch": 0.95,
"learning_rate": 7.267475884494501e-07,
"loss": 0.5493,
"step": 1025
},
{
"epoch": 0.95,
"learning_rate": 7.015306329045956e-07,
"loss": 0.5593,
"step": 1026
},
{
"epoch": 0.95,
"learning_rate": 6.767558398319384e-07,
"loss": 0.5592,
"step": 1027
},
{
"epoch": 0.95,
"learning_rate": 6.5242343143887e-07,
"loss": 0.5558,
"step": 1028
},
{
"epoch": 0.95,
"learning_rate": 6.285336259649555e-07,
"loss": 0.5566,
"step": 1029
},
{
"epoch": 0.95,
"learning_rate": 6.05086637680069e-07,
"loss": 0.5673,
"step": 1030
},
{
"epoch": 0.95,
"learning_rate": 5.820826768823728e-07,
"loss": 0.5655,
"step": 1031
},
{
"epoch": 0.95,
"learning_rate": 5.595219498964965e-07,
"loss": 0.5643,
"step": 1032
},
{
"epoch": 0.95,
"learning_rate": 5.374046590716608e-07,
"loss": 0.5296,
"step": 1033
},
{
"epoch": 0.96,
"learning_rate": 5.157310027798623e-07,
"loss": 0.5365,
"step": 1034
},
{
"epoch": 0.96,
"learning_rate": 4.945011754141083e-07,
"loss": 0.5353,
"step": 1035
},
{
"epoch": 0.96,
"learning_rate": 4.7371536738665123e-07,
"loss": 0.5338,
"step": 1036
},
{
"epoch": 0.96,
"learning_rate": 4.5337376512730735e-07,
"loss": 0.5354,
"step": 1037
},
{
"epoch": 0.96,
"learning_rate": 4.334765510817629e-07,
"loss": 0.5312,
"step": 1038
},
{
"epoch": 0.96,
"learning_rate": 4.140239037099425e-07,
"loss": 0.5385,
"step": 1039
},
{
"epoch": 0.96,
"learning_rate": 3.950159974844214e-07,
"loss": 0.5303,
"step": 1040
},
{
"epoch": 0.96,
"learning_rate": 3.764530028888491e-07,
"loss": 0.535,
"step": 1041
},
{
"epoch": 0.96,
"learning_rate": 3.583350864164059e-07,
"loss": 0.5364,
"step": 1042
},
{
"epoch": 0.96,
"learning_rate": 3.4066241056834867e-07,
"loss": 0.5341,
"step": 1043
},
{
"epoch": 0.96,
"learning_rate": 3.234351338525232e-07,
"loss": 0.5274,
"step": 1044
},
{
"epoch": 0.97,
"learning_rate": 3.0665341078193745e-07,
"loss": 0.5356,
"step": 1045
},
{
"epoch": 0.97,
"learning_rate": 2.9031739187340143e-07,
"loss": 0.5386,
"step": 1046
},
{
"epoch": 0.97,
"learning_rate": 2.7442722364616204e-07,
"loss": 0.5409,
"step": 1047
},
{
"epoch": 0.97,
"learning_rate": 2.589830486205924e-07,
"loss": 0.5445,
"step": 1048
},
{
"epoch": 0.97,
"learning_rate": 2.4398500531689905e-07,
"loss": 0.53,
"step": 1049
},
{
"epoch": 0.97,
"learning_rate": 2.294332282539058e-07,
"loss": 0.521,
"step": 1050
},
{
"epoch": 0.97,
"learning_rate": 2.1532784794782712e-07,
"loss": 0.5363,
"step": 1051
},
{
"epoch": 0.97,
"learning_rate": 2.0166899091111358e-07,
"loss": 0.5388,
"step": 1052
},
{
"epoch": 0.97,
"learning_rate": 1.8845677965129705e-07,
"loss": 0.5275,
"step": 1053
},
{
"epoch": 0.97,
"learning_rate": 1.756913326699028e-07,
"loss": 0.5304,
"step": 1054
},
{
"epoch": 0.97,
"learning_rate": 1.633727644613947e-07,
"loss": 0.5298,
"step": 1055
},
{
"epoch": 0.98,
"learning_rate": 1.5150118551213177e-07,
"loss": 0.5218,
"step": 1056
},
{
"epoch": 0.98,
"learning_rate": 1.4007670229939652e-07,
"loss": 0.5285,
"step": 1057
},
{
"epoch": 0.98,
"learning_rate": 1.2909941729041254e-07,
"loss": 0.5298,
"step": 1058
},
{
"epoch": 0.98,
"learning_rate": 1.1856942894145629e-07,
"loss": 0.5236,
"step": 1059
},
{
"epoch": 0.98,
"learning_rate": 1.0848683169695784e-07,
"loss": 0.5294,
"step": 1060
},
{
"epoch": 0.98,
"learning_rate": 9.885171598864595e-08,
"loss": 0.529,
"step": 1061
},
{
"epoch": 0.98,
"learning_rate": 8.966416823475432e-08,
"loss": 0.5337,
"step": 1062
},
{
"epoch": 0.98,
"learning_rate": 8.092427083923882e-08,
"loss": 0.524,
"step": 1063
},
{
"epoch": 0.98,
"learning_rate": 7.263210219103923e-08,
"loss": 0.52,
"step": 1064
},
{
"epoch": 0.98,
"learning_rate": 6.478773666336868e-08,
"loss": 0.5322,
"step": 1065
},
{
"epoch": 0.98,
"learning_rate": 5.739124461305867e-08,
"loss": 0.5211,
"step": 1066
},
{
"epoch": 0.99,
"learning_rate": 5.044269237992616e-08,
"loss": 0.5251,
"step": 1067
},
{
"epoch": 0.99,
"learning_rate": 4.3942142286163003e-08,
"loss": 0.5209,
"step": 1068
},
{
"epoch": 0.99,
"learning_rate": 3.788965263580302e-08,
"loss": 0.5249,
"step": 1069
},
{
"epoch": 0.99,
"learning_rate": 3.228527771417245e-08,
"loss": 0.5251,
"step": 1070
},
{
"epoch": 0.99,
"learning_rate": 2.7129067787429187e-08,
"loss": 0.5361,
"step": 1071
},
{
"epoch": 0.99,
"learning_rate": 2.242106910208541e-08,
"loss": 0.5209,
"step": 1072
},
{
"epoch": 0.99,
"learning_rate": 1.8161323884613447e-08,
"loss": 0.5278,
"step": 1073
},
{
"epoch": 0.99,
"learning_rate": 1.4349870341051618e-08,
"loss": 0.532,
"step": 1074
},
{
"epoch": 0.99,
"learning_rate": 1.0986742656682313e-08,
"loss": 0.527,
"step": 1075
},
{
"epoch": 0.99,
"learning_rate": 8.071970995698896e-09,
"loss": 0.5281,
"step": 1076
},
{
"epoch": 0.99,
"learning_rate": 5.6055815009614656e-09,
"loss": 0.5187,
"step": 1077
},
{
"epoch": 1.0,
"learning_rate": 3.587596293735951e-09,
"loss": 0.5268,
"step": 1078
},
{
"epoch": 1.0,
"learning_rate": 2.0180334735164786e-09,
"loss": 0.5218,
"step": 1079
},
{
"epoch": 1.0,
"learning_rate": 8.969071178588362e-10,
"loss": 0.5411,
"step": 1080
},
{
"epoch": 1.0,
"learning_rate": 2.2422728224169577e-10,
"loss": 0.5225,
"step": 1081
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 0.5255,
"step": 1082
},
{
"epoch": 1.0,
"step": 1082,
"total_flos": 0.0,
"train_loss": 0.6376458885709371,
"train_runtime": 117526.0956,
"train_samples_per_second": 18.866,
"train_steps_per_second": 0.009
}
],
"logging_steps": 1.0,
"max_steps": 1082,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}