{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9952,
  "eval_steps": 500,
  "global_step": 1638,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.8079,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.7828,
      "step": 2
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.7845,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.7802,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7728,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.7783,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.7707,
      "step": 7
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.7661,
      "step": 8
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.7169,
      "step": 9
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.7298,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4e-06,
      "loss": 0.7293,
      "step": 11
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.6884,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.2e-06,
      "loss": 0.7098,
      "step": 13
    },
    {
      "epoch": 0.03,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.694,
      "step": 14
    },
    {
      "epoch": 0.03,
      "learning_rate": 6e-06,
      "loss": 0.6775,
      "step": 15
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.7008,
      "step": 16
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.6801,
      "step": 17
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.706,
      "step": 18
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.6867,
      "step": 19
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.67,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.6414,
      "step": 21
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.8e-06,
      "loss": 0.6347,
      "step": 22
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.681,
      "step": 23
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.6419,
      "step": 24
    },
    {
      "epoch": 0.05,
      "learning_rate": 1e-05,
      "loss": 0.6479,
      "step": 25
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.04e-05,
      "loss": 0.677,
      "step": 26
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 0.6524,
      "step": 27
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.6438,
      "step": 28
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.16e-05,
      "loss": 0.6681,
      "step": 29
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.2e-05,
      "loss": 0.6418,
      "step": 30
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 0.6767,
      "step": 31
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.6286,
      "step": 32
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.6145,
      "step": 33
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.6542,
      "step": 34
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.4e-05,
      "loss": 0.6431,
      "step": 35
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.6634,
      "step": 36
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.48e-05,
      "loss": 0.6329,
      "step": 37
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.6286,
      "step": 38
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.6587,
      "step": 39
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.6214,
      "step": 40
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.64e-05,
      "loss": 0.6435,
      "step": 41
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.6204,
      "step": 42
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.72e-05,
      "loss": 0.6182,
      "step": 43
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.76e-05,
      "loss": 0.6466,
      "step": 44
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.8e-05,
      "loss": 0.6283,
      "step": 45
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.6419,
      "step": 46
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.88e-05,
      "loss": 0.5917,
      "step": 47
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.6473,
      "step": 48
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.5984,
      "step": 49
    },
    {
      "epoch": 0.09,
      "learning_rate": 2e-05,
      "loss": 0.6266,
      "step": 50
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9999980431000962e-05,
      "loss": 0.592,
      "step": 51
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999992172408044e-05,
      "loss": 0.6053,
      "step": 52
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999982387946819e-05,
      "loss": 0.6411,
      "step": 53
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999968689754717e-05,
      "loss": 0.6284,
      "step": 54
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999951077885349e-05,
      "loss": 0.6482,
      "step": 55
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9999295524076455e-05,
      "loss": 0.6372,
      "step": 56
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9999041134058514e-05,
      "loss": 0.6351,
      "step": 57
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999874760979531e-05,
      "loss": 0.634,
      "step": 58
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999841495243563e-05,
      "loss": 0.6034,
      "step": 59
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9998043163281435e-05,
      "loss": 0.6062,
      "step": 60
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999763224378783e-05,
      "loss": 0.6315,
      "step": 61
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999718219556307e-05,
      "loss": 0.6283,
      "step": 62
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999669302036856e-05,
      "loss": 0.626,
      "step": 63
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999616472011883e-05,
      "loss": 0.6246,
      "step": 64
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9995597296881542e-05,
      "loss": 0.6058,
      "step": 65
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9994990752877473e-05,
      "loss": 0.5862,
      "step": 66
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999434509048052e-05,
      "loss": 0.6235,
      "step": 67
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9993660312217674e-05,
      "loss": 0.5882,
      "step": 68
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.999293642076902e-05,
      "loss": 0.633,
      "step": 69
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.999217341896772e-05,
      "loss": 0.5946,
      "step": 70
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9991371309800024e-05,
      "loss": 0.6311,
      "step": 71
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.999053009640521e-05,
      "loss": 0.6261,
      "step": 72
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.998964978207563e-05,
      "loss": 0.6138,
      "step": 73
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9988730370256654e-05,
      "loss": 0.6095,
      "step": 74
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9987771864546677e-05,
      "loss": 0.6492,
      "step": 75
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9986774268697098e-05,
      "loss": 0.586,
      "step": 76
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9985737586612307e-05,
      "loss": 0.5801,
      "step": 77
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.998466182234967e-05,
      "loss": 0.6112,
      "step": 78
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.998354698011951e-05,
      "loss": 0.6219,
      "step": 79
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9982393064285106e-05,
      "loss": 0.6038,
      "step": 80
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9981200079362645e-05,
      "loss": 0.6138,
      "step": 81
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.997996803002123e-05,
      "loss": 0.6318,
      "step": 82
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.997869692108286e-05,
      "loss": 0.6058,
      "step": 83
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.99773867575224e-05,
      "loss": 0.6175,
      "step": 84
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.997603754446757e-05,
      "loss": 0.5948,
      "step": 85
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.997464928719892e-05,
      "loss": 0.5982,
      "step": 86
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9973221991149804e-05,
      "loss": 0.6268,
      "step": 87
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9971755661906376e-05,
      "loss": 0.5966,
      "step": 88
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9970250305207557e-05,
      "loss": 0.5967,
      "step": 89
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9968705926945015e-05,
      "loss": 0.6143,
      "step": 90
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.996712253316313e-05,
      "loss": 0.6139,
      "step": 91
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9965500130058996e-05,
      "loss": 0.5995,
      "step": 92
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9963838723982368e-05,
      "loss": 0.5798,
      "step": 93
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9962138321435658e-05,
      "loss": 0.607,
      "step": 94
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.99603989290739e-05,
      "loss": 0.6157,
      "step": 95
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9958620553704737e-05,
      "loss": 0.643,
      "step": 96
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9956803202288358e-05,
      "loss": 0.6246,
      "step": 97
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9954946881937525e-05,
      "loss": 0.6213,
      "step": 98
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.99530515999175e-05,
      "loss": 0.5969,
      "step": 99
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9951117363646036e-05,
      "loss": 0.6109,
      "step": 100
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.994914418069335e-05,
      "loss": 0.6268,
      "step": 101
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9947132058782082e-05,
      "loss": 0.5923,
      "step": 102
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.994508100578728e-05,
      "loss": 0.6107,
      "step": 103
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9942991029736346e-05,
      "loss": 0.6048,
      "step": 104
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9940862138809036e-05,
      "loss": 0.6215,
      "step": 105
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9938694341337398e-05,
      "loss": 0.614,
      "step": 106
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.993648764580576e-05,
      "loss": 0.5971,
      "step": 107
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.993424206085068e-05,
      "loss": 0.6248,
      "step": 108
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.993195759526094e-05,
      "loss": 0.612,
      "step": 109
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9929634257977467e-05,
      "loss": 0.6018,
      "step": 110
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.992727205809335e-05,
      "loss": 0.5908,
      "step": 111
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.992487100485376e-05,
      "loss": 0.6068,
      "step": 112
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9922431107655942e-05,
      "loss": 0.6192,
      "step": 113
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.991995237604916e-05,
      "loss": 0.5977,
      "step": 114
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.991743481973468e-05,
      "loss": 0.6301,
      "step": 115
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9914878448565712e-05,
      "loss": 0.604,
      "step": 116
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.991228327254738e-05,
      "loss": 0.5903,
      "step": 117
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9909649301836675e-05,
      "loss": 0.5724,
      "step": 118
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9906976546742443e-05,
      "loss": 0.5983,
      "step": 119
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.990426501772531e-05,
      "loss": 0.6081,
      "step": 120
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.990151472539765e-05,
      "loss": 0.6154,
      "step": 121
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.989872568052357e-05,
      "loss": 0.6173,
      "step": 122
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9895897894018822e-05,
      "loss": 0.6004,
      "step": 123
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.98930313769508e-05,
      "loss": 0.6188,
      "step": 124
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9890126140538476e-05,
      "loss": 0.5958,
      "step": 125
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.988718219615237e-05,
      "loss": 0.6109,
      "step": 126
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9884199555314484e-05,
      "loss": 0.6114,
      "step": 127
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9881178229698278e-05,
      "loss": 0.5804,
      "step": 128
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.987811823112862e-05,
      "loss": 0.6244,
      "step": 129
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.987501957158173e-05,
      "loss": 0.5762,
      "step": 130
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9871882263185137e-05,
      "loss": 0.5868,
      "step": 131
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9868706318217645e-05,
      "loss": 0.6118,
      "step": 132
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.986549174910926e-05,
      "loss": 0.6156,
      "step": 133
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9862238568441166e-05,
      "loss": 0.6046,
      "step": 134
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.985894678894566e-05,
      "loss": 0.6137,
      "step": 135
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9855616423506106e-05,
      "loss": 0.5729,
      "step": 136
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9852247485156892e-05,
      "loss": 0.5827,
      "step": 137
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9848839987083366e-05,
      "loss": 0.6024,
      "step": 138
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9845393942621793e-05,
      "loss": 0.6106,
      "step": 139
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.98419093652593e-05,
      "loss": 0.5903,
      "step": 140
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.983838626863383e-05,
      "loss": 0.5913,
      "step": 141
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9834824666534073e-05,
      "loss": 0.5789,
      "step": 142
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9831224572899424e-05,
      "loss": 0.5984,
      "step": 143
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9827586001819933e-05,
      "loss": 0.6024,
      "step": 144
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9823908967536242e-05,
      "loss": 0.6001,
      "step": 145
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9820193484439523e-05,
      "loss": 0.5916,
      "step": 146
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.981643956707143e-05,
      "loss": 0.5942,
      "step": 147
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.981264723012405e-05,
      "loss": 0.5926,
      "step": 148
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9808816488439834e-05,
      "loss": 0.5747,
      "step": 149
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9804947357011525e-05,
      "loss": 0.5834,
      "step": 150
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.980103985098214e-05,
      "loss": 0.6075,
      "step": 151
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9797093985644866e-05,
      "loss": 0.6352,
      "step": 152
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.979310977644304e-05,
      "loss": 0.5982,
      "step": 153
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9789087238970056e-05,
      "loss": 0.6055,
      "step": 154
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9785026388969316e-05,
      "loss": 0.5945,
      "step": 155
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.978092724233418e-05,
      "loss": 0.6098,
      "step": 156
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9776789815107882e-05,
      "loss": 0.6038,
      "step": 157
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9772614123483488e-05,
      "loss": 0.6047,
      "step": 158
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9768400183803814e-05,
      "loss": 0.5774,
      "step": 159
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9764148012561384e-05,
      "loss": 0.6083,
      "step": 160
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9759857626398336e-05,
      "loss": 0.6183,
      "step": 161
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9755529042106394e-05,
      "loss": 0.5771,
      "step": 162
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.975116227662676e-05,
      "loss": 0.6356,
      "step": 163
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.974675734705008e-05,
      "loss": 0.5723,
      "step": 164
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9742314270616375e-05,
      "loss": 0.584,
      "step": 165
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.973783306471495e-05,
      "loss": 0.5755,
      "step": 166
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.973331374688435e-05,
      "loss": 0.6151,
      "step": 167
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.972875633481228e-05,
      "loss": 0.5881,
      "step": 168
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9724160846335535e-05,
      "loss": 0.5915,
      "step": 169
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9719527299439944e-05,
      "loss": 0.6112,
      "step": 170
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9714855712260275e-05,
      "loss": 0.5792,
      "step": 171
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.971014610308019e-05,
      "loss": 0.5869,
      "step": 172
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9705398490332154e-05,
      "loss": 0.5925,
      "step": 173
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9700612892597376e-05,
      "loss": 0.5889,
      "step": 174
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9695789328605722e-05,
      "loss": 0.5877,
      "step": 175
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.969092781723566e-05,
      "loss": 0.5891,
      "step": 176
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.968602837751417e-05,
      "loss": 0.6091,
      "step": 177
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.968109102861668e-05,
      "loss": 0.6106,
      "step": 178
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9676115789866982e-05,
      "loss": 0.616,
      "step": 179
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9671102680737172e-05,
      "loss": 0.6002,
      "step": 180
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9666051720847546e-05,
      "loss": 0.6018,
      "step": 181
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9660962929966553e-05,
      "loss": 0.5887,
      "step": 182
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9655836328010708e-05,
      "loss": 0.6081,
      "step": 183
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9650671935044497e-05,
      "loss": 0.5922,
      "step": 184
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9645469771280326e-05,
      "loss": 0.6012,
      "step": 185
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9640229857078417e-05,
      "loss": 0.6147,
      "step": 186
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.963495221294675e-05,
      "loss": 0.5961,
      "step": 187
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9629636859540962e-05,
      "loss": 0.6114,
      "step": 188
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9624283817664287e-05,
      "loss": 0.558,
      "step": 189
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9618893108267457e-05,
      "loss": 0.5626,
      "step": 190
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.961346475244863e-05,
      "loss": 0.5684,
      "step": 191
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9607998771453306e-05,
      "loss": 0.5819,
      "step": 192
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9602495186674237e-05,
      "loss": 0.5933,
      "step": 193
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9596954019651354e-05,
      "loss": 0.5997,
      "step": 194
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9591375292071677e-05,
      "loss": 0.5898,
      "step": 195
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9585759025769225e-05,
      "loss": 0.6317,
      "step": 196
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.958010524272494e-05,
      "loss": 0.5999,
      "step": 197
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.95744139650666e-05,
      "loss": 0.5931,
      "step": 198
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9568685215068726e-05,
      "loss": 0.5992,
      "step": 199
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.95629190151525e-05,
      "loss": 0.6168,
      "step": 200
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9557115387885667e-05,
      "loss": 0.5959,
      "step": 201
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.955127435598247e-05,
      "loss": 0.5897,
      "step": 202
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9545395942303538e-05,
      "loss": 0.5889,
      "step": 203
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.95394801698558e-05,
      "loss": 0.5823,
      "step": 204
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.953352706179241e-05,
      "loss": 0.6082,
      "step": 205
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9527536641412637e-05,
      "loss": 0.5797,
      "step": 206
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9521508932161796e-05,
      "loss": 0.5849,
      "step": 207
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.951544395763112e-05,
      "loss": 0.5984,
      "step": 208
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9509341741557716e-05,
      "loss": 0.5976,
      "step": 209
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9503202307824433e-05,
      "loss": 0.5769,
      "step": 210
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9497025680459786e-05,
      "loss": 0.5792,
      "step": 211
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.949081188363786e-05,
      "loss": 0.5952,
      "step": 212
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9484560941678207e-05,
      "loss": 0.5986,
      "step": 213
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9478272879045764e-05,
      "loss": 0.5843,
      "step": 214
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.947194772035075e-05,
      "loss": 0.5933,
      "step": 215
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9465585490348574e-05,
      "loss": 0.5783,
      "step": 216
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9459186213939723e-05,
      "loss": 0.5939,
      "step": 217
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.945274991616969e-05,
      "loss": 0.597,
      "step": 218
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.944627662222885e-05,
      "loss": 0.5597,
      "step": 219
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9439766357452386e-05,
      "loss": 0.5595,
      "step": 220
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9433219147320166e-05,
      "loss": 0.608,
      "step": 221
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.942663501745666e-05,
      "loss": 0.6106,
      "step": 222
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9420013993630843e-05,
      "loss": 0.5993,
      "step": 223
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9413356101756063e-05,
      "loss": 0.5813,
      "step": 224
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.940666136788999e-05,
      "loss": 0.5912,
      "step": 225
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9399929818234457e-05,
      "loss": 0.581,
      "step": 226
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.939316147913541e-05,
      "loss": 0.5803,
      "step": 227
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9386356377082776e-05,
      "loss": 0.5895,
      "step": 228
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.937951453871036e-05,
      "loss": 0.5689,
      "step": 229
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9372635990795744e-05,
      "loss": 0.6018,
      "step": 230
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9365720760260193e-05,
      "loss": 0.6061,
      "step": 231
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.935876887416853e-05,
      "loss": 0.5929,
      "step": 232
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9351780359729047e-05,
      "loss": 0.5928,
      "step": 233
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.934475524429339e-05,
      "loss": 0.587,
      "step": 234
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9337693555356458e-05,
      "loss": 0.6007,
      "step": 235
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9330595320556286e-05,
      "loss": 0.5837,
      "step": 236
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.932346056767394e-05,
      "loss": 0.5752,
      "step": 237
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.931628932463342e-05,
      "loss": 0.6123,
      "step": 238
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9309081619501533e-05,
      "loss": 0.5801,
      "step": 239
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9301837480487794e-05,
      "loss": 0.596,
      "step": 240
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.929455693594431e-05,
      "loss": 0.6054,
      "step": 241
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9287240014365682e-05,
      "loss": 0.5945,
      "step": 242
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9279886744388875e-05,
      "loss": 0.5767,
      "step": 243
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9272497154793107e-05,
      "loss": 0.5914,
      "step": 244
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.926507127449976e-05,
      "loss": 0.6313,
      "step": 245
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.925760913257224e-05,
      "loss": 0.5949,
      "step": 246
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9250110758215882e-05,
      "loss": 0.5606,
      "step": 247
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9242576180777816e-05,
      "loss": 0.5818,
      "step": 248
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.923500542974687e-05,
      "loss": 0.5786,
      "step": 249
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.922739853475345e-05,
      "loss": 0.5931,
      "step": 250
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.9219755525569418e-05,
      "loss": 0.5688,
      "step": 251
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.921207643210799e-05,
      "loss": 0.6043,
      "step": 252
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.9204361284423586e-05,
      "loss": 0.6096,
      "step": 253
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.9196610112711763e-05,
      "loss": 0.5528,
      "step": 254
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9188822947309046e-05,
      "loss": 0.5853,
      "step": 255
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.918099981869285e-05,
      "loss": 0.5707,
      "step": 256
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9173140757481325e-05,
      "loss": 0.5716,
      "step": 257
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9165245794433272e-05,
      "loss": 0.6002,
      "step": 258
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9157314960447988e-05,
      "loss": 0.5648,
      "step": 259
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9149348286565176e-05,
      "loss": 0.5971,
      "step": 260
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.91413458039648e-05,
      "loss": 0.5905,
      "step": 261
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9133307543966976e-05,
      "loss": 0.5786,
      "step": 262
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9125233538031836e-05,
      "loss": 0.5853,
      "step": 263
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9117123817759438e-05,
      "loss": 0.6005,
      "step": 264
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9108978414889595e-05,
      "loss": 0.569,
      "step": 265
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.910079736130178e-05,
      "loss": 0.6134,
      "step": 266
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.9092580689015007e-05,
      "loss": 0.5816,
      "step": 267
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.9084328430187677e-05,
      "loss": 0.5978,
      "step": 268
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.907604061711749e-05,
      "loss": 0.5856,
      "step": 269
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.906771728224128e-05,
      "loss": 0.5805,
      "step": 270
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9059358458134914e-05,
      "loss": 0.5881,
      "step": 271
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9050964177513158e-05,
      "loss": 0.5941,
      "step": 272
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9042534473229544e-05,
      "loss": 0.5932,
      "step": 273
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.903406937827625e-05,
      "loss": 0.5798,
      "step": 274
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.902556892578396e-05,
      "loss": 0.5978,
      "step": 275
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9017033149021747e-05,
      "loss": 0.5805,
      "step": 276
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.9008462081396924e-05,
      "loss": 0.5915,
      "step": 277
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.8999855756454945e-05,
      "loss": 0.5872,
      "step": 278
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.8991214207879233e-05,
      "loss": 0.564,
      "step": 279
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.898253746949109e-05,
      "loss": 0.571,
      "step": 280
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.897382557524952e-05,
      "loss": 0.5675,
      "step": 281
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8965078559251144e-05,
      "loss": 0.5753,
      "step": 282
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8956296455730027e-05,
      "loss": 0.5819,
      "step": 283
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8947479299057562e-05,
      "loss": 0.5728,
      "step": 284
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.893862712374234e-05,
      "loss": 0.5565,
      "step": 285
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8929739964430002e-05,
      "loss": 0.5988,
      "step": 286
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8920817855903103e-05,
      "loss": 0.5909,
      "step": 287
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8911860833081e-05,
      "loss": 0.5718,
      "step": 288
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8902868931019683e-05,
      "loss": 0.5807,
      "step": 289
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8893842184911656e-05,
      "loss": 0.5766,
      "step": 290
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8884780630085795e-05,
      "loss": 0.5602,
      "step": 291
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8875684302007215e-05,
      "loss": 0.5749,
      "step": 292
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.886655323627712e-05,
      "loss": 0.5826,
      "step": 293
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8857387468632675e-05,
      "loss": 0.5909,
      "step": 294
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.884818703494686e-05,
      "loss": 0.5728,
      "step": 295
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8838951971228326e-05,
      "loss": 0.5622,
      "step": 296
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8829682313621268e-05,
      "loss": 0.5836,
      "step": 297
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8820378098405272e-05,
      "loss": 0.5974,
      "step": 298
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.8811039361995173e-05,
      "loss": 0.5754,
      "step": 299
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.880166614094091e-05,
      "loss": 0.5675,
      "step": 300
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.87922584719274e-05,
      "loss": 0.5901,
      "step": 301
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.878281639177437e-05,
      "loss": 0.582,
      "step": 302
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.8773339937436237e-05,
      "loss": 0.5656,
      "step": 303
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.876382914600195e-05,
      "loss": 0.5803,
      "step": 304
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.875428405469483e-05,
      "loss": 0.5909,
      "step": 305
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8744704700872463e-05,
      "loss": 0.5609,
      "step": 306
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8735091122026522e-05,
      "loss": 0.5477,
      "step": 307
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8725443355782623e-05,
      "loss": 0.5823,
      "step": 308
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.8715761439900203e-05,
      "loss": 0.5714,
      "step": 309
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.870604541227233e-05,
      "loss": 0.5884,
      "step": 310
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.86962953109256e-05,
      "loss": 0.5589,
      "step": 311
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.8686511174019956e-05,
      "loss": 0.5694,
      "step": 312
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.8676693039848548e-05,
      "loss": 0.5743,
      "step": 313
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.866684094683759e-05,
      "loss": 0.5731,
      "step": 314
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.86569549335462e-05,
      "loss": 0.5609,
      "step": 315
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.864703503866626e-05,
      "loss": 0.5631,
      "step": 316
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.8637081301022248e-05,
      "loss": 0.5614,
      "step": 317
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.86270937595711e-05,
      "loss": 0.593,
      "step": 318
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.8617072453402058e-05,
      "loss": 0.6037,
      "step": 319
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.86070174217365e-05,
      "loss": 0.6023,
      "step": 320
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8596928703927816e-05,
      "loss": 0.5916,
      "step": 321
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8586806339461226e-05,
      "loss": 0.5926,
      "step": 322
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8576650367953634e-05,
      "loss": 0.5685,
      "step": 323
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8566460829153484e-05,
      "loss": 0.5785,
      "step": 324
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8556237762940585e-05,
      "loss": 0.5775,
      "step": 325
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8545981209325975e-05,
      "loss": 0.5783,
      "step": 326
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8535691208451757e-05,
      "loss": 0.5943,
      "step": 327
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8525367800590927e-05,
      "loss": 0.597,
      "step": 328
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8515011026147238e-05,
      "loss": 0.5838,
      "step": 329
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8504620925655034e-05,
      "loss": 0.5721,
      "step": 330
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8494197539779083e-05,
      "loss": 0.5873,
      "step": 331
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.848374090931444e-05,
      "loss": 0.5705,
      "step": 332
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8473251075186257e-05,
      "loss": 0.5626,
      "step": 333
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8462728078449642e-05,
      "loss": 0.5775,
      "step": 334
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8452171960289506e-05,
      "loss": 0.5936,
      "step": 335
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8441582762020374e-05,
      "loss": 0.5872,
      "step": 336
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8430960525086255e-05,
      "loss": 0.5848,
      "step": 337
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8420305291060457e-05,
      "loss": 0.5815,
      "step": 338
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8409617101645425e-05,
      "loss": 0.6121,
      "step": 339
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.83988959986726e-05,
      "loss": 0.5709,
      "step": 340
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8388142024102234e-05,
      "loss": 0.5965,
      "step": 341
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8377355220023223e-05,
      "loss": 0.5989,
      "step": 342
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8366535628652966e-05,
      "loss": 0.6011,
      "step": 343
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8355683292337174e-05,
      "loss": 0.5804,
      "step": 344
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.834479825354972e-05,
      "loss": 0.5699,
      "step": 345
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8333880554892466e-05,
      "loss": 0.5538,
      "step": 346
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.83229302390951e-05,
      "loss": 0.5755,
      "step": 347
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8311947349014968e-05,
      "loss": 0.5812,
      "step": 348
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8300931927636895e-05,
      "loss": 0.5754,
      "step": 349
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8289884018073042e-05,
      "loss": 0.5936,
      "step": 350
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8278803663562718e-05,
      "loss": 0.6044,
      "step": 351
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.82676909074722e-05,
      "loss": 0.5521,
      "step": 352
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8256545793294606e-05,
      "loss": 0.5978,
      "step": 353
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8245368364649675e-05,
      "loss": 0.5588,
      "step": 354
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8234158665283618e-05,
      "loss": 0.583,
      "step": 355
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8222916739068964e-05,
      "loss": 0.5646,
      "step": 356
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8211642630004358e-05,
      "loss": 0.5844,
      "step": 357
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8200336382214406e-05,
      "loss": 0.5684,
      "step": 358
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8188998039949502e-05,
      "loss": 0.5786,
      "step": 359
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8177627647585644e-05,
      "loss": 0.5932,
      "step": 360
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8166225249624266e-05,
      "loss": 0.5567,
      "step": 361
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8154790890692082e-05,
      "loss": 0.5618,
      "step": 362
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8143324615540878e-05,
      "loss": 0.5652,
      "step": 363
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.813182646904736e-05,
      "loss": 0.5642,
      "step": 364
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.812029649621297e-05,
      "loss": 0.5469,
      "step": 365
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.8108734742163717e-05,
      "loss": 0.5477,
      "step": 366
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.809714125214999e-05,
      "loss": 0.5935,
      "step": 367
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.8085516071546385e-05,
      "loss": 0.5882,
      "step": 368
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.807385924585154e-05,
      "loss": 0.5835,
      "step": 369
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8062170820687925e-05,
      "loss": 0.5568,
      "step": 370
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.805045084180171e-05,
      "loss": 0.561,
      "step": 371
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.803869935506253e-05,
      "loss": 0.5967,
      "step": 372
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8026916406463368e-05,
      "loss": 0.5683,
      "step": 373
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8015102042120314e-05,
      "loss": 0.567,
      "step": 374
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.8003256308272434e-05,
      "loss": 0.5758,
      "step": 375
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.799137925128155e-05,
      "loss": 0.5801,
      "step": 376
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.797947091763209e-05,
      "loss": 0.5398,
      "step": 377
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.7967531353930893e-05,
      "loss": 0.5696,
      "step": 378
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.795556060690701e-05,
      "loss": 0.5672,
      "step": 379
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.7943558723411555e-05,
      "loss": 0.5797,
      "step": 380
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.7931525750417497e-05,
      "loss": 0.5604,
      "step": 381
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.791946173501948e-05,
      "loss": 0.5656,
      "step": 382
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.790736672443365e-05,
      "loss": 0.579,
      "step": 383
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.7895240765997455e-05,
      "loss": 0.551,
      "step": 384
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.788308390716947e-05,
      "loss": 0.559,
      "step": 385
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7870896195529205e-05,
      "loss": 0.5617,
      "step": 386
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7858677678776923e-05,
      "loss": 0.5522,
      "step": 387
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7846428404733456e-05,
      "loss": 0.5895,
      "step": 388
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7834148421340006e-05,
      "loss": 0.5826,
      "step": 389
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7821837776657968e-05,
      "loss": 0.5902,
      "step": 390
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.780949651886875e-05,
      "loss": 0.5567,
      "step": 391
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7797124696273553e-05,
      "loss": 0.5509,
      "step": 392
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7784722357293225e-05,
      "loss": 0.5516,
      "step": 393
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7772289550468033e-05,
      "loss": 0.5642,
      "step": 394
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.775982632445749e-05,
      "loss": 0.5823,
      "step": 395
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.774733272804017e-05,
      "loss": 0.5602,
      "step": 396
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7734808810113512e-05,
      "loss": 0.5983,
      "step": 397
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.772225461969362e-05,
      "loss": 0.5519,
      "step": 398
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7709670205915084e-05,
      "loss": 0.5914,
      "step": 399
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7697055618030777e-05,
      "loss": 0.5604,
      "step": 400
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7684410905411675e-05,
      "loss": 0.5462,
      "step": 401
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.7671736117546646e-05,
      "loss": 0.534,
      "step": 402
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.765903130404228e-05,
      "loss": 0.5752,
      "step": 403
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.7646296514622666e-05,
      "loss": 0.5551,
      "step": 404
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.7633531799129227e-05,
      "loss": 0.5619,
      "step": 405
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.76207372075205e-05,
      "loss": 0.5663,
      "step": 406
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.7607912789871956e-05,
      "loss": 0.562,
      "step": 407
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.75950585963758e-05,
      "loss": 0.5543,
      "step": 408
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7582174677340767e-05,
      "loss": 0.5614,
      "step": 409
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7569261083191942e-05,
      "loss": 0.5856,
      "step": 410
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7556317864470543e-05,
      "loss": 0.6035,
      "step": 411
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7543345071833745e-05,
      "loss": 0.5694,
      "step": 412
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.7530342756054452e-05,
      "loss": 0.5486,
      "step": 413
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.751731096802113e-05,
      "loss": 0.567,
      "step": 414
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.7504249758737587e-05,
      "loss": 0.5631,
      "step": 415
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.7491159179322785e-05,
      "loss": 0.5486,
      "step": 416
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.747803928101062e-05,
      "loss": 0.5754,
      "step": 417
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.746489011514976e-05,
      "loss": 0.5776,
      "step": 418
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7451711733203406e-05,
      "loss": 0.5888,
      "step": 419
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7438504186749105e-05,
      "loss": 0.5417,
      "step": 420
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7425267527478543e-05,
      "loss": 0.5689,
      "step": 421
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7412001807197362e-05,
      "loss": 0.5896,
      "step": 422
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7398707077824936e-05,
      "loss": 0.5549,
      "step": 423
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7385383391394174e-05,
      "loss": 0.5503,
      "step": 424
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.737203080005131e-05,
      "loss": 0.5957,
      "step": 425
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.735864935605572e-05,
      "loss": 0.5354,
      "step": 426
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7345239111779697e-05,
      "loss": 0.558,
      "step": 427
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.733180011970825e-05,
      "loss": 0.5548,
      "step": 428
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7318332432438906e-05,
      "loss": 0.5616,
      "step": 429
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.7304836102681494e-05,
      "loss": 0.5659,
      "step": 430
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.729131118325795e-05,
      "loss": 0.594,
      "step": 431
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.72777577271021e-05,
      "loss": 0.5509,
      "step": 432
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.726417578725946e-05,
      "loss": 0.5784,
      "step": 433
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.7250565416887016e-05,
      "loss": 0.575,
      "step": 434
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.7236926669253043e-05,
      "loss": 0.5795,
      "step": 435
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.7223259597736863e-05,
      "loss": 0.5651,
      "step": 436
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.7209564255828653e-05,
      "loss": 0.549,
      "step": 437
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.7195840697129252e-05,
      "loss": 0.5601,
      "step": 438
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.718208897534991e-05,
      "loss": 0.5688,
      "step": 439
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.716830914431212e-05,
      "loss": 0.5515,
      "step": 440
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.715450125794738e-05,
      "loss": 0.5623,
      "step": 441
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.7140665370296995e-05,
      "loss": 0.5534,
      "step": 442
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.7126801535511854e-05,
      "loss": 0.5982,
      "step": 443
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.7112909807852237e-05,
      "loss": 0.5784,
      "step": 444
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.709899024168758e-05,
      "loss": 0.5545,
      "step": 445
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7085042891496283e-05,
      "loss": 0.5658,
      "step": 446
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.5827,
      "step": 447
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.705706505749083e-05,
      "loss": 0.5529,
      "step": 448
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.704303468317632e-05,
      "loss": 0.5623,
      "step": 449
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.702897674383402e-05,
      "loss": 0.5638,
      "step": 450
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7014891294483893e-05,
      "loss": 0.5845,
      "step": 451
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.7000778390253566e-05,
      "loss": 0.5605,
      "step": 452
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6986638086378124e-05,
      "loss": 0.5594,
      "step": 453
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6972470438199883e-05,
      "loss": 0.5823,
      "step": 454
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6958275501168183e-05,
      "loss": 0.5924,
      "step": 455
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6944053330839164e-05,
      "loss": 0.5598,
      "step": 456
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.692980398287555e-05,
      "loss": 0.5459,
      "step": 457
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.6915527513046445e-05,
      "loss": 0.5401,
      "step": 458
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.690122397722709e-05,
      "loss": 0.5689,
      "step": 459
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.6886893431398664e-05,
      "loss": 0.586,
      "step": 460
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.6872535931648046e-05,
      "loss": 0.5453,
      "step": 461
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.685815153416762e-05,
      "loss": 0.549,
      "step": 462
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6843740295255044e-05,
      "loss": 0.5612,
      "step": 463
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6829302271313012e-05,
      "loss": 0.5649,
      "step": 464
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6814837518849064e-05,
      "loss": 0.5521,
      "step": 465
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.680034609447535e-05,
      "loss": 0.5812,
      "step": 466
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6785828054908393e-05,
      "loss": 0.5477,
      "step": 467
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6771283456968905e-05,
      "loss": 0.5471,
      "step": 468
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6756712357581526e-05,
      "loss": 0.5506,
      "step": 469
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.674211481377462e-05,
      "loss": 0.5484,
      "step": 470
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6727490882680052e-05,
      "loss": 0.5591,
      "step": 471
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6712840621532963e-05,
      "loss": 0.5707,
      "step": 472
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.669816408767154e-05,
      "loss": 0.5579,
      "step": 473
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.66834613385368e-05,
      "loss": 0.5469,
      "step": 474
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.6668732431672357e-05,
      "loss": 0.5561,
      "step": 475
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.6653977424724208e-05,
      "loss": 0.5755,
      "step": 476
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.663919637544049e-05,
      "loss": 0.5522,
      "step": 477
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.662438934167128e-05,
      "loss": 0.5507,
      "step": 478
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.660955638136834e-05,
      "loss": 0.5565,
      "step": 479
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.65946975525849e-05,
      "loss": 0.5865,
      "step": 480
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.657981291347545e-05,
      "loss": 0.5546,
      "step": 481
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.6564902522295484e-05,
      "loss": 0.5583,
      "step": 482
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.654996643740129e-05,
      "loss": 0.5629,
      "step": 483
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.6535004717249713e-05,
      "loss": 0.5689,
      "step": 484
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.6520017420397933e-05,
      "loss": 0.5913,
      "step": 485
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.6505004605503227e-05,
      "loss": 0.556,
      "step": 486
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.648996633132274e-05,
      "loss": 0.5533,
      "step": 487
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.647490265671328e-05,
      "loss": 0.5738,
      "step": 488
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.645981364063105e-05,
      "loss": 0.5692,
      "step": 489
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.644469934213143e-05,
      "loss": 0.572,
      "step": 490
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.642955982036877e-05,
      "loss": 0.5626,
      "step": 491
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.641439513459612e-05,
      "loss": 0.5657,
      "step": 492
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.6399205344165022e-05,
      "loss": 0.566,
      "step": 493
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.6383990508525283e-05,
      "loss": 0.5658,
      "step": 494
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.636875068722472e-05,
      "loss": 0.5354,
      "step": 495
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.635348593990894e-05,
      "loss": 0.5572,
      "step": 496
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.633819632632111e-05,
      "loss": 0.5348,
      "step": 497
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.6322881906301724e-05,
      "loss": 0.5562,
      "step": 498
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.630754273978834e-05,
      "loss": 0.5529,
      "step": 499
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.62921788868154e-05,
      "loss": 0.5645,
      "step": 500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6276790407513943e-05,
      "loss": 0.5347,
      "step": 501
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6261377362111396e-05,
      "loss": 0.5566,
      "step": 502
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6245939810931336e-05,
      "loss": 0.5584,
      "step": 503
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.623047781439324e-05,
      "loss": 0.5484,
      "step": 504
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6214991433012274e-05,
      "loss": 0.584,
      "step": 505
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6199480727399035e-05,
      "loss": 0.5531,
      "step": 506
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6183945758259316e-05,
      "loss": 0.5728,
      "step": 507
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.616838658639388e-05,
      "loss": 0.5342,
      "step": 508
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.615280327269821e-05,
      "loss": 0.566,
      "step": 509
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6137195878162267e-05,
      "loss": 0.5672,
      "step": 510
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6121564463870285e-05,
      "loss": 0.6008,
      "step": 511
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.610590909100048e-05,
      "loss": 0.5723,
      "step": 512
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.6090229820824846e-05,
      "loss": 0.5521,
      "step": 513
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.6074526714708913e-05,
      "loss": 0.5584,
      "step": 514
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.605879983411149e-05,
      "loss": 0.5813,
      "step": 515
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.6043049240584445e-05,
      "loss": 0.5368,
      "step": 516
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.6027274995772445e-05,
      "loss": 0.5588,
      "step": 517
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.6011477161412724e-05,
      "loss": 0.5642,
      "step": 518
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5995655799334845e-05,
      "loss": 0.5855,
      "step": 519
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.597981097146045e-05,
      "loss": 0.5612,
      "step": 520
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5963942739803028e-05,
      "loss": 0.5509,
      "step": 521
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.594805116646766e-05,
      "loss": 0.568,
      "step": 522
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5932136313650776e-05,
      "loss": 0.567,
      "step": 523
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5916198243639933e-05,
      "loss": 0.5717,
      "step": 524
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.590023701881354e-05,
      "loss": 0.5676,
      "step": 525
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5884252701640638e-05,
      "loss": 0.5679,
      "step": 526
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5868245354680642e-05,
      "loss": 0.5579,
      "step": 527
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.585221504058311e-05,
      "loss": 0.5628,
      "step": 528
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.5836161822087474e-05,
      "loss": 0.5745,
      "step": 529
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.5820085762022827e-05,
      "loss": 0.5609,
      "step": 530
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.580398692330764e-05,
      "loss": 0.5385,
      "step": 531
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.578786536894955e-05,
      "loss": 0.5622,
      "step": 532
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.577172116204509e-05,
      "loss": 0.5523,
      "step": 533
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.5755554365779458e-05,
      "loss": 0.5268,
      "step": 534
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.5739365043426256e-05,
      "loss": 0.5557,
      "step": 535
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.572315325834725e-05,
      "loss": 0.557,
      "step": 536
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.5706919073992125e-05,
      "loss": 0.5497,
      "step": 537
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.5690662553898224e-05,
      "loss": 0.5465,
      "step": 538
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5674383761690316e-05,
      "loss": 0.5601,
      "step": 539
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5658082761080332e-05,
      "loss": 0.5325,
      "step": 540
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5641759615867127e-05,
      "loss": 0.5747,
      "step": 541
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.562541438993622e-05,
      "loss": 0.5634,
      "step": 542
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.560904714725956e-05,
      "loss": 0.5313,
      "step": 543
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.559265795189525e-05,
      "loss": 0.5649,
      "step": 544
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.5576246867987324e-05,
      "loss": 0.5602,
      "step": 545
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.5559813959765482e-05,
      "loss": 0.5628,
      "step": 546
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.554335929154483e-05,
      "loss": 0.5351,
      "step": 547
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.5526882927725652e-05,
      "loss": 0.4116,
      "step": 548
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.5510384932793132e-05,
      "loss": 0.3934,
      "step": 549
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5493865371317125e-05,
      "loss": 0.3794,
      "step": 550
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5477324307951883e-05,
      "loss": 0.4035,
      "step": 551
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5460761807435817e-05,
      "loss": 0.3856,
      "step": 552
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.5444177934591242e-05,
      "loss": 0.3793,
      "step": 553
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.542757275432411e-05,
      "loss": 0.3793,
      "step": 554
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.541094633162378e-05,
      "loss": 0.4176,
      "step": 555
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5394298731562736e-05,
      "loss": 0.3934,
      "step": 556
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5377630019296358e-05,
      "loss": 0.3836,
      "step": 557
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.536094026006264e-05,
      "loss": 0.3855,
      "step": 558
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5344229519181964e-05,
      "loss": 0.3732,
      "step": 559
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.5327497862056825e-05,
      "loss": 0.384,
      "step": 560
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5310745354171576e-05,
      "loss": 0.3938,
      "step": 561
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5293972061092187e-05,
      "loss": 0.3776,
      "step": 562
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5277178048465958e-05,
      "loss": 0.3957,
      "step": 563
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.52603633820213e-05,
      "loss": 0.36,
      "step": 564
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.524352812756745e-05,
      "loss": 0.3807,
      "step": 565
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5226672350994222e-05,
      "loss": 0.3937,
      "step": 566
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.5209796118271753e-05,
      "loss": 0.3751,
      "step": 567
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.5192899495450237e-05,
      "loss": 0.3935,
      "step": 568
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.5175982548659675e-05,
      "loss": 0.3704,
      "step": 569
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.5159045344109613e-05,
      "loss": 0.3622,
      "step": 570
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.5142087948088873e-05,
      "loss": 0.3732,
      "step": 571
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.512511042696531e-05,
      "loss": 0.3859,
      "step": 572
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.5108112847185545e-05,
      "loss": 0.3515,
      "step": 573
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.5091095275274701e-05,
      "loss": 0.3677,
      "step": 574
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.5074057777836148e-05,
      "loss": 0.3912,
      "step": 575
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.505700042155124e-05,
      "loss": 0.3717,
      "step": 576
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.5039923273179055e-05,
      "loss": 0.3761,
      "step": 577
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.5022826399556135e-05,
      "loss": 0.3749,
      "step": 578
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.5005709867596216e-05,
      "loss": 0.3833,
      "step": 579
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.498857374428998e-05,
      "loss": 0.3908,
      "step": 580
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.4971418096704784e-05,
      "loss": 0.3764,
      "step": 581
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.4954242991984398e-05,
      "loss": 0.3663,
      "step": 582
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.4937048497348743e-05,
      "loss": 0.3787,
      "step": 583
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.4919834680093628e-05,
      "loss": 0.3826,
      "step": 584
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.4902601607590489e-05,
      "loss": 0.3784,
      "step": 585
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.4885349347286118e-05,
      "loss": 0.386,
      "step": 586
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.4868077966702414e-05,
      "loss": 0.3729,
      "step": 587
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.4850787533436101e-05,
      "loss": 0.377,
      "step": 588
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.4833478115158472e-05,
      "loss": 0.4029,
      "step": 589
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.4816149779615128e-05,
      "loss": 0.376,
      "step": 590
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.47988025946257e-05,
      "loss": 0.3731,
      "step": 591
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.4781436628083601e-05,
      "loss": 0.3813,
      "step": 592
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.4764051947955746e-05,
      "loss": 0.3864,
      "step": 593
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4746648622282294e-05,
      "loss": 0.3943,
      "step": 594
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4729226719176377e-05,
      "loss": 0.3903,
      "step": 595
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4711786306823838e-05,
      "loss": 0.3799,
      "step": 596
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4694327453482956e-05,
      "loss": 0.385,
      "step": 597
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4676850227484191e-05,
      "loss": 0.3902,
      "step": 598
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.4659354697229903e-05,
      "loss": 0.3802,
      "step": 599
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.46418409311941e-05,
      "loss": 0.3914,
      "step": 600
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.4624308997922151e-05,
      "loss": 0.3899,
      "step": 601
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.4606758966030536e-05,
      "loss": 0.4017,
      "step": 602
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.4589190904206565e-05,
      "loss": 0.38,
      "step": 603
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.4571604881208118e-05,
      "loss": 0.3905,
      "step": 604
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.4554000965863367e-05,
      "loss": 0.3738,
      "step": 605
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.453637922707051e-05,
      "loss": 0.3874,
      "step": 606
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.4518739733797507e-05,
      "loss": 0.3871,
      "step": 607
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.4501082555081802e-05,
      "loss": 0.3947,
      "step": 608
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.448340776003006e-05,
      "loss": 0.3709,
      "step": 609
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4465715417817889e-05,
      "loss": 0.3882,
      "step": 610
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4448005597689573e-05,
      "loss": 0.3874,
      "step": 611
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4430278368957809e-05,
      "loss": 0.3688,
      "step": 612
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4412533801003412e-05,
      "loss": 0.3661,
      "step": 613
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4394771963275079e-05,
      "loss": 0.3673,
      "step": 614
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4376992925289077e-05,
      "loss": 0.3727,
      "step": 615
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4359196756629006e-05,
      "loss": 0.4,
      "step": 616
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4341383526945507e-05,
      "loss": 0.3828,
      "step": 617
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4323553305956e-05,
      "loss": 0.3674,
      "step": 618
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4305706163444391e-05,
      "loss": 0.3801,
      "step": 619
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4287842169260827e-05,
      "loss": 0.3809,
      "step": 620
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.426996139332141e-05,
      "loss": 0.3966,
      "step": 621
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.425206390560791e-05,
      "loss": 0.3815,
      "step": 622
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.4234149776167518e-05,
      "loss": 0.3607,
      "step": 623
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.4216219075112542e-05,
      "loss": 0.3858,
      "step": 624
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.4198271872620162e-05,
      "loss": 0.3859,
      "step": 625
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.4180308238932137e-05,
      "loss": 0.3776,
      "step": 626
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4162328244354528e-05,
      "loss": 0.3681,
      "step": 627
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4144331959257438e-05,
      "loss": 0.381,
      "step": 628
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4126319454074725e-05,
      "loss": 0.3737,
      "step": 629
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4108290799303721e-05,
      "loss": 0.3595,
      "step": 630
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4090246065504981e-05,
      "loss": 0.3937,
      "step": 631
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.4072185323301977e-05,
      "loss": 0.3699,
      "step": 632
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.4054108643380838e-05,
      "loss": 0.3801,
      "step": 633
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.4036016096490066e-05,
      "loss": 0.3492,
      "step": 634
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.4017907753440278e-05,
      "loss": 0.3773,
      "step": 635
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.3999783685103893e-05,
      "loss": 0.3756,
      "step": 636
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.398164396241489e-05,
      "loss": 0.384,
      "step": 637
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.3963488656368518e-05,
      "loss": 0.4027,
      "step": 638
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.3945317838021001e-05,
      "loss": 0.3945,
      "step": 639
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.3927131578489292e-05,
      "loss": 0.3563,
      "step": 640
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.3908929948950763e-05,
      "loss": 0.384,
      "step": 641
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.3890713020642951e-05,
      "loss": 0.385,
      "step": 642
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3872480864863272e-05,
      "loss": 0.402,
      "step": 643
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3854233552968726e-05,
      "loss": 0.3818,
      "step": 644
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.383597115637564e-05,
      "loss": 0.3692,
      "step": 645
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3817693746559382e-05,
      "loss": 0.3782,
      "step": 646
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3799401395054073e-05,
      "loss": 0.3891,
      "step": 647
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3781094173452316e-05,
      "loss": 0.3803,
      "step": 648
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3762772153404909e-05,
      "loss": 0.3734,
      "step": 649
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3744435406620571e-05,
      "loss": 0.3802,
      "step": 650
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3726084004865659e-05,
      "loss": 0.383,
      "step": 651
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3707718019963887e-05,
      "loss": 0.3854,
      "step": 652
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.3689337523796043e-05,
      "loss": 0.3889,
      "step": 653
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.3670942588299708e-05,
      "loss": 0.3806,
      "step": 654
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.3652533285468972e-05,
      "loss": 0.3754,
      "step": 655
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.3634109687354169e-05,
      "loss": 0.3696,
      "step": 656
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.3615671866061573e-05,
      "loss": 0.3823,
      "step": 657
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.3597219893753119e-05,
      "loss": 0.3897,
      "step": 658
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3578753842646137e-05,
      "loss": 0.3893,
      "step": 659
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3560273785013057e-05,
      "loss": 0.3894,
      "step": 660
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3541779793181122e-05,
      "loss": 0.3959,
      "step": 661
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3523271939532112e-05,
      "loss": 0.3988,
      "step": 662
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3504750296502064e-05,
      "loss": 0.3687,
      "step": 663
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3486214936580977e-05,
      "loss": 0.3587,
      "step": 664
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.3467665932312545e-05,
      "loss": 0.3789,
      "step": 665
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.3449103356293853e-05,
      "loss": 0.3779,
      "step": 666
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.343052728117511e-05,
      "loss": 0.3618,
      "step": 667
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.341193777965935e-05,
      "loss": 0.3809,
      "step": 668
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.3393334924502168e-05,
      "loss": 0.3936,
      "step": 669
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3374718788511412e-05,
      "loss": 0.403,
      "step": 670
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3356089444546906e-05,
      "loss": 0.3877,
      "step": 671
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3337446965520183e-05,
      "loss": 0.3733,
      "step": 672
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3318791424394161e-05,
      "loss": 0.3867,
      "step": 673
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.330012289418291e-05,
      "loss": 0.3893,
      "step": 674
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3281441447951304e-05,
      "loss": 0.3911,
      "step": 675
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3262747158814791e-05,
      "loss": 0.3739,
      "step": 676
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3244040099939078e-05,
      "loss": 0.3748,
      "step": 677
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3225320344539845e-05,
      "loss": 0.3797,
      "step": 678
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3206587965882465e-05,
      "loss": 0.3707,
      "step": 679
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3187843037281725e-05,
      "loss": 0.38,
      "step": 680
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.3169085632101515e-05,
      "loss": 0.3798,
      "step": 681
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.3150315823754572e-05,
      "loss": 0.3683,
      "step": 682
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.313153368570216e-05,
      "loss": 0.3678,
      "step": 683
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.3112739291453812e-05,
      "loss": 0.3823,
      "step": 684
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.3093932714567026e-05,
      "loss": 0.3803,
      "step": 685
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.3075114028646976e-05,
      "loss": 0.4052,
      "step": 686
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.3056283307346234e-05,
      "loss": 0.3622,
      "step": 687
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.3037440624364468e-05,
      "loss": 0.3754,
      "step": 688
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.3018586053448173e-05,
      "loss": 0.3776,
      "step": 689
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.299971966839036e-05,
      "loss": 0.3856,
      "step": 690
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.298084154303029e-05,
      "loss": 0.3818,
      "step": 691
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.2961951751253158e-05,
      "loss": 0.3781,
      "step": 692
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.2943050366989833e-05,
      "loss": 0.3672,
      "step": 693
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.292413746421655e-05,
      "loss": 0.3942,
      "step": 694
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.290521311695462e-05,
      "loss": 0.3921,
      "step": 695
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.2886277399270153e-05,
      "loss": 0.3753,
      "step": 696
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.2867330385273756e-05,
      "loss": 0.3896,
      "step": 697
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.2848372149120248e-05,
      "loss": 0.3903,
      "step": 698
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.282940276500837e-05,
      "loss": 0.3835,
      "step": 699
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.2810422307180497e-05,
      "loss": 0.3968,
      "step": 700
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.2791430849922335e-05,
      "loss": 0.3778,
      "step": 701
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.2772428467562653e-05,
      "loss": 0.3746,
      "step": 702
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2753415234472965e-05,
      "loss": 0.4004,
      "step": 703
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2734391225067264e-05,
      "loss": 0.3841,
      "step": 704
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2715356513801714e-05,
      "loss": 0.3868,
      "step": 705
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2696311175174358e-05,
      "loss": 0.3962,
      "step": 706
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.2677255283724844e-05,
      "loss": 0.3979,
      "step": 707
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.265818891403412e-05,
      "loss": 0.3917,
      "step": 708
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.2639112140724133e-05,
      "loss": 0.3858,
      "step": 709
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.2620025038457555e-05,
      "loss": 0.3887,
      "step": 710
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.260092768193749e-05,
      "loss": 0.4006,
      "step": 711
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.258182014590716e-05,
      "loss": 0.3572,
      "step": 712
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.256270250514964e-05,
      "loss": 0.3722,
      "step": 713
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.2543574834487551e-05,
      "loss": 0.3939,
      "step": 714
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.252443720878276e-05,
      "loss": 0.3948,
      "step": 715
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.2505289702936109e-05,
      "loss": 0.3963,
      "step": 716
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.2486132391887103e-05,
      "loss": 0.3698,
      "step": 717
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.2466965350613615e-05,
      "loss": 0.3875,
      "step": 718
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.2447788654131616e-05,
      "loss": 0.3684,
      "step": 719
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.2428602377494851e-05,
      "loss": 0.3773,
      "step": 720
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.240940659579457e-05,
      "loss": 0.3706,
      "step": 721
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.239020138415922e-05,
      "loss": 0.3714,
      "step": 722
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.2370986817754146e-05,
      "loss": 0.39,
      "step": 723
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.2351762971781324e-05,
      "loss": 0.396,
      "step": 724
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.2332529921479037e-05,
      "loss": 0.398,
      "step": 725
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.2313287742121592e-05,
      "loss": 0.3778,
      "step": 726
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.2294036509019025e-05,
      "loss": 0.3743,
      "step": 727
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.227477629751681e-05,
      "loss": 0.3706,
      "step": 728
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.225550718299556e-05,
      "loss": 0.3777,
      "step": 729
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.2236229240870731e-05,
      "loss": 0.3891,
      "step": 730
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2216942546592333e-05,
      "loss": 0.3915,
      "step": 731
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2197647175644618e-05,
      "loss": 0.3677,
      "step": 732
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2178343203545811e-05,
      "loss": 0.38,
      "step": 733
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2159030705847792e-05,
      "loss": 0.367,
      "step": 734
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2139709758135814e-05,
      "loss": 0.3878,
      "step": 735
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.2120380436028194e-05,
      "loss": 0.3845,
      "step": 736
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.2101042815176031e-05,
      "loss": 0.3796,
      "step": 737
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.2081696971262905e-05,
      "loss": 0.3773,
      "step": 738
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.2062342980004573e-05,
      "loss": 0.3682,
      "step": 739
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.204298091714868e-05,
      "loss": 0.3771,
      "step": 740
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.2023610858474465e-05,
      "loss": 0.3758,
      "step": 741
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.2004232879792465e-05,
      "loss": 0.3739,
      "step": 742
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1984847056944207e-05,
      "loss": 0.3826,
      "step": 743
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1965453465801916e-05,
      "loss": 0.3796,
      "step": 744
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1946052182268228e-05,
      "loss": 0.3728,
      "step": 745
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1926643282275884e-05,
      "loss": 0.3811,
      "step": 746
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1907226841787436e-05,
      "loss": 0.3794,
      "step": 747
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1887802936794938e-05,
      "loss": 0.3681,
      "step": 748
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1868371643319669e-05,
      "loss": 0.3765,
      "step": 749
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1848933037411825e-05,
      "loss": 0.3898,
      "step": 750
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1829487195150211e-05,
      "loss": 0.4043,
      "step": 751
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1810034192641969e-05,
      "loss": 0.3779,
      "step": 752
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1790574106022247e-05,
      "loss": 0.3954,
      "step": 753
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1771107011453934e-05,
      "loss": 0.3675,
      "step": 754
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1751632985127341e-05,
      "loss": 0.3658,
      "step": 755
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1732152103259913e-05,
      "loss": 0.3647,
      "step": 756
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.171266444209591e-05,
      "loss": 0.3907,
      "step": 757
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1693170077906145e-05,
      "loss": 0.393,
      "step": 758
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1673669086987653e-05,
      "loss": 0.3661,
      "step": 759
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1654161545663413e-05,
      "loss": 0.3805,
      "step": 760
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1634647530282035e-05,
      "loss": 0.3897,
      "step": 761
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1615127117217465e-05,
      "loss": 0.379,
      "step": 762
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1595600382868692e-05,
      "loss": 0.3773,
      "step": 763
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.157606740365945e-05,
      "loss": 0.3856,
      "step": 764
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.155652825603791e-05,
      "loss": 0.4042,
      "step": 765
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1536983016476375e-05,
      "loss": 0.3684,
      "step": 766
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1517431761471009e-05,
      "loss": 0.3628,
      "step": 767
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1497874567541505e-05,
      "loss": 0.3742,
      "step": 768
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.1478311511230808e-05,
      "loss": 0.3806,
      "step": 769
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.1458742669104806e-05,
      "loss": 0.38,
      "step": 770
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.143916811775202e-05,
      "loss": 0.3914,
      "step": 771
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.1419587933783334e-05,
      "loss": 0.3888,
      "step": 772
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.140000219383167e-05,
      "loss": 0.378,
      "step": 773
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1380410974551683e-05,
      "loss": 0.4107,
      "step": 774
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1360814352619489e-05,
      "loss": 0.3612,
      "step": 775
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1341212404732348e-05,
      "loss": 0.3933,
      "step": 776
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1321605207608355e-05,
      "loss": 0.3796,
      "step": 777
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1301992837986154e-05,
      "loss": 0.3918,
      "step": 778
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.1282375372624632e-05,
      "loss": 0.3687,
      "step": 779
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1262752888302624e-05,
      "loss": 0.3687,
      "step": 780
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1243125461818605e-05,
      "loss": 0.3629,
      "step": 781
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1223493169990394e-05,
      "loss": 0.3728,
      "step": 782
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1203856089654847e-05,
      "loss": 0.3909,
      "step": 783
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1184214297667571e-05,
      "loss": 0.3912,
      "step": 784
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.1164567870902603e-05,
      "loss": 0.3864,
      "step": 785
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.1144916886252126e-05,
      "loss": 0.3741,
      "step": 786
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.1125261420626162e-05,
      "loss": 0.3811,
      "step": 787
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.110560155095226e-05,
      "loss": 0.3891,
      "step": 788
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.1085937354175225e-05,
      "loss": 0.3873,
      "step": 789
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.1066268907256783e-05,
      "loss": 0.3624,
      "step": 790
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.1046596287175297e-05,
      "loss": 0.3776,
      "step": 791
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.1026919570925463e-05,
      "loss": 0.4078,
      "step": 792
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.1007238835518008e-05,
      "loss": 0.379,
      "step": 793
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.0987554157979392e-05,
      "loss": 0.3804,
      "step": 794
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.0967865615351508e-05,
      "loss": 0.383,
      "step": 795
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0948173284691359e-05,
      "loss": 0.3762,
      "step": 796
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0928477243070793e-05,
      "loss": 0.3947,
      "step": 797
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.090877756757617e-05,
      "loss": 0.3487,
      "step": 798
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0889074335308081e-05,
      "loss": 0.3884,
      "step": 799
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0869367623381026e-05,
      "loss": 0.3777,
      "step": 800
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0849657508923137e-05,
      "loss": 0.403,
      "step": 801
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0829944069075848e-05,
      "loss": 0.3675,
      "step": 802
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0810227380993628e-05,
      "loss": 0.3893,
      "step": 803
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0790507521843636e-05,
      "loss": 0.3923,
      "step": 804
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0770784568805457e-05,
      "loss": 0.4103,
      "step": 805
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0751058599070782e-05,
      "loss": 0.4027,
      "step": 806
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0731329689843105e-05,
      "loss": 0.4037,
      "step": 807
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.071159791833743e-05,
      "loss": 0.3827,
      "step": 808
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.069186336177996e-05,
      "loss": 0.3761,
      "step": 809
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0672126097407796e-05,
      "loss": 0.376,
      "step": 810
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0652386202468638e-05,
      "loss": 0.4017,
      "step": 811
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0632643754220496e-05,
      "loss": 0.3802,
      "step": 812
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0612898829931341e-05,
      "loss": 0.3995,
      "step": 813
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0593151506878868e-05,
      "loss": 0.39,
      "step": 814
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.057340186235014e-05,
      "loss": 0.3782,
      "step": 815
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0553649973641314e-05,
      "loss": 0.3838,
      "step": 816
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0533895918057326e-05,
      "loss": 0.3689,
      "step": 817
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0514139772911598e-05,
      "loss": 0.3748,
      "step": 818
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0494381615525726e-05,
      "loss": 0.3812,
      "step": 819
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0474621523229182e-05,
      "loss": 0.3822,
      "step": 820
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0454859573359013e-05,
      "loss": 0.3758,
      "step": 821
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0435095843259531e-05,
      "loss": 0.3901,
      "step": 822
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0415330410282021e-05,
      "loss": 0.3999,
      "step": 823
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0395563351784433e-05,
      "loss": 0.3697,
      "step": 824
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0375794745131074e-05,
      "loss": 0.3819,
      "step": 825
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0356024667692314e-05,
      "loss": 0.359,
      "step": 826
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0336253196844276e-05,
      "loss": 0.3813,
      "step": 827
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0316480409968543e-05,
      "loss": 0.3665,
      "step": 828
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0296706384451842e-05,
      "loss": 0.3875,
      "step": 829
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0276931197685753e-05,
      "loss": 0.3858,
      "step": 830
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0257154927066393e-05,
      "loss": 0.3853,
      "step": 831
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0237377649994129e-05,
      "loss": 0.3802,
      "step": 832
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0217599443873265e-05,
      "loss": 0.3795,
      "step": 833
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0197820386111738e-05,
      "loss": 0.3867,
      "step": 834
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0178040554120825e-05,
      "loss": 0.384,
      "step": 835
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0158260025314821e-05,
      "loss": 0.3789,
      "step": 836
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0138478877110765e-05,
      "loss": 0.3767,
      "step": 837
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0118697186928107e-05,
      "loss": 0.3634,
      "step": 838
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0098915032188416e-05,
      "loss": 0.3804,
      "step": 839
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.0079132490315095e-05,
      "loss": 0.3881,
      "step": 840
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.0059349638733045e-05,
      "loss": 0.3893,
      "step": 841
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.0039566554868393e-05,
      "loss": 0.3855,
      "step": 842
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.0019783316148168e-05,
      "loss": 0.3824,
      "step": 843
    },
    {
      "epoch": 1.54,
      "learning_rate": 1e-05,
      "loss": 0.3813,
      "step": 844
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.980216683851837e-06,
      "loss": 0.392,
      "step": 845
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.960433445131608e-06,
      "loss": 0.381,
      "step": 846
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.940650361266956e-06,
      "loss": 0.3543,
      "step": 847
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.92086750968491e-06,
      "loss": 0.3672,
      "step": 848
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.901084967811586e-06,
      "loss": 0.386,
      "step": 849
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.881302813071898e-06,
      "loss": 0.3724,
      "step": 850
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.86152112288924e-06,
      "loss": 0.3985,
      "step": 851
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.841739974685179e-06,
      "loss": 0.3916,
      "step": 852
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.821959445879178e-06,
      "loss": 0.3681,
      "step": 853
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.802179613888264e-06,
      "loss": 0.3812,
      "step": 854
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.782400556126737e-06,
      "loss": 0.3759,
      "step": 855
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.762622350005873e-06,
      "loss": 0.3888,
      "step": 856
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.742845072933612e-06,
      "loss": 0.3792,
      "step": 857
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.723068802314247e-06,
      "loss": 0.3717,
      "step": 858
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.70329361554816e-06,
      "loss": 0.3732,
      "step": 859
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.683519590031459e-06,
      "loss": 0.3937,
      "step": 860
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.663746803155729e-06,
      "loss": 0.3776,
      "step": 861
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.643975332307687e-06,
      "loss": 0.3752,
      "step": 862
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.62420525486893e-06,
      "loss": 0.3611,
      "step": 863
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.604436648215572e-06,
      "loss": 0.3819,
      "step": 864
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.58466958971798e-06,
      "loss": 0.3577,
      "step": 865
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.564904156740474e-06,
      "loss": 0.3588,
      "step": 866
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.545140426640992e-06,
      "loss": 0.4042,
      "step": 867
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.525378476770821e-06,
      "loss": 0.3754,
      "step": 868
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.505618384474277e-06,
      "loss": 0.3836,
      "step": 869
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.485860227088406e-06,
      "loss": 0.3757,
      "step": 870
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.466104081942674e-06,
      "loss": 0.3551,
      "step": 871
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.44635002635869e-06,
      "loss": 0.3712,
      "step": 872
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.426598137649866e-06,
      "loss": 0.3697,
      "step": 873
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.406848493121134e-06,
      "loss": 0.3597,
      "step": 874
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.38710117006866e-06,
      "loss": 0.3892,
      "step": 875
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.36735624577951e-06,
      "loss": 0.3965,
      "step": 876
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.347613797531362e-06,
      "loss": 0.4039,
      "step": 877
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.327873902592207e-06,
      "loss": 0.362,
      "step": 878
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.308136638220044e-06,
      "loss": 0.3831,
      "step": 879
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.288402081662571e-06,
      "loss": 0.3894,
      "step": 880
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.268670310156897e-06,
      "loss": 0.3932,
      "step": 881
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.248941400929223e-06,
      "loss": 0.3789,
      "step": 882
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.229215431194543e-06,
      "loss": 0.3819,
      "step": 883
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.209492478156367e-06,
      "loss": 0.3734,
      "step": 884
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.189772619006377e-06,
      "loss": 0.4128,
      "step": 885
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.170055930924152e-06,
      "loss": 0.3861,
      "step": 886
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.150342491076866e-06,
      "loss": 0.391,
      "step": 887
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.130632376618978e-06,
      "loss": 0.3616,
      "step": 888
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.110925664691922e-06,
      "loss": 0.3611,
      "step": 889
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.091222432423832e-06,
      "loss": 0.371,
      "step": 890
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.071522756929212e-06,
      "loss": 0.3951,
      "step": 891
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.051826715308646e-06,
      "loss": 0.3876,
      "step": 892
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.032134384648496e-06,
      "loss": 0.3813,
      "step": 893
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.012445842020611e-06,
      "loss": 0.3745,
      "step": 894
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.992761164481997e-06,
      "loss": 0.4143,
      "step": 895
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.97308042907454e-06,
      "loss": 0.3763,
      "step": 896
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.953403712824706e-06,
      "loss": 0.3852,
      "step": 897
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.93373109274322e-06,
      "loss": 0.3908,
      "step": 898
    },
    {
      "epoch": 1.64,
      "learning_rate": 8.914062645824774e-06,
      "loss": 0.3802,
      "step": 899
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.894398449047741e-06,
      "loss": 0.374,
      "step": 900
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.874738579373843e-06,
      "loss": 0.3823,
      "step": 901
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.855083113747876e-06,
      "loss": 0.3841,
      "step": 902
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.8354321290974e-06,
      "loss": 0.3846,
      "step": 903
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.815785702332434e-06,
      "loss": 0.396,
      "step": 904
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.796143910345153e-06,
      "loss": 0.3734,
      "step": 905
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.776506830009607e-06,
      "loss": 0.3672,
      "step": 906
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.756874538181397e-06,
      "loss": 0.3665,
      "step": 907
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.737247111697378e-06,
      "loss": 0.379,
      "step": 908
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.717624627375371e-06,
      "loss": 0.3837,
      "step": 909
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.698007162013851e-06,
      "loss": 0.3702,
      "step": 910
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.67839479239165e-06,
      "loss": 0.3712,
      "step": 911
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.658787595267654e-06,
      "loss": 0.3573,
      "step": 912
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.639185647380513e-06,
      "loss": 0.3914,
      "step": 913
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.619589025448318e-06,
      "loss": 0.3673,
      "step": 914
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.599997806168335e-06,
      "loss": 0.3679,
      "step": 915
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.58041206621667e-06,
      "loss": 0.3771,
      "step": 916
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.56083188224798e-06,
      "loss": 0.3701,
      "step": 917
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.541257330895198e-06,
      "loss": 0.3792,
      "step": 918
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.521688488769194e-06,
      "loss": 0.3777,
      "step": 919
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.502125432458495e-06,
      "loss": 0.366,
      "step": 920
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.482568238528994e-06,
      "loss": 0.3835,
      "step": 921
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.463016983523629e-06,
      "loss": 0.3991,
      "step": 922
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.443471743962098e-06,
      "loss": 0.3829,
      "step": 923
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.423932596340551e-06,
      "loss": 0.38,
      "step": 924
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.404399617131311e-06,
      "loss": 0.363,
      "step": 925
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.384872882782542e-06,
      "loss": 0.393,
      "step": 926
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.365352469717969e-06,
      "loss": 0.3717,
      "step": 927
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.345838454336589e-06,
      "loss": 0.3624,
      "step": 928
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.32633091301235e-06,
      "loss": 0.3872,
      "step": 929
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.306829922093857e-06,
      "loss": 0.3662,
      "step": 930
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.287335557904092e-06,
      "loss": 0.3882,
      "step": 931
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.267847896740092e-06,
      "loss": 0.3654,
      "step": 932
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.248367014872659e-06,
      "loss": 0.3864,
      "step": 933
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.228892988546067e-06,
      "loss": 0.3648,
      "step": 934
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.209425893977758e-06,
      "loss": 0.3853,
      "step": 935
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.189965807358033e-06,
      "loss": 0.3634,
      "step": 936
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.170512804849792e-06,
      "loss": 0.3776,
      "step": 937
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.151066962588181e-06,
      "loss": 0.377,
      "step": 938
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.131628356680331e-06,
      "loss": 0.375,
      "step": 939
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.112197063205063e-06,
      "loss": 0.369,
      "step": 940
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.092773158212567e-06,
      "loss": 0.3819,
      "step": 941
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.073356717724116e-06,
      "loss": 0.372,
      "step": 942
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.053947817731773e-06,
      "loss": 0.3779,
      "step": 943
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.034546534198087e-06,
      "loss": 0.3772,
      "step": 944
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.015152943055797e-06,
      "loss": 0.3895,
      "step": 945
    },
    {
      "epoch": 1.73,
      "learning_rate": 7.995767120207537e-06,
      "loss": 0.3579,
      "step": 946
    },
    {
      "epoch": 1.73,
      "learning_rate": 7.976389141525536e-06,
      "loss": 0.3775,
      "step": 947
    },
    {
      "epoch": 1.73,
      "learning_rate": 7.957019082851322e-06,
      "loss": 0.3559,
      "step": 948
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.93765701999543e-06,
      "loss": 0.3775,
      "step": 949
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.918303028737097e-06,
      "loss": 0.3761,
      "step": 950
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.898957184823974e-06,
      "loss": 0.3846,
      "step": 951
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.879619563971808e-06,
      "loss": 0.3969,
      "step": 952
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.860290241864191e-06,
      "loss": 0.3808,
      "step": 953
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.840969294152212e-06,
      "loss": 0.4052,
      "step": 954
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.821656796454192e-06,
      "loss": 0.366,
      "step": 955
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.802352824355385e-06,
      "loss": 0.3763,
      "step": 956
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.783057453407673e-06,
      "loss": 0.3634,
      "step": 957
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.76377075912927e-06,
      "loss": 0.3709,
      "step": 958
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.744492817004443e-06,
      "loss": 0.4129,
      "step": 959
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.725223702483193e-06,
      "loss": 0.3709,
      "step": 960
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.705963490980977e-06,
      "loss": 0.3795,
      "step": 961
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.68671225787841e-06,
      "loss": 0.3902,
      "step": 962
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.667470078520966e-06,
      "loss": 0.3665,
      "step": 963
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.648237028218676e-06,
      "loss": 0.3944,
      "step": 964
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.6290131822458554e-06,
      "loss": 0.3958,
      "step": 965
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.609798615840785e-06,
      "loss": 0.3705,
      "step": 966
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.590593404205432e-06,
      "loss": 0.3614,
      "step": 967
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.571397622505151e-06,
      "loss": 0.3722,
      "step": 968
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.552211345868388e-06,
      "loss": 0.3805,
      "step": 969
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.533034649386385e-06,
      "loss": 0.3705,
      "step": 970
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.513867608112901e-06,
      "loss": 0.3765,
      "step": 971
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.494710297063894e-06,
      "loss": 0.3618,
      "step": 972
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.4755627912172405e-06,
      "loss": 0.3753,
      "step": 973
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.456425165512453e-06,
      "loss": 0.3808,
      "step": 974
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.437297494850362e-06,
      "loss": 0.3847,
      "step": 975
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.418179854092842e-06,
      "loss": 0.3931,
      "step": 976
    },
    {
      "epoch": 1.79,
      "learning_rate": 7.399072318062514e-06,
      "loss": 0.3744,
      "step": 977
    },
    {
      "epoch": 1.79,
      "learning_rate": 7.379974961542447e-06,
      "loss": 0.3786,
      "step": 978
    },
    {
      "epoch": 1.79,
      "learning_rate": 7.3608878592758695e-06,
      "loss": 0.383,
      "step": 979
    },
    {
      "epoch": 1.79,
      "learning_rate": 7.341811085965884e-06,
      "loss": 0.3758,
      "step": 980
    },
    {
      "epoch": 1.79,
      "learning_rate": 7.322744716275159e-06,
      "loss": 0.3809,
      "step": 981
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.303688824825648e-06,
      "loss": 0.3748,
      "step": 982
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.2846434861982905e-06,
      "loss": 0.3634,
      "step": 983
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.2656087749327385e-06,
      "loss": 0.3976,
      "step": 984
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.246584765527038e-06,
      "loss": 0.3683,
      "step": 985
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.22757153243735e-06,
      "loss": 0.3628,
      "step": 986
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.208569150077668e-06,
      "loss": 0.3715,
      "step": 987
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.189577692819508e-06,
      "loss": 0.3725,
      "step": 988
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.170597234991632e-06,
      "loss": 0.3686,
      "step": 989
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.151627850879757e-06,
      "loss": 0.3601,
      "step": 990
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.132669614726249e-06,
      "loss": 0.3532,
      "step": 991
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.113722600729848e-06,
      "loss": 0.3715,
      "step": 992
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.094786883045383e-06,
      "loss": 0.3907,
      "step": 993
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.075862535783454e-06,
      "loss": 0.359,
      "step": 994
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.056949633010166e-06,
      "loss": 0.3632,
      "step": 995
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.038048248746842e-06,
      "loss": 0.3876,
      "step": 996
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.019158456969714e-06,
      "loss": 0.3827,
      "step": 997
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.000280331609641e-06,
      "loss": 0.3578,
      "step": 998
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.981413946551832e-06,
      "loss": 0.3849,
      "step": 999
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.962559375635536e-06,
      "loss": 0.3857,
      "step": 1000
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.943716692653771e-06,
      "loss": 0.3659,
      "step": 1001
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.924885971353027e-06,
      "loss": 0.3739,
      "step": 1002
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.906067285432978e-06,
      "loss": 0.3648,
      "step": 1003
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.8872607085461875e-06,
      "loss": 0.3642,
      "step": 1004
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.868466314297841e-06,
      "loss": 0.3777,
      "step": 1005
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.849684176245432e-06,
      "loss": 0.367,
      "step": 1006
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.830914367898485e-06,
      "loss": 0.3904,
      "step": 1007
    },
    {
      "epoch": 1.84,
      "learning_rate": 6.812156962718279e-06,
      "loss": 0.3662,
      "step": 1008
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.793412034117537e-06,
      "loss": 0.3596,
      "step": 1009
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.774679655460159e-06,
      "loss": 0.3785,
      "step": 1010
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.755959900060925e-06,
      "loss": 0.3715,
      "step": 1011
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.737252841185212e-06,
      "loss": 0.3673,
      "step": 1012
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.718558552048701e-06,
      "loss": 0.3782,
      "step": 1013
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.699877105817093e-06,
      "loss": 0.3803,
      "step": 1014
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.6812085756058394e-06,
      "loss": 0.3609,
      "step": 1015
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.662553034479824e-06,
      "loss": 0.3797,
      "step": 1016
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.643910555453094e-06,
      "loss": 0.3638,
      "step": 1017
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.625281211488592e-06,
      "loss": 0.364,
      "step": 1018
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.606665075497835e-06,
      "loss": 0.3955,
      "step": 1019
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.588062220340651e-06,
      "loss": 0.373,
      "step": 1020
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.569472718824894e-06,
      "loss": 0.3893,
      "step": 1021
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.55089664370615e-06,
      "loss": 0.3562,
      "step": 1022
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.532334067687458e-06,
      "loss": 0.3911,
      "step": 1023
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.513785063419025e-06,
      "loss": 0.3592,
      "step": 1024
    },
    {
      "epoch": 1.87,
      "learning_rate": 6.4952497034979415e-06,
      "loss": 0.3709,
      "step": 1025
    },
    {
      "epoch": 1.88,
      "learning_rate": 6.47672806046789e-06,
      "loss": 0.3692,
      "step": 1026
    },
    {
      "epoch": 1.88,
      "learning_rate": 6.45822020681888e-06,
      "loss": 0.3548,
      "step": 1027
    },
    {
      "epoch": 1.88,
      "learning_rate": 6.4397262149869475e-06,
      "loss": 0.3748,
      "step": 1028
    },
    {
      "epoch": 1.88,
      "learning_rate": 6.421246157353863e-06,
      "loss": 0.404,
      "step": 1029
    },
    {
      "epoch": 1.88,
      "learning_rate": 6.402780106246884e-06,
      "loss": 0.3677,
      "step": 1030
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.384328133938432e-06,
      "loss": 0.3834,
      "step": 1031
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.365890312645833e-06,
      "loss": 0.3599,
      "step": 1032
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.347466714531031e-06,
      "loss": 0.3856,
      "step": 1033
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.329057411700299e-06,
      "loss": 0.3641,
      "step": 1034
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.310662476203959e-06,
      "loss": 0.3804,
      "step": 1035
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.292281980036116e-06,
      "loss": 0.3792,
      "step": 1036
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.2739159951343435e-06,
      "loss": 0.3855,
      "step": 1037
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.25556459337943e-06,
      "loss": 0.3672,
      "step": 1038
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.237227846595093e-06,
      "loss": 0.3656,
      "step": 1039
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.218905826547688e-06,
      "loss": 0.3711,
      "step": 1040
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.200598604945929e-06,
      "loss": 0.3792,
      "step": 1041
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.1823062534406196e-06,
      "loss": 0.3718,
      "step": 1042
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.1640288436243635e-06,
      "loss": 0.3758,
      "step": 1043
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.14576644703128e-06,
      "loss": 0.3754,
      "step": 1044
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.127519135136732e-06,
      "loss": 0.3731,
      "step": 1045
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.109286979357052e-06,
      "loss": 0.3542,
      "step": 1046
    },
    {
      "epoch": 1.91,
      "learning_rate": 6.091070051049241e-06,
      "loss": 0.3595,
      "step": 1047
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.072868421510711e-06,
      "loss": 0.397,
      "step": 1048
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.0546821619790005e-06,
      "loss": 0.3925,
      "step": 1049
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.036511343631488e-06,
      "loss": 0.3714,
      "step": 1050
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.0183560375851095e-06,
      "loss": 0.371,
      "step": 1051
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.00021631489611e-06,
      "loss": 0.3731,
      "step": 1052
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.982092246559728e-06,
      "loss": 0.386,
      "step": 1053
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.963983903509936e-06,
      "loss": 0.3633,
      "step": 1054
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.945891356619166e-06,
      "loss": 0.3781,
      "step": 1055
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.927814676698027e-06,
      "loss": 0.3689,
      "step": 1056
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.909753934495019e-06,
      "loss": 0.3588,
      "step": 1057
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.891709200696281e-06,
      "loss": 0.3772,
      "step": 1058
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.873680545925281e-06,
      "loss": 0.3636,
      "step": 1059
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.855668040742564e-06,
      "loss": 0.3679,
      "step": 1060
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.837671755645473e-06,
      "loss": 0.3825,
      "step": 1061
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.819691761067866e-06,
      "loss": 0.3932,
      "step": 1062
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.801728127379837e-06,
      "loss": 0.3792,
      "step": 1063
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.783780924887462e-06,
      "loss": 0.3635,
      "step": 1064
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.765850223832489e-06,
      "loss": 0.3677,
      "step": 1065
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.74793609439209e-06,
      "loss": 0.3854,
      "step": 1066
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.730038606678593e-06,
      "loss": 0.3634,
      "step": 1067
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.712157830739176e-06,
      "loss": 0.381,
      "step": 1068
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.69429383655561e-06,
      "loss": 0.3723,
      "step": 1069
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.676446694044003e-06,
      "loss": 0.3711,
      "step": 1070
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.658616473054495e-06,
      "loss": 0.3741,
      "step": 1071
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.640803243370997e-06,
      "loss": 0.3747,
      "step": 1072
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.623007074710928e-06,
      "loss": 0.3629,
      "step": 1073
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.605228036724928e-06,
      "loss": 0.3681,
      "step": 1074
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.587466198996592e-06,
      "loss": 0.3885,
      "step": 1075
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.569721631042193e-06,
      "loss": 0.3743,
      "step": 1076
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.551994402310428e-06,
      "loss": 0.3781,
      "step": 1077
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.5342845821821145e-06,
      "loss": 0.3666,
      "step": 1078
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.51659223996994e-06,
      "loss": 0.3805,
      "step": 1079
    },
    {
      "epoch": 1.97,
      "learning_rate": 5.498917444918199e-06,
      "loss": 0.3554,
      "step": 1080
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.481260266202496e-06,
      "loss": 0.3702,
      "step": 1081
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.463620772929494e-06,
      "loss": 0.3821,
      "step": 1082
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.445999034136637e-06,
      "loss": 0.3757,
      "step": 1083
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.428395118791887e-06,
      "loss": 0.3588,
      "step": 1084
    },
    {
      "epoch": 1.98,
      "learning_rate": 5.410809095793436e-06,
      "loss": 0.3819,
      "step": 1085
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.393241033969466e-06,
      "loss": 0.3716,
      "step": 1086
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.375691002077853e-06,
      "loss": 0.3851,
      "step": 1087
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.358159068805902e-06,
      "loss": 0.3566,
      "step": 1088
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.340645302770097e-06,
      "loss": 0.3563,
      "step": 1089
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.323149772515812e-06,
      "loss": 0.3708,
      "step": 1090
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.305672546517047e-06,
      "loss": 0.3732,
      "step": 1091
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.288213693176165e-06,
      "loss": 0.3659,
      "step": 1092
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.270773280823627e-06,
      "loss": 0.3716,
      "step": 1093
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.253351377717707e-06,
      "loss": 0.3442,
      "step": 1094
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.235948052044255e-06,
      "loss": 0.238,
      "step": 1095
    },
    {
      "epoch": 2.0,
      "learning_rate": 5.218563371916403e-06,
      "loss": 0.2582,
      "step": 1096
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.2011974053743005e-06,
      "loss": 0.2428,
      "step": 1097
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.183850220384874e-06,
      "loss": 0.23,
      "step": 1098
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.166521884841533e-06,
      "loss": 0.2213,
      "step": 1099
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.149212466563901e-06,
      "loss": 0.241,
      "step": 1100
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.131922033297587e-06,
      "loss": 0.2402,
      "step": 1101
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.114650652713885e-06,
      "loss": 0.2182,
      "step": 1102
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.097398392409514e-06,
      "loss": 0.2325,
      "step": 1103
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.0801653199063745e-06,
      "loss": 0.2307,
      "step": 1104
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.062951502651261e-06,
      "loss": 0.2159,
      "step": 1105
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.045757008015606e-06,
      "loss": 0.2085,
      "step": 1106
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.0285819032952195e-06,
      "loss": 0.2257,
      "step": 1107
    },
    {
      "epoch": 2.03,
      "learning_rate": 5.0114262557100246e-06,
      "loss": 0.227,
      "step": 1108
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.99429013240379e-06,
      "loss": 0.2344,
      "step": 1109
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.9771736004438685e-06,
      "loss": 0.2166,
      "step": 1110
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.960076726820947e-06,
      "loss": 0.2376,
      "step": 1111
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.942999578448763e-06,
      "loss": 0.2309,
      "step": 1112
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.925942222163852e-06,
      "loss": 0.2286,
      "step": 1113
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.908904724725299e-06,
      "loss": 0.2108,
      "step": 1114
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.8918871528144574e-06,
      "loss": 0.229,
      "step": 1115
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.874889573034693e-06,
      "loss": 0.2102,
      "step": 1116
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.857912051911131e-06,
      "loss": 0.2198,
      "step": 1117
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.840954655890392e-06,
      "loss": 0.239,
      "step": 1118
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.824017451340325e-06,
      "loss": 0.226,
      "step": 1119
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.8071005045497644e-06,
      "loss": 0.2263,
      "step": 1120
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.7902038817282504e-06,
      "loss": 0.2212,
      "step": 1121
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.773327649005778e-06,
      "loss": 0.2362,
      "step": 1122
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.756471872432551e-06,
      "loss": 0.2148,
      "step": 1123
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.739636617978701e-06,
      "loss": 0.2317,
      "step": 1124
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.7228219515340446e-06,
      "loss": 0.243,
      "step": 1125
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.706027938907819e-06,
      "loss": 0.2238,
      "step": 1126
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.689254645828427e-06,
      "loss": 0.2252,
      "step": 1127
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.6725021379431764e-06,
      "loss": 0.2343,
      "step": 1128
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.655770480818039e-06,
      "loss": 0.2297,
      "step": 1129
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.639059739937365e-06,
      "loss": 0.233,
      "step": 1130
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.622369980703645e-06,
      "loss": 0.2335,
      "step": 1131
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.605701268437265e-06,
      "loss": 0.2341,
      "step": 1132
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.589053668376222e-06,
      "loss": 0.2298,
      "step": 1133
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.572427245675891e-06,
      "loss": 0.2288,
      "step": 1134
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.555822065408762e-06,
      "loss": 0.2173,
      "step": 1135
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.539238192564187e-06,
      "loss": 0.2377,
      "step": 1136
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.522675692048123e-06,
      "loss": 0.2165,
      "step": 1137
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.506134628682878e-06,
      "loss": 0.219,
      "step": 1138
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.489615067206869e-06,
      "loss": 0.2225,
      "step": 1139
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.473117072274352e-06,
      "loss": 0.2371,
      "step": 1140
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.45664070845517e-06,
      "loss": 0.2341,
      "step": 1141
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.440186040234524e-06,
      "loss": 0.2222,
      "step": 1142
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.423753132012681e-06,
      "loss": 0.2418,
      "step": 1143
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.407342048104753e-06,
      "loss": 0.255,
      "step": 1144
    },
    {
      "epoch": 2.09,
      "learning_rate": 4.390952852740445e-06,
      "loss": 0.2225,
      "step": 1145
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.374585610063784e-06,
      "loss": 0.2304,
      "step": 1146
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.358240384132876e-06,
      "loss": 0.2242,
      "step": 1147
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.341917238919671e-06,
      "loss": 0.2019,
      "step": 1148
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.325616238309687e-06,
      "loss": 0.228,
      "step": 1149
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.309337446101779e-06,
      "loss": 0.236,
      "step": 1150
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.293080926007878e-06,
      "loss": 0.2225,
      "step": 1151
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.2768467416527536e-06,
      "loss": 0.2296,
      "step": 1152
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.260634956573746e-06,
      "loss": 0.2249,
      "step": 1153
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.244445634220545e-06,
      "loss": 0.2215,
      "step": 1154
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.228278837954914e-06,
      "loss": 0.2291,
      "step": 1155
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.212134631050453e-06,
      "loss": 0.2361,
      "step": 1156
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.196013076692363e-06,
      "loss": 0.2341,
      "step": 1157
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.179914237977177e-06,
      "loss": 0.2222,
      "step": 1158
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.163838177912527e-06,
      "loss": 0.2285,
      "step": 1159
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.147784959416894e-06,
      "loss": 0.2218,
      "step": 1160
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.13175464531936e-06,
      "loss": 0.2199,
      "step": 1161
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.1157472983593636e-06,
      "loss": 0.2321,
      "step": 1162
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.099762981186463e-06,
      "loss": 0.2171,
      "step": 1163
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.083801756360071e-06,
      "loss": 0.2307,
      "step": 1164
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.067863686349226e-06,
      "loss": 0.22,
      "step": 1165
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.051948833532342e-06,
      "loss": 0.219,
      "step": 1166
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.036057260196973e-06,
      "loss": 0.2258,
      "step": 1167
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.02018902853955e-06,
      "loss": 0.2309,
      "step": 1168
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.004344200665159e-06,
      "loss": 0.2321,
      "step": 1169
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.988522838587281e-06,
      "loss": 0.217,
      "step": 1170
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.972725004227561e-06,
      "loss": 0.2199,
      "step": 1171
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.956950759415556e-06,
      "loss": 0.2276,
      "step": 1172
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.9412001658885114e-06,
      "loss": 0.2356,
      "step": 1173
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.925473285291092e-06,
      "loss": 0.2261,
      "step": 1174
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.909770179175155e-06,
      "loss": 0.2175,
      "step": 1175
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.894090908999524e-06,
      "loss": 0.2233,
      "step": 1176
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.878435536129721e-06,
      "loss": 0.2272,
      "step": 1177
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.862804121837733e-06,
      "loss": 0.2215,
      "step": 1178
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.847196727301796e-06,
      "loss": 0.2259,
      "step": 1179
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.831613413606124e-06,
      "loss": 0.2329,
      "step": 1180
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.816054241740685e-06,
      "loss": 0.2217,
      "step": 1181
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.8005192726009664e-06,
      "loss": 0.2215,
      "step": 1182
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.785008566987728e-06,
      "loss": 0.2322,
      "step": 1183
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.7695221856067597e-06,
      "loss": 0.2227,
      "step": 1184
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.754060189068671e-06,
      "loss": 0.2436,
      "step": 1185
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.7386226378886082e-06,
      "loss": 0.2193,
      "step": 1186
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.723209592486059e-06,
      "loss": 0.2235,
      "step": 1187
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.707821113184601e-06,
      "loss": 0.2304,
      "step": 1188
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.6924572602116606e-06,
      "loss": 0.2248,
      "step": 1189
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.6771180936982785e-06,
      "loss": 0.2369,
      "step": 1190
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.6618036736788888e-06,
      "loss": 0.2376,
      "step": 1191
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.646514060091061e-06,
      "loss": 0.2486,
      "step": 1192
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.6312493127752835e-06,
      "loss": 0.2493,
      "step": 1193
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.61600949147472e-06,
      "loss": 0.2367,
      "step": 1194
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.6007946558349815e-06,
      "loss": 0.24,
      "step": 1195
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.5856048654038867e-06,
      "loss": 0.2247,
      "step": 1196
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.570440179631234e-06,
      "loss": 0.225,
      "step": 1197
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.555300657868571e-06,
      "loss": 0.2321,
      "step": 1198
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.5401863593689554e-06,
      "loss": 0.2221,
      "step": 1199
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.5250973432867195e-06,
      "loss": 0.2307,
      "step": 1200
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.5100336686772596e-06,
      "loss": 0.2337,
      "step": 1201
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.4949953944967785e-06,
      "loss": 0.2388,
      "step": 1202
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.4799825796020716e-06,
      "loss": 0.22,
      "step": 1203
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.46499528275029e-06,
      "loss": 0.2314,
      "step": 1204
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.450033562598714e-06,
      "loss": 0.2283,
      "step": 1205
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.4350974777045175e-06,
      "loss": 0.2371,
      "step": 1206
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.4201870865245525e-06,
      "loss": 0.2307,
      "step": 1207
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.4053024474151032e-06,
      "loss": 0.2241,
      "step": 1208
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.3904436186316636e-06,
      "loss": 0.2346,
      "step": 1209
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.3756106583287206e-06,
      "loss": 0.2175,
      "step": 1210
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.360803624559509e-06,
      "loss": 0.2314,
      "step": 1211
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.346022575275795e-06,
      "loss": 0.2318,
      "step": 1212
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.3312675683276453e-06,
      "loss": 0.2212,
      "step": 1213
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.3165386614632045e-06,
      "loss": 0.2371,
      "step": 1214
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.3018359123284604e-06,
      "loss": 0.2321,
      "step": 1215
    },
    {
      "epoch": 2.22,
      "learning_rate": 3.2871593784670386e-06,
      "loss": 0.2155,
      "step": 1216
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.2725091173199497e-06,
      "loss": 0.2462,
      "step": 1217
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.2578851862253802e-06,
      "loss": 0.2124,
      "step": 1218
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.2432876424184756e-06,
      "loss": 0.2309,
      "step": 1219
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.2287165430310985e-06,
      "loss": 0.2306,
      "step": 1220
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.2141719450916065e-06,
      "loss": 0.228,
      "step": 1221
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.1996539055246543e-06,
      "loss": 0.223,
      "step": 1222
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.1851624811509385e-06,
      "loss": 0.2313,
      "step": 1223
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.17069772868699e-06,
      "loss": 0.2217,
      "step": 1224
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.1562597047449597e-06,
      "loss": 0.2358,
      "step": 1225
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.141848465832381e-06,
      "loss": 0.2052,
      "step": 1226
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.1274640683519577e-06,
      "loss": 0.2137,
      "step": 1227
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.1131065686013417e-06,
      "loss": 0.22,
      "step": 1228
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.0987760227729124e-06,
      "loss": 0.2189,
      "step": 1229
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.084472486953558e-06,
      "loss": 0.2226,
      "step": 1230
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.0701960171244504e-06,
      "loss": 0.2232,
      "step": 1231
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.055946669160841e-06,
      "loss": 0.2341,
      "step": 1232
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.0417244988318217e-06,
      "loss": 0.2355,
      "step": 1233
    },
    {
      "epoch": 2.26,
      "learning_rate": 3.0275295618001177e-06,
      "loss": 0.2165,
      "step": 1234
    },
    {
      "epoch": 2.26,
      "learning_rate": 3.0133619136218772e-06,
      "loss": 0.2178,
      "step": 1235
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.9992216097464346e-06,
      "loss": 0.2284,
      "step": 1236
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.98510870551611e-06,
      "loss": 0.2317,
      "step": 1237
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.9710232561659834e-06,
      "loss": 0.2226,
      "step": 1238
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.9569653168236847e-06,
      "loss": 0.2357,
      "step": 1239
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.942934942509171e-06,
      "loss": 0.2228,
      "step": 1240
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 0.2308,
      "step": 1241
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.914957108503722e-06,
      "loss": 0.2207,
      "step": 1242
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.9010097583124208e-06,
      "loss": 0.2319,
      "step": 1243
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.8870901921477656e-06,
      "loss": 0.2325,
      "step": 1244
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.873198464488147e-06,
      "loss": 0.2352,
      "step": 1245
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.8593346297030077e-06,
      "loss": 0.2267,
      "step": 1246
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.8454987420526215e-06,
      "loss": 0.2375,
      "step": 1247
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.831690855687882e-06,
      "loss": 0.2489,
      "step": 1248
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.8179110246500905e-06,
      "loss": 0.2138,
      "step": 1249
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.8041593028707513e-06,
      "loss": 0.2278,
      "step": 1250
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.790435744171348e-06,
      "loss": 0.2378,
      "step": 1251
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.7767404022631404e-06,
      "loss": 0.2267,
      "step": 1252
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.7630733307469593e-06,
      "loss": 0.2359,
      "step": 1253
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.749434583112984e-06,
      "loss": 0.2303,
      "step": 1254
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.7358242127405434e-06,
      "loss": 0.2433,
      "step": 1255
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.7222422728979015e-06,
      "loss": 0.2192,
      "step": 1256
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.708688816742051e-06,
      "loss": 0.2309,
      "step": 1257
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.695163897318508e-06,
      "loss": 0.2202,
      "step": 1258
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.681667567561095e-06,
      "loss": 0.2278,
      "step": 1259
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.668199880291751e-06,
      "loss": 0.2145,
      "step": 1260
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.6547608882203056e-06,
      "loss": 0.223,
      "step": 1261
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.6413506439442804e-06,
      "loss": 0.2479,
      "step": 1262
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.6279691999486945e-06,
      "loss": 0.2328,
      "step": 1263
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.614616608605833e-06,
      "loss": 0.2407,
      "step": 1264
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.601292922175066e-06,
      "loss": 0.2036,
      "step": 1265
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.587998192802639e-06,
      "loss": 0.2284,
      "step": 1266
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.5747324725214595e-06,
      "loss": 0.2138,
      "step": 1267
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.5614958132508983e-06,
      "loss": 0.2319,
      "step": 1268
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.5482882667965945e-06,
      "loss": 0.2272,
      "step": 1269
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.5351098848502385e-06,
      "loss": 0.2344,
      "step": 1270
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.5219607189893804e-06,
      "loss": 0.234,
      "step": 1271
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.508840820677221e-06,
      "loss": 0.2281,
      "step": 1272
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.495750241262417e-06,
      "loss": 0.2278,
      "step": 1273
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.482689031978872e-06,
      "loss": 0.2355,
      "step": 1274
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.4696572439455503e-06,
      "loss": 0.2283,
      "step": 1275
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.4566549281662587e-06,
      "loss": 0.2226,
      "step": 1276
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.443682135529456e-06,
      "loss": 0.2282,
      "step": 1277
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.430738916808061e-06,
      "loss": 0.2181,
      "step": 1278
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.417825322659236e-06,
      "loss": 0.2316,
      "step": 1279
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.4049414036242057e-06,
      "loss": 0.2322,
      "step": 1280
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.392087210128048e-06,
      "loss": 0.2192,
      "step": 1281
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.379262792479504e-06,
      "loss": 0.2271,
      "step": 1282
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.3664682008707753e-06,
      "loss": 0.2293,
      "step": 1283
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.3537034853773357e-06,
      "loss": 0.2374,
      "step": 1284
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.340968695957724e-06,
      "loss": 0.2176,
      "step": 1285
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.328263882453353e-06,
      "loss": 0.2213,
      "step": 1286
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.3155890945883274e-06,
      "loss": 0.2323,
      "step": 1287
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.3029443819692255e-06,
      "loss": 0.229,
      "step": 1288
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.29032979408492e-06,
      "loss": 0.2237,
      "step": 1289
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.2777453803063834e-06,
      "loss": 0.2139,
      "step": 1290
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.265191189886492e-06,
      "loss": 0.2132,
      "step": 1291
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.252667271959834e-06,
      "loss": 0.2254,
      "step": 1292
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.240173675542513e-06,
      "loss": 0.2262,
      "step": 1293
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.2277104495319714e-06,
      "loss": 0.2137,
      "step": 1294
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.215277642706778e-06,
      "loss": 0.2216,
      "step": 1295
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.202875303726445e-06,
      "loss": 0.2329,
      "step": 1296
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.190503481131252e-06,
      "loss": 0.2147,
      "step": 1297
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.1781622233420353e-06,
      "loss": 0.2178,
      "step": 1298
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1658515786599985e-06,
      "loss": 0.2366,
      "step": 1299
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1535715952665494e-06,
      "loss": 0.2285,
      "step": 1300
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1413223212230806e-06,
      "loss": 0.2186,
      "step": 1301
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.129103804470797e-06,
      "loss": 0.2248,
      "step": 1302
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1169160928305323e-06,
      "loss": 0.2256,
      "step": 1303
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.1047592340025456e-06,
      "loss": 0.231,
      "step": 1304
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.0926332755663493e-06,
      "loss": 0.2316,
      "step": 1305
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.080538264980523e-06,
      "loss": 0.2235,
      "step": 1306
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.068474249582508e-06,
      "loss": 0.2222,
      "step": 1307
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.056441276588448e-06,
      "loss": 0.2166,
      "step": 1308
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.044439393092994e-06,
      "loss": 0.2282,
      "step": 1309
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0324686460691124e-06,
      "loss": 0.2286,
      "step": 1310
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.020529082367909e-06,
      "loss": 0.2274,
      "step": 1311
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0086207487184504e-06,
      "loss": 0.23,
      "step": 1312
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.9967436917275683e-06,
      "loss": 0.2103,
      "step": 1313
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.984897957879687e-06,
      "loss": 0.2535,
      "step": 1314
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.9730835935366355e-06,
      "loss": 0.2315,
      "step": 1315
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.9613006449374715e-06,
      "loss": 0.2072,
      "step": 1316
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.949549158198295e-06,
      "loss": 0.2247,
      "step": 1317
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.937829179312076e-06,
      "loss": 0.2368,
      "step": 1318
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.9261407541484657e-06,
      "loss": 0.2217,
      "step": 1319
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.9144839284536177e-06,
      "loss": 0.2369,
      "step": 1320
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.9028587478500126e-06,
      "loss": 0.2233,
      "step": 1321
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8912652578362857e-06,
      "loss": 0.2573,
      "step": 1322
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8797035037870326e-06,
      "loss": 0.2232,
      "step": 1323
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8681735309526438e-06,
      "loss": 0.2237,
      "step": 1324
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8566753844591257e-06,
      "loss": 0.2256,
      "step": 1325
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8452091093079217e-06,
      "loss": 0.2164,
      "step": 1326
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.833774750375734e-06,
      "loss": 0.2266,
      "step": 1327
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.822372352414361e-06,
      "loss": 0.2283,
      "step": 1328
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.8110019600505014e-06,
      "loss": 0.2344,
      "step": 1329
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.7996636177855931e-06,
      "loss": 0.2167,
      "step": 1330
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.7883573699956425e-06,
      "loss": 0.2197,
      "step": 1331
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.7770832609310374e-06,
      "loss": 0.2177,
      "step": 1332
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.765841334716384e-06,
      "loss": 0.2287,
      "step": 1333
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.75463163535033e-06,
      "loss": 0.2278,
      "step": 1334
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.7434542067053972e-06,
      "loss": 0.2334,
      "step": 1335
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.7323090925277986e-06,
      "loss": 0.2454,
      "step": 1336
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.7211963364372852e-06,
      "loss": 0.2329,
      "step": 1337
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.7101159819269585e-06,
      "loss": 0.2198,
      "step": 1338
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.6990680723631048e-06,
      "loss": 0.2246,
      "step": 1339
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.6880526509850347e-06,
      "loss": 0.2284,
      "step": 1340
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.6770697609049024e-06,
      "loss": 0.2225,
      "step": 1341
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.6661194451075346e-06,
      "loss": 0.2256,
      "step": 1342
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6552017464502813e-06,
      "loss": 0.238,
      "step": 1343
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6443167076628285e-06,
      "loss": 0.2221,
      "step": 1344
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6334643713470344e-06,
      "loss": 0.2208,
      "step": 1345
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6226447799767775e-06,
      "loss": 0.2333,
      "step": 1346
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6118579758977693e-06,
      "loss": 0.2256,
      "step": 1347
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6011040013274015e-06,
      "loss": 0.2213,
      "step": 1348
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.5903828983545778e-06,
      "loss": 0.2201,
      "step": 1349
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.5796947089395475e-06,
      "loss": 0.2105,
      "step": 1350
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.5690394749137471e-06,
      "loss": 0.234,
      "step": 1351
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.5584172379796247e-06,
      "loss": 0.2156,
      "step": 1352
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.547828039710496e-06,
      "loss": 0.2157,
      "step": 1353
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5372719215503584e-06,
      "loss": 0.2254,
      "step": 1354
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5267489248137445e-06,
      "loss": 0.2256,
      "step": 1355
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5162590906855612e-06,
      "loss": 0.2369,
      "step": 1356
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.5058024602209164e-06,
      "loss": 0.2076,
      "step": 1357
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.4953790743449703e-06,
      "loss": 0.2173,
      "step": 1358
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4849889738527656e-06,
      "loss": 0.2138,
      "step": 1359
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4746321994090774e-06,
      "loss": 0.2123,
      "step": 1360
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4643087915482445e-06,
      "loss": 0.2198,
      "step": 1361
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4540187906740245e-06,
      "loss": 0.2285,
      "step": 1362
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4437622370594172e-06,
      "loss": 0.2239,
      "step": 1363
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.4335391708465185e-06,
      "loss": 0.2292,
      "step": 1364
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.4233496320463668e-06,
      "loss": 0.2247,
      "step": 1365
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.4131936605387764e-06,
      "loss": 0.2128,
      "step": 1366
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.4030712960721848e-06,
      "loss": 0.2246,
      "step": 1367
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.3929825782635009e-06,
      "loss": 0.2223,
      "step": 1368
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.3829275465979476e-06,
      "loss": 0.2205,
      "step": 1369
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3729062404289017e-06,
      "loss": 0.2226,
      "step": 1370
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3629186989777542e-06,
      "loss": 0.236,
      "step": 1371
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3529649613337425e-06,
      "loss": 0.2163,
      "step": 1372
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3430450664537986e-06,
      "loss": 0.2407,
      "step": 1373
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3331590531624116e-06,
      "loss": 0.2281,
      "step": 1374
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3233069601514537e-06,
      "loss": 0.2149,
      "step": 1375
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.3134888259800472e-06,
      "loss": 0.2262,
      "step": 1376
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.303704689074402e-06,
      "loss": 0.2205,
      "step": 1377
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.2939545877276726e-06,
      "loss": 0.2056,
      "step": 1378
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.2842385600998032e-06,
      "loss": 0.2155,
      "step": 1379
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.2745566442173773e-06,
      "loss": 0.23,
      "step": 1380
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.264908877973482e-06,
      "loss": 0.2223,
      "step": 1381
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2552952991275402e-06,
      "loss": 0.2245,
      "step": 1382
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2457159453051715e-06,
      "loss": 0.2398,
      "step": 1383
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2361708539980565e-06,
      "loss": 0.2337,
      "step": 1384
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2266600625637659e-06,
      "loss": 0.2271,
      "step": 1385
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.2171836082256316e-06,
      "loss": 0.2274,
      "step": 1386
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.2077415280726047e-06,
      "loss": 0.2086,
      "step": 1387
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.1983338590590932e-06,
      "loss": 0.2182,
      "step": 1388
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.1889606380048301e-06,
      "loss": 0.2216,
      "step": 1389
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.1796219015947286e-06,
      "loss": 0.2282,
      "step": 1390
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.1703176863787313e-06,
      "loss": 0.2226,
      "step": 1391
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1610480287716764e-06,
      "loss": 0.2225,
      "step": 1392
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.151812965053144e-06,
      "loss": 0.2129,
      "step": 1393
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1426125313673287e-06,
      "loss": 0.2307,
      "step": 1394
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1334467637228818e-06,
      "loss": 0.2319,
      "step": 1395
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1243156979927873e-06,
      "loss": 0.2159,
      "step": 1396
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1152193699142067e-06,
      "loss": 0.2068,
      "step": 1397
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.1061578150883445e-06,
      "loss": 0.2347,
      "step": 1398
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.0971310689803173e-06,
      "loss": 0.221,
      "step": 1399
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.088139166919e-06,
      "loss": 0.2269,
      "step": 1400
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.0791821440968963e-06,
      "loss": 0.2128,
      "step": 1401
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.070260035570002e-06,
      "loss": 0.2271,
      "step": 1402
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.0613728762576625e-06,
      "loss": 0.2259,
      "step": 1403
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.0525207009424377e-06,
      "loss": 0.2132,
      "step": 1404
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.043703544269975e-06,
      "loss": 0.2284,
      "step": 1405
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.0349214407488573e-06,
      "loss": 0.229,
      "step": 1406
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.026174424750479e-06,
      "loss": 0.2467,
      "step": 1407
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.0174625305089125e-06,
      "loss": 0.2324,
      "step": 1408
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.0087857921207667e-06,
      "loss": 0.2332,
      "step": 1409
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.0001442435450581e-06,
      "loss": 0.2395,
      "step": 1410
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.915379186030771e-07,
      "loss": 0.2356,
      "step": 1411
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.829668509782576e-07,
      "loss": 0.2155,
      "step": 1412
    },
    {
      "epoch": 2.58,
      "learning_rate": 9.744310742160434e-07,
      "loss": 0.2289,
      "step": 1413
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.65930621723752e-07,
      "loss": 0.2332,
      "step": 1414
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.57465526770457e-07,
      "loss": 0.2308,
      "step": 1415
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.490358224868445e-07,
      "loss": 0.2226,
      "step": 1416
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.406415418650872e-07,
      "loss": 0.2356,
      "step": 1417
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.322827177587212e-07,
      "loss": 0.2406,
      "step": 1418
    },
    {
      "epoch": 2.59,
      "learning_rate": 9.239593828825133e-07,
      "loss": 0.2185,
      "step": 1419
    },
    {
      "epoch": 2.6,
      "learning_rate": 9.156715698123231e-07,
      "loss": 0.2246,
      "step": 1420
    },
    {
      "epoch": 2.6,
      "learning_rate": 9.074193109849971e-07,
      "loss": 0.2206,
      "step": 1421
    },
    {
      "epoch": 2.6,
      "learning_rate": 8.992026386982222e-07,
      "loss": 0.2167,
      "step": 1422
    },
    {
      "epoch": 2.6,
      "learning_rate": 8.910215851104087e-07,
      "loss": 0.2406,
      "step": 1423
    },
    {
      "epoch": 2.6,
      "learning_rate": 8.828761822405641e-07,
      "loss": 0.2078,
      "step": 1424
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.747664619681639e-07,
      "loss": 0.219,
      "step": 1425
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.666924560330293e-07,
      "loss": 0.2145,
      "step": 1426
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.586541960352035e-07,
      "loss": 0.2229,
      "step": 1427
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.506517134348269e-07,
      "loss": 0.2193,
      "step": 1428
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.426850395520126e-07,
      "loss": 0.2321,
      "step": 1429
    },
    {
      "epoch": 2.61,
      "learning_rate": 8.347542055667313e-07,
      "loss": 0.2325,
      "step": 1430
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.268592425186761e-07,
      "loss": 0.2269,
      "step": 1431
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.190001813071524e-07,
      "loss": 0.2141,
      "step": 1432
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.111770526909535e-07,
      "loss": 0.2299,
      "step": 1433
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.033898872882396e-07,
      "loss": 0.2205,
      "step": 1434
    },
    {
      "epoch": 2.62,
      "learning_rate": 7.956387155764145e-07,
      "loss": 0.2404,
      "step": 1435
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.87923567892015e-07,
      "loss": 0.2244,
      "step": 1436
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.802444744305826e-07,
      "loss": 0.2343,
      "step": 1437
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.726014652465508e-07,
      "loss": 0.2221,
      "step": 1438
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.649945702531314e-07,
      "loss": 0.2429,
      "step": 1439
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.574238192221872e-07,
      "loss": 0.2117,
      "step": 1440
    },
    {
      "epoch": 2.63,
      "learning_rate": 7.498892417841208e-07,
      "loss": 0.2254,
      "step": 1441
    },
    {
      "epoch": 2.64,
      "learning_rate": 7.42390867427758e-07,
      "loss": 0.2282,
      "step": 1442
    },
    {
      "epoch": 2.64,
      "learning_rate": 7.349287255002401e-07,
      "loss": 0.2273,
      "step": 1443
    },
    {
      "epoch": 2.64,
      "learning_rate": 7.275028452068944e-07,
      "loss": 0.2244,
      "step": 1444
    },
    {
      "epoch": 2.64,
      "learning_rate": 7.20113255611129e-07,
      "loss": 0.2246,
      "step": 1445
    },
    {
      "epoch": 2.64,
      "learning_rate": 7.127599856343192e-07,
      "loss": 0.224,
      "step": 1446
    },
    {
      "epoch": 2.65,
      "learning_rate": 7.054430640556898e-07,
      "loss": 0.2178,
      "step": 1447
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.981625195122077e-07,
      "loss": 0.2191,
      "step": 1448
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.909183804984698e-07,
      "loss": 0.2344,
      "step": 1449
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.837106753665823e-07,
      "loss": 0.2256,
      "step": 1450
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.76539432326061e-07,
      "loss": 0.2204,
      "step": 1451
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.69404679443717e-07,
      "loss": 0.2277,
      "step": 1452
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.623064446435434e-07,
      "loss": 0.2196,
      "step": 1453
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.55244755706611e-07,
      "loss": 0.2256,
      "step": 1454
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.482196402709562e-07,
      "loss": 0.2312,
      "step": 1455
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.41231125831474e-07,
      "loss": 0.2293,
      "step": 1456
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.342792397398101e-07,
      "loss": 0.2246,
      "step": 1457
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.273640092042577e-07,
      "loss": 0.2194,
      "step": 1458
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.204854612896427e-07,
      "loss": 0.2295,
      "step": 1459
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.136436229172238e-07,
      "loss": 0.2144,
      "step": 1460
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.06838520864591e-07,
      "loss": 0.2115,
      "step": 1461
    },
    {
      "epoch": 2.67,
      "learning_rate": 6.000701817655474e-07,
      "loss": 0.238,
      "step": 1462
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.933386321100155e-07,
      "loss": 0.2273,
      "step": 1463
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.866438982439382e-07,
      "loss": 0.2274,
      "step": 1464
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.799860063691609e-07,
      "loss": 0.2175,
      "step": 1465
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.733649825433385e-07,
      "loss": 0.2202,
      "step": 1466
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.667808526798358e-07,
      "loss": 0.2238,
      "step": 1467
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.602336425476174e-07,
      "loss": 0.2211,
      "step": 1468
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.537233777711526e-07,
      "loss": 0.203,
      "step": 1469
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.472500838303141e-07,
      "loss": 0.2197,
      "step": 1470
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.408137860602803e-07,
      "loss": 0.2118,
      "step": 1471
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.344145096514308e-07,
      "loss": 0.2388,
      "step": 1472
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.280522796492504e-07,
      "loss": 0.2448,
      "step": 1473
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.217271209542384e-07,
      "loss": 0.2383,
      "step": 1474
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.154390583217961e-07,
      "loss": 0.2271,
      "step": 1475
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.091881163621426e-07,
      "loss": 0.2254,
      "step": 1476
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.029743195402149e-07,
      "loss": 0.2384,
      "step": 1477
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.96797692175568e-07,
      "loss": 0.2375,
      "step": 1478
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.906582584422859e-07,
      "loss": 0.21,
      "step": 1479
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.845560423688822e-07,
      "loss": 0.2286,
      "step": 1480
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.784910678382093e-07,
      "loss": 0.2467,
      "step": 1481
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.724633585873628e-07,
      "loss": 0.2318,
      "step": 1482
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.6647293820759167e-07,
      "loss": 0.2179,
      "step": 1483
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.6051983014420155e-07,
      "loss": 0.2177,
      "step": 1484
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.546040576964628e-07,
      "loss": 0.2291,
      "step": 1485
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.487256440175292e-07,
      "loss": 0.2166,
      "step": 1486
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.428846121143326e-07,
      "loss": 0.2256,
      "step": 1487
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.370809848475033e-07,
      "loss": 0.2134,
      "step": 1488
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.313147849312749e-07,
      "loss": 0.2329,
      "step": 1489
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.2558603493340066e-07,
      "loss": 0.2207,
      "step": 1490
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.198947572750611e-07,
      "loss": 0.2215,
      "step": 1491
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.142409742307774e-07,
      "loss": 0.2353,
      "step": 1492
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.0862470792832565e-07,
      "loss": 0.2102,
      "step": 1493
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.030459803486464e-07,
      "loss": 0.2189,
      "step": 1494
    },
    {
      "epoch": 2.73,
      "learning_rate": 3.975048133257631e-07,
      "loss": 0.2178,
      "step": 1495
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.9200122854669674e-07,
      "loss": 0.2272,
      "step": 1496
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.8653524755137063e-07,
      "loss": 0.2198,
      "step": 1497
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.8110689173254443e-07,
      "loss": 0.2215,
      "step": 1498
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.7571618233571584e-07,
      "loss": 0.2324,
      "step": 1499
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.703631404590402e-07,
      "loss": 0.2319,
      "step": 1500
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.6504778705325314e-07,
      "loss": 0.2204,
      "step": 1501
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.5977014292158496e-07,
      "loss": 0.2236,
      "step": 1502
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.545302287196772e-07,
      "loss": 0.2181,
      "step": 1503
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.493280649555042e-07,
      "loss": 0.2298,
      "step": 1504
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.441636719892938e-07,
      "loss": 0.2221,
      "step": 1505
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.3903707003344775e-07,
      "loss": 0.221,
      "step": 1506
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.339482791524562e-07,
      "loss": 0.2164,
      "step": 1507
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.28897319262832e-07,
      "loss": 0.2261,
      "step": 1508
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.2388421013301973e-07,
      "loss": 0.2268,
      "step": 1509
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.1890897138332266e-07,
      "loss": 0.2278,
      "step": 1510
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.139716224858336e-07,
      "loss": 0.2172,
      "step": 1511
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.090721827643439e-07,
      "loss": 0.2258,
      "step": 1512
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.042106713942816e-07,
      "loss": 0.2308,
      "step": 1513
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.993871074026289e-07,
      "loss": 0.231,
      "step": 1514
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.9460150966784786e-07,
      "loss": 0.2189,
      "step": 1515
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.898538969198117e-07,
      "loss": 0.2305,
      "step": 1516
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.85144287739727e-07,
      "loss": 0.2188,
      "step": 1517
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.8047270056005937e-07,
      "loss": 0.2247,
      "step": 1518
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.7583915366446554e-07,
      "loss": 0.2252,
      "step": 1519
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.712436651877237e-07,
      "loss": 0.2302,
      "step": 1520
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.666862531156533e-07,
      "loss": 0.2167,
      "step": 1521
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.6216693528505197e-07,
      "loss": 0.229,
      "step": 1522
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.5768572938362767e-07,
      "loss": 0.2085,
      "step": 1523
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.5324265294992103e-07,
      "loss": 0.2208,
      "step": 1524
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.488377233732431e-07,
      "loss": 0.219,
      "step": 1525
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.4447095789360885e-07,
      "loss": 0.2299,
      "step": 1526
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.401423736016628e-07,
      "loss": 0.2214,
      "step": 1527
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.3585198743861782e-07,
      "loss": 0.2281,
      "step": 1528
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.3159981619618633e-07,
      "loss": 0.2118,
      "step": 1529
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.273858765165149e-07,
      "loss": 0.2244,
      "step": 1530
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.2321018489211977e-07,
      "loss": 0.2415,
      "step": 1531
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.190727576658225e-07,
      "loss": 0.2179,
      "step": 1532
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.1497361103068548e-07,
      "loss": 0.2175,
      "step": 1533
    },
    {
      "epoch": 2.81,
      "learning_rate": 2.1091276102994662e-07,
      "loss": 0.2237,
      "step": 1534
    },
    {
      "epoch": 2.81,
      "learning_rate": 2.068902235569603e-07,
      "loss": 0.2146,
      "step": 1535
    },
    {
      "epoch": 2.81,
      "learning_rate": 2.0290601435513423e-07,
      "loss": 0.2036,
      "step": 1536
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.9896014901786386e-07,
      "loss": 0.2328,
      "step": 1537
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.9505264298847693e-07,
      "loss": 0.2265,
      "step": 1538
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.9118351156017124e-07,
      "loss": 0.2165,
      "step": 1539
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.8735276987595031e-07,
      "loss": 0.2332,
      "step": 1540
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.8356043292857008e-07,
      "loss": 0.2325,
      "step": 1541
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.7980651556048e-07,
      "loss": 0.24,
      "step": 1542
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.760910324637599e-07,
      "loss": 0.2246,
      "step": 1543
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.7241399818006765e-07,
      "loss": 0.2169,
      "step": 1544
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.6877542710057814e-07,
      "loss": 0.2167,
      "step": 1545
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.651753334659323e-07,
      "loss": 0.2103,
      "step": 1546
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.6161373136617365e-07,
      "loss": 0.224,
      "step": 1547
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.5809063474070075e-07,
      "loss": 0.2212,
      "step": 1548
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.5460605737820823e-07,
      "loss": 0.2103,
      "step": 1549
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.5116001291663463e-07,
      "loss": 0.2193,
      "step": 1550
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.4775251484310803e-07,
      "loss": 0.2057,
      "step": 1551
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.443835764938939e-07,
      "loss": 0.202,
      "step": 1552
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.4105321105434056e-07,
      "loss": 0.2245,
      "step": 1553
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.3776143155883492e-07,
      "loss": 0.2147,
      "step": 1554
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.3450825089074137e-07,
      "loss": 0.2134,
      "step": 1555
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.3129368178235845e-07,
      "loss": 0.2199,
      "step": 1556
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.281177368148645e-07,
      "loss": 0.2249,
      "step": 1557
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.2498042841827317e-07,
      "loss": 0.2215,
      "step": 1558
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.2188176887138137e-07,
      "loss": 0.2268,
      "step": 1559
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.1882177030172248e-07,
      "loss": 0.225,
      "step": 1560
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.1580044468551876e-07,
      "loss": 0.231,
      "step": 1561
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.128178038476324e-07,
      "loss": 0.2231,
      "step": 1562
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.0987385946152451e-07,
      "loss": 0.2284,
      "step": 1563
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.0696862304920175e-07,
      "loss": 0.229,
      "step": 1564
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.0410210598118086e-07,
      "loss": 0.2214,
      "step": 1565
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.0127431947643318e-07,
      "loss": 0.2261,
      "step": 1566
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.848527460234902e-08,
      "loss": 0.2347,
      "step": 1567
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.573498227469336e-08,
      "loss": 0.2339,
      "step": 1568
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.302345325755801e-08,
      "loss": 0.2262,
      "step": 1569
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.03506981633262e-08,
      "loss": 0.2076,
      "step": 1570
    },
    {
      "epoch": 2.87,
      "learning_rate": 8.771672745262583e-08,
      "loss": 0.2345,
      "step": 1571
    },
    {
      "epoch": 2.87,
      "learning_rate": 8.512155143429068e-08,
      "loss": 0.2214,
      "step": 1572
    },
    {
      "epoch": 2.88,
      "learning_rate": 8.256518026532046e-08,
      "loss": 0.218,
      "step": 1573
    },
    {
      "epoch": 2.88,
      "learning_rate": 8.004762395083965e-08,
      "loss": 0.2267,
      "step": 1574
    },
    {
      "epoch": 2.88,
      "learning_rate": 7.756889234405984e-08,
      "loss": 0.2094,
      "step": 1575
    },
    {
      "epoch": 2.88,
      "learning_rate": 7.512899514624084e-08,
      "loss": 0.2129,
      "step": 1576
    },
    {
      "epoch": 2.88,
      "learning_rate": 7.272794190665178e-08,
      "loss": 0.2206,
      "step": 1577
    },
    {
      "epoch": 2.89,
      "learning_rate": 7.036574202253343e-08,
      "loss": 0.2357,
      "step": 1578
    },
    {
      "epoch": 2.89,
      "learning_rate": 6.804240473906486e-08,
      "loss": 0.2163,
      "step": 1579
    },
    {
      "epoch": 2.89,
      "learning_rate": 6.575793914932127e-08,
      "loss": 0.2225,
      "step": 1580
    },
    {
      "epoch": 2.89,
      "learning_rate": 6.351235419424506e-08,
      "loss": 0.2033,
      "step": 1581
    },
    {
      "epoch": 2.89,
      "learning_rate": 6.130565866260485e-08,
      "loss": 0.2257,
      "step": 1582
    },
    {
      "epoch": 2.89,
      "learning_rate": 5.913786119096654e-08,
      "loss": 0.2178,
      "step": 1583
    },
    {
      "epoch": 2.9,
      "learning_rate": 5.7008970263654485e-08,
      "loss": 0.2265,
      "step": 1584
    },
    {
      "epoch": 2.9,
      "learning_rate": 5.491899421272262e-08,
      "loss": 0.2275,
      "step": 1585
    },
    {
      "epoch": 2.9,
      "learning_rate": 5.2867941217917826e-08,
      "loss": 0.2143,
      "step": 1586
    },
    {
      "epoch": 2.9,
      "learning_rate": 5.085581930665107e-08,
      "loss": 0.2268,
      "step": 1587
    },
    {
      "epoch": 2.9,
      "learning_rate": 4.8882636353965174e-08,
      "loss": 0.2317,
      "step": 1588
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.6948400082502676e-08,
      "loss": 0.223,
      "step": 1589
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.505311806247803e-08,
      "loss": 0.2234,
      "step": 1590
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.319679771164431e-08,
      "loss": 0.2162,
      "step": 1591
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.1379446295268796e-08,
      "loss": 0.2307,
      "step": 1592
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.960107092609966e-08,
      "loss": 0.2119,
      "step": 1593
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.786167856434375e-08,
      "loss": 0.2124,
      "step": 1594
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.6161276017634416e-08,
      "loss": 0.2148,
      "step": 1595
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.449986994100596e-08,
      "loss": 0.2364,
      "step": 1596
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.287746683687032e-08,
      "loss": 0.2057,
      "step": 1597
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.1294073054987105e-08,
      "loss": 0.2212,
      "step": 1598
    },
    {
      "epoch": 2.92,
      "learning_rate": 2.9749694792442498e-08,
      "loss": 0.2166,
      "step": 1599
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.8244338093625923e-08,
      "loss": 0.2194,
      "step": 1600
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.67780088502001e-08,
      "loss": 0.2327,
      "step": 1601
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.5350712801084365e-08,
      "loss": 0.2194,
      "step": 1602
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.3962455532430262e-08,
      "loss": 0.2273,
      "step": 1603
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.261324247759933e-08,
      "loss": 0.2118,
      "step": 1604
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.1303078917139787e-08,
      "loss": 0.2252,
      "step": 1605
    },
    {
      "epoch": 2.94,
      "learning_rate": 2.0031969978770993e-08,
      "loss": 0.2274,
      "step": 1606
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.8799920637359027e-08,
      "loss": 0.2256,
      "step": 1607
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.760693571489669e-08,
      "loss": 0.2135,
      "step": 1608
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.6453019880490194e-08,
      "loss": 0.2098,
      "step": 1609
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.533817765033252e-08,
      "loss": 0.2179,
      "step": 1610
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.4262413387695629e-08,
      "loss": 0.2273,
      "step": 1611
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.3225731302903833e-08,
      "loss": 0.2448,
      "step": 1612
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.2228135453324907e-08,
      "loss": 0.2242,
      "step": 1613
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.1269629743346777e-08,
      "loss": 0.2096,
      "step": 1614
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.0350217924370853e-08,
      "loss": 0.2198,
      "step": 1615
    },
    {
      "epoch": 2.95,
      "learning_rate": 9.469903594790941e-09,
      "loss": 0.2105,
      "step": 1616
    },
    {
      "epoch": 2.96,
      "learning_rate": 8.628690199979917e-09,
      "loss": 0.2255,
      "step": 1617
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.826581032279734e-09,
      "loss": 0.2211,
      "step": 1618
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.063579230983664e-09,
      "loss": 0.2208,
      "step": 1619
    },
    {
      "epoch": 2.96,
      "learning_rate": 6.3396877823296286e-09,
      "loss": 0.2099,
      "step": 1620
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.654909519483554e-09,
      "loss": 0.2167,
      "step": 1621
    },
    {
      "epoch": 2.97,
      "learning_rate": 5.0092471225293705e-09,
      "loss": 0.2323,
      "step": 1622
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.402703118461249e-09,
      "loss": 0.225,
      "step": 1623
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.835279881171383e-09,
      "loss": 0.2259,
      "step": 1624
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.3069796314399993e-09,
      "loss": 0.2149,
      "step": 1625
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.8178044369286948e-09,
      "loss": 0.2228,
      "step": 1626
    },
    {
      "epoch": 2.98,
      "learning_rate": 2.367756212171557e-09,
      "loss": 0.2286,
      "step": 1627
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.9568367185673897e-09,
      "loss": 0.2264,
      "step": 1628
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.5850475643719443e-09,
      "loss": 0.2208,
      "step": 1629
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.2523902046934767e-09,
      "loss": 0.2182,
      "step": 1630
    },
    {
      "epoch": 2.98,
      "learning_rate": 9.58865941487197e-10,
      "loss": 0.2207,
      "step": 1631
    },
    {
      "epoch": 2.98,
      "learning_rate": 7.044759235486087e-10,
      "loss": 0.2339,
      "step": 1632
    },
    {
      "epoch": 2.99,
      "learning_rate": 4.892211465079566e-10,
      "loss": 0.2241,
      "step": 1633
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.1310245283022735e-10,
      "loss": 0.2129,
      "step": 1634
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.761205318095982e-10,
      "loss": 0.1959,
      "step": 1635
    },
    {
      "epoch": 2.99,
      "learning_rate": 7.827591956388603e-11,
      "loss": 0.2139,
      "step": 1636
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.956899903787779e-11,
      "loss": 0.2199,
      "step": 1637
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0,
      "loss": 0.22,
      "step": 1638
    },
    {
      "epoch": 3.0,
      "step": 1638,
      "total_flos": 4.3401313435024097e+18,
      "train_loss": 0.3987319528856411,
      "train_runtime": 82484.0165,
      "train_samples_per_second": 2.546,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 1638,
  "num_train_epochs": 3,
  "save_steps": 2000,
  "total_flos": 4.3401313435024097e+18,
  "trial_name": null,
  "trial_params": null
}