diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,29308 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 4880, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 0.0, - "loss": 3.5933, - "step": 1 - }, - { - "epoch": 0.0, - "learning_rate": 2.777904193701827e-06, - "loss": 2.2437, - "step": 2 - }, - { - "epoch": 0.0, - "learning_rate": 4.4028739776134346e-06, - "loss": 2.2515, - "step": 3 - }, - { - "epoch": 0.0, - "learning_rate": 5.555808387403654e-06, - "loss": 1.783, - "step": 4 - }, - { - "epoch": 0.0, - "learning_rate": 6.4500937922616966e-06, - "loss": 1.2938, - "step": 5 - }, - { - "epoch": 0.0, - "learning_rate": 7.180778171315261e-06, - "loss": 0.9161, - "step": 6 - }, - { - "epoch": 0.0, - "learning_rate": 7.798563011193284e-06, - "loss": 1.0127, - "step": 7 - }, - { - "epoch": 0.0, - "learning_rate": 8.33371258110548e-06, - "loss": 0.8876, - "step": 8 - }, - { - "epoch": 0.0, - "learning_rate": 8.805747955226869e-06, - "loss": 0.9679, - "step": 9 - }, - { - "epoch": 0.0, - "learning_rate": 9.227997985963526e-06, - "loss": 0.9053, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 9.609969601237248e-06, - "loss": 0.8196, - "step": 11 - }, - { - "epoch": 0.0, - "learning_rate": 9.958682365017088e-06, - "loss": 0.8252, - "step": 12 - }, - { - "epoch": 0.0, - "learning_rate": 1.0279467011564946e-05, - "loss": 0.7619, - "step": 13 - }, - { - "epoch": 0.0, - "learning_rate": 1.057646720489511e-05, - "loss": 0.7925, - "step": 14 - }, - { - "epoch": 0.0, - "learning_rate": 1.0852967769875131e-05, - "loss": 0.8032, - "step": 15 - }, - { - "epoch": 0.0, - "learning_rate": 1.1111616774807308e-05, - "loss": 0.7657, - "step": 16 - }, - { - "epoch": 0.0, - "learning_rate": 1.1354580168309704e-05, - "loss": 0.8107, - "step": 17 - }, - { - "epoch": 0.0, - "learning_rate": 1.1583652148928695e-05, - "loss": 0.7799, - "step": 18 - }, - { - "epoch": 0.0, - "learning_rate": 1.1800335654136308e-05, - "loss": 0.8401, - "step": 19 - }, - { - "epoch": 0.0, - "learning_rate": 1.200590217966535e-05, - "loss": 0.8119, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 1.2201436988806718e-05, - "loss": 0.7124, - "step": 21 - }, - { - "epoch": 0.0, - "learning_rate": 1.2387873794939074e-05, - "loss": 0.7553, - "step": 22 - }, - { - "epoch": 0.0, - "learning_rate": 1.2566021728200815e-05, - "loss": 0.7419, - "step": 23 - }, - { - "epoch": 0.0, - "learning_rate": 1.2736586558718915e-05, - "loss": 0.8081, - "step": 24 - }, - { - "epoch": 0.01, - "learning_rate": 1.2900187584523393e-05, - "loss": 0.8098, - "step": 25 - }, - { - "epoch": 0.01, - "learning_rate": 1.3057371205266773e-05, - "loss": 0.8393, - "step": 26 - }, - { - "epoch": 0.01, - "learning_rate": 1.3208621932840304e-05, - "loss": 0.7638, - "step": 27 - }, - { - "epoch": 0.01, - "learning_rate": 1.3354371398596937e-05, - "loss": 0.7409, - "step": 28 - }, - { - "epoch": 0.01, - "learning_rate": 1.3495005779288657e-05, - "loss": 0.7461, - "step": 29 - }, - { - "epoch": 0.01, - "learning_rate": 1.3630871963576958e-05, - "loss": 0.7704, - "step": 30 - }, - { - "epoch": 0.01, - "learning_rate": 1.3762282707045817e-05, - "loss": 0.6995, - "step": 31 - }, - { - "epoch": 0.01, - "learning_rate": 1.3889520968509133e-05, - "loss": 0.6895, - "step": 32 - }, - { - "epoch": 0.01, - "learning_rate": 1.4012843578850681e-05, - "loss": 0.7926, - "step": 33 - }, - { - "epoch": 0.01, - "learning_rate": 1.413248436201153e-05, - "loss": 0.793, - "step": 34 - }, - { - "epoch": 0.01, - "learning_rate": 1.4248656803454982e-05, - "loss": 0.6742, - "step": 35 - }, - { - "epoch": 0.01, - "learning_rate": 1.4361556342630522e-05, - "loss": 0.6313, - "step": 36 - }, - { - "epoch": 0.01, - "learning_rate": 1.4471362351274756e-05, - "loss": 0.7821, - "step": 37 - }, - { - "epoch": 0.01, - "learning_rate": 1.4578239847838136e-05, - "loss": 0.763, - "step": 38 - }, - { - "epoch": 0.01, - "learning_rate": 1.468234098917838e-05, - "loss": 0.7297, - "step": 39 - }, - { - "epoch": 0.01, - "learning_rate": 1.4783806373367179e-05, - "loss": 0.7348, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 1.4882766181604204e-05, - "loss": 0.6909, - "step": 41 - }, - { - "epoch": 0.01, - "learning_rate": 1.4979341182508544e-05, - "loss": 0.7491, - "step": 42 - }, - { - "epoch": 0.01, - "learning_rate": 1.5073643618223373e-05, - "loss": 0.7822, - "step": 43 - }, - { - "epoch": 0.01, - "learning_rate": 1.51657779886409e-05, - "loss": 0.7643, - "step": 44 - }, - { - "epoch": 0.01, - "learning_rate": 1.5255841747488564e-05, - "loss": 0.7291, - "step": 45 - }, - { - "epoch": 0.01, - "learning_rate": 1.5343925921902642e-05, - "loss": 0.6382, - "step": 46 - }, - { - "epoch": 0.01, - "learning_rate": 1.5430115665364723e-05, - "loss": 0.7925, - "step": 47 - }, - { - "epoch": 0.01, - "learning_rate": 1.5514490752420744e-05, - "loss": 0.8972, - "step": 48 - }, - { - "epoch": 0.01, - "learning_rate": 1.5597126022386567e-05, - "loss": 0.749, - "step": 49 - }, - { - "epoch": 0.01, - "learning_rate": 1.5678091778225222e-05, - "loss": 0.8301, - "step": 50 - }, - { - "epoch": 0.01, - "learning_rate": 1.5757454145923137e-05, - "loss": 0.7883, - "step": 51 - }, - { - "epoch": 0.01, - "learning_rate": 1.58352753989686e-05, - "loss": 0.7751, - "step": 52 - }, - { - "epoch": 0.01, - "learning_rate": 1.5911614251921585e-05, - "loss": 0.7092, - "step": 53 - }, - { - "epoch": 0.01, - "learning_rate": 1.598652612654213e-05, - "loss": 0.6999, - "step": 54 - }, - { - "epoch": 0.01, - "learning_rate": 1.6060063393498945e-05, - "loss": 0.7918, - "step": 55 - }, - { - "epoch": 0.01, - "learning_rate": 1.6132275592298766e-05, - "loss": 0.7217, - "step": 56 - }, - { - "epoch": 0.01, - "learning_rate": 1.6203209631749744e-05, - "loss": 0.7239, - "step": 57 - }, - { - "epoch": 0.01, - "learning_rate": 1.6272909972990483e-05, - "loss": 0.7712, - "step": 58 - }, - { - "epoch": 0.01, - "learning_rate": 1.6341418796873162e-05, - "loss": 0.6962, - "step": 59 - }, - { - "epoch": 0.01, - "learning_rate": 1.6408776157278784e-05, - "loss": 0.8628, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 1.6475020121759948e-05, - "loss": 0.728, - "step": 61 - }, - { - "epoch": 0.01, - "learning_rate": 1.6540186900747646e-05, - "loss": 0.7969, - "step": 62 - }, - { - "epoch": 0.01, - "learning_rate": 1.660431096642015e-05, - "loss": 0.656, - "step": 63 - }, - { - "epoch": 0.01, - "learning_rate": 1.666742516221096e-05, - "loss": 0.625, - "step": 64 - }, - { - "epoch": 0.01, - "learning_rate": 1.672956080382664e-05, - "loss": 0.7408, - "step": 65 - }, - { - "epoch": 0.01, - "learning_rate": 1.6790747772552507e-05, - "loss": 0.7619, - "step": 66 - }, - { - "epoch": 0.01, - "learning_rate": 1.6851014601541966e-05, - "loss": 0.7388, - "step": 67 - }, - { - "epoch": 0.01, - "learning_rate": 1.6910388555713357e-05, - "loss": 0.6945, - "step": 68 - }, - { - "epoch": 0.01, - "learning_rate": 1.696889570581425e-05, - "loss": 0.8354, - "step": 69 - }, - { - "epoch": 0.01, - "learning_rate": 1.702656099715681e-05, - "loss": 0.7311, - "step": 70 - }, - { - "epoch": 0.01, - "learning_rate": 1.7083408313477785e-05, - "loss": 0.6938, - "step": 71 - }, - { - "epoch": 0.01, - "learning_rate": 1.713946053633235e-05, - "loss": 0.7499, - "step": 72 - }, - { - "epoch": 0.01, - "learning_rate": 1.719473960039136e-05, - "loss": 0.7368, - "step": 73 - }, - { - "epoch": 0.02, - "learning_rate": 1.7249266544976585e-05, - "loss": 0.7388, - "step": 74 - }, - { - "epoch": 0.02, - "learning_rate": 1.7303061562136828e-05, - "loss": 0.7117, - "step": 75 - }, - { - "epoch": 0.02, - "learning_rate": 1.7356144041539965e-05, - "loss": 0.6664, - "step": 76 - }, - { - "epoch": 0.02, - "learning_rate": 1.7408532612430532e-05, - "loss": 0.8124, - "step": 77 - }, - { - "epoch": 0.02, - "learning_rate": 1.7460245182880206e-05, - "loss": 0.5355, - "step": 78 - }, - { - "epoch": 0.02, - "learning_rate": 1.7511298976538014e-05, - "loss": 0.779, - "step": 79 - }, - { - "epoch": 0.02, - "learning_rate": 1.7561710567069004e-05, - "loss": 0.8136, - "step": 80 - }, - { - "epoch": 0.02, - "learning_rate": 1.7611495910453738e-05, - "loss": 0.6966, - "step": 81 - }, - { - "epoch": 0.02, - "learning_rate": 1.766067037530603e-05, - "loss": 0.7834, - "step": 82 - }, - { - "epoch": 0.02, - "learning_rate": 1.7709248771353133e-05, - "loss": 0.6937, - "step": 83 - }, - { - "epoch": 0.02, - "learning_rate": 1.775724537621037e-05, - "loss": 0.7236, - "step": 84 - }, - { - "epoch": 0.02, - "learning_rate": 1.78046739605714e-05, - "loss": 0.8308, - "step": 85 - }, - { - "epoch": 0.02, - "learning_rate": 1.78515478119252e-05, - "loss": 0.8143, - "step": 86 - }, - { - "epoch": 0.02, - "learning_rate": 1.789787975690209e-05, - "loss": 0.7556, - "step": 87 - }, - { - "epoch": 0.02, - "learning_rate": 1.794368218234273e-05, - "loss": 0.7089, - "step": 88 - }, - { - "epoch": 0.02, - "learning_rate": 1.7988967055176675e-05, - "loss": 0.6929, - "step": 89 - }, - { - "epoch": 0.02, - "learning_rate": 1.8033745941190393e-05, - "loss": 0.7764, - "step": 90 - }, - { - "epoch": 0.02, - "learning_rate": 1.8078030022758228e-05, - "loss": 0.7275, - "step": 91 - }, - { - "epoch": 0.02, - "learning_rate": 1.8121830115604468e-05, - "loss": 0.717, - "step": 92 - }, - { - "epoch": 0.02, - "learning_rate": 1.8165156684659252e-05, - "loss": 0.8622, - "step": 93 - }, - { - "epoch": 0.02, - "learning_rate": 1.820801985906655e-05, - "loss": 0.7165, - "step": 94 - }, - { - "epoch": 0.02, - "learning_rate": 1.8250429446398005e-05, - "loss": 0.7525, - "step": 95 - }, - { - "epoch": 0.02, - "learning_rate": 1.829239494612257e-05, - "loss": 0.8292, - "step": 96 - }, - { - "epoch": 0.02, - "learning_rate": 1.8333925562378166e-05, - "loss": 0.7814, - "step": 97 - }, - { - "epoch": 0.02, - "learning_rate": 1.8375030216088396e-05, - "loss": 0.8116, - "step": 98 - }, - { - "epoch": 0.02, - "learning_rate": 1.8415717556464116e-05, - "loss": 0.7939, - "step": 99 - }, - { - "epoch": 0.02, - "learning_rate": 1.845599597192705e-05, - "loss": 0.8212, - "step": 100 - }, - { - "epoch": 0.02, - "learning_rate": 1.8495873600489872e-05, - "loss": 0.8217, - "step": 101 - }, - { - "epoch": 0.02, - "learning_rate": 1.8535358339624962e-05, - "loss": 0.5936, - "step": 102 - }, - { - "epoch": 0.02, - "learning_rate": 1.857445785565174e-05, - "loss": 0.757, - "step": 103 - }, - { - "epoch": 0.02, - "learning_rate": 1.8613179592670426e-05, - "loss": 0.755, - "step": 104 - }, - { - "epoch": 0.02, - "learning_rate": 1.8651530781068415e-05, - "loss": 0.7283, - "step": 105 - }, - { - "epoch": 0.02, - "learning_rate": 1.868951844562341e-05, - "loss": 0.7322, - "step": 106 - }, - { - "epoch": 0.02, - "learning_rate": 1.872714941322616e-05, - "loss": 0.8023, - "step": 107 - }, - { - "epoch": 0.02, - "learning_rate": 1.876443032024396e-05, - "loss": 0.7104, - "step": 108 - }, - { - "epoch": 0.02, - "learning_rate": 1.880136761954479e-05, - "loss": 0.7041, - "step": 109 - }, - { - "epoch": 0.02, - "learning_rate": 1.883796758720077e-05, - "loss": 0.8201, - "step": 110 - }, - { - "epoch": 0.02, - "learning_rate": 1.887423632888819e-05, - "loss": 0.6919, - "step": 111 - }, - { - "epoch": 0.02, - "learning_rate": 1.8910179786000588e-05, - "loss": 0.705, - "step": 112 - }, - { - "epoch": 0.02, - "learning_rate": 1.8945803741490124e-05, - "loss": 0.762, - "step": 113 - }, - { - "epoch": 0.02, - "learning_rate": 1.898111382545157e-05, - "loss": 0.7917, - "step": 114 - }, - { - "epoch": 0.02, - "learning_rate": 1.901611552046251e-05, - "loss": 0.7186, - "step": 115 - }, - { - "epoch": 0.02, - "learning_rate": 1.905081416669231e-05, - "loss": 0.7368, - "step": 116 - }, - { - "epoch": 0.02, - "learning_rate": 1.9085214966791815e-05, - "loss": 0.7207, - "step": 117 - }, - { - "epoch": 0.02, - "learning_rate": 1.9119322990574988e-05, - "loss": 0.7109, - "step": 118 - }, - { - "epoch": 0.02, - "learning_rate": 1.9153143179502987e-05, - "loss": 0.6874, - "step": 119 - }, - { - "epoch": 0.02, - "learning_rate": 1.918668035098061e-05, - "loss": 0.6352, - "step": 120 - }, - { - "epoch": 0.02, - "learning_rate": 1.9219939202474497e-05, - "loss": 0.7289, - "step": 121 - }, - { - "epoch": 0.03, - "learning_rate": 1.9252924315461777e-05, - "loss": 0.7384, - "step": 122 - }, - { - "epoch": 0.03, - "learning_rate": 1.9285640159217637e-05, - "loss": 0.6951, - "step": 123 - }, - { - "epoch": 0.03, - "learning_rate": 1.9318091094449472e-05, - "loss": 0.7997, - "step": 124 - }, - { - "epoch": 0.03, - "learning_rate": 1.9350281376785095e-05, - "loss": 0.7681, - "step": 125 - }, - { - "epoch": 0.03, - "learning_rate": 1.938221516012198e-05, - "loss": 0.6825, - "step": 126 - }, - { - "epoch": 0.03, - "learning_rate": 1.9413896499844137e-05, - "loss": 0.7522, - "step": 127 - }, - { - "epoch": 0.03, - "learning_rate": 1.9445329355912786e-05, - "loss": 0.7496, - "step": 128 - }, - { - "epoch": 0.03, - "learning_rate": 1.9476517595836804e-05, - "loss": 0.8061, - "step": 129 - }, - { - "epoch": 0.03, - "learning_rate": 1.950746499752847e-05, - "loss": 0.6837, - "step": 130 - }, - { - "epoch": 0.03, - "learning_rate": 1.953817525204977e-05, - "loss": 0.7653, - "step": 131 - }, - { - "epoch": 0.03, - "learning_rate": 1.9568651966254333e-05, - "loss": 0.7, - "step": 132 - }, - { - "epoch": 0.03, - "learning_rate": 1.9598898665329592e-05, - "loss": 0.7534, - "step": 133 - }, - { - "epoch": 0.03, - "learning_rate": 1.962891879524379e-05, - "loss": 0.6868, - "step": 134 - }, - { - "epoch": 0.03, - "learning_rate": 1.9658715725102002e-05, - "loss": 0.7463, - "step": 135 - }, - { - "epoch": 0.03, - "learning_rate": 1.9688292749415182e-05, - "loss": 0.7433, - "step": 136 - }, - { - "epoch": 0.03, - "learning_rate": 1.971765309028616e-05, - "loss": 0.7416, - "step": 137 - }, - { - "epoch": 0.03, - "learning_rate": 1.9746799899516077e-05, - "loss": 0.6743, - "step": 138 - }, - { - "epoch": 0.03, - "learning_rate": 1.9775736260634812e-05, - "loss": 0.7101, - "step": 139 - }, - { - "epoch": 0.03, - "learning_rate": 1.980446519085863e-05, - "loss": 0.6958, - "step": 140 - }, - { - "epoch": 0.03, - "learning_rate": 1.9832989642978158e-05, - "loss": 0.8082, - "step": 141 - }, - { - "epoch": 0.03, - "learning_rate": 1.9861312507179614e-05, - "loss": 0.6331, - "step": 142 - }, - { - "epoch": 0.03, - "learning_rate": 1.9889436612802193e-05, - "loss": 0.6387, - "step": 143 - }, - { - "epoch": 0.03, - "learning_rate": 1.9917364730034175e-05, - "loss": 0.7494, - "step": 144 - }, - { - "epoch": 0.03, - "learning_rate": 1.9945099571550352e-05, - "loss": 0.7863, - "step": 145 - }, - { - "epoch": 0.03, - "learning_rate": 1.9972643794093188e-05, - "loss": 0.7496, - "step": 146 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6587, - "step": 147 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 148 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6336, - "step": 149 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.664, - "step": 150 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7358, - "step": 151 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6943, - "step": 152 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.8279, - "step": 153 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7096, - "step": 154 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7393, - "step": 155 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 156 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7672, - "step": 157 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7328, - "step": 158 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7681, - "step": 159 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.702, - "step": 160 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7541, - "step": 161 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7012, - "step": 162 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6909, - "step": 163 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7527, - "step": 164 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6535, - "step": 165 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.795, - "step": 166 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7008, - "step": 167 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7038, - "step": 168 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.6115, - "step": 169 - }, - { - "epoch": 0.03, - "learning_rate": 2e-05, - "loss": 0.7737, - "step": 170 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7338, - "step": 171 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.5894, - "step": 172 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7355, - "step": 173 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7827, - "step": 174 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.693, - "step": 175 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7025, - "step": 176 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7198, - "step": 177 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7633, - "step": 178 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7382, - "step": 179 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7321, - "step": 180 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.716, - "step": 181 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6415, - "step": 182 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7029, - "step": 183 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6033, - "step": 184 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6357, - "step": 185 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7661, - "step": 186 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7686, - "step": 187 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.776, - "step": 188 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7607, - "step": 189 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6341, - "step": 190 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7233, - "step": 191 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7386, - "step": 192 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6846, - "step": 193 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7549, - "step": 194 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7705, - "step": 195 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7053, - "step": 196 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7366, - "step": 197 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6767, - "step": 198 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6908, - "step": 199 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.808, - "step": 200 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7585, - "step": 201 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6659, - "step": 202 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.769, - "step": 203 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 204 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 205 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6665, - "step": 206 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.723, - "step": 207 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7086, - "step": 208 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6996, - "step": 209 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7188, - "step": 210 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.6405, - "step": 211 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 212 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 213 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.8405, - "step": 214 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.696, - "step": 215 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.754, - "step": 216 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7583, - "step": 217 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.8066, - "step": 218 - }, - { - "epoch": 0.04, - "learning_rate": 2e-05, - "loss": 0.7413, - "step": 219 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7483, - "step": 220 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6971, - "step": 221 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7321, - "step": 222 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6654, - "step": 223 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6638, - "step": 224 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7429, - "step": 225 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6914, - "step": 226 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.628, - "step": 227 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.5703, - "step": 228 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.748, - "step": 229 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 230 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7674, - "step": 231 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7331, - "step": 232 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6303, - "step": 233 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7467, - "step": 234 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6545, - "step": 235 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7122, - "step": 236 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7089, - "step": 237 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6745, - "step": 238 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6949, - "step": 239 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6594, - "step": 240 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7559, - "step": 241 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7299, - "step": 242 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.8002, - "step": 243 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.679, - "step": 244 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7069, - "step": 245 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6686, - "step": 246 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6485, - "step": 247 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7147, - "step": 248 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7549, - "step": 249 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7259, - "step": 250 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7116, - "step": 251 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 252 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 253 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7052, - "step": 254 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6206, - "step": 255 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 256 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 257 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7953, - "step": 258 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7412, - "step": 259 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6589, - "step": 260 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 261 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7408, - "step": 262 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.6781, - "step": 263 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7357, - "step": 264 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7842, - "step": 265 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7053, - "step": 266 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7964, - "step": 267 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.5816, - "step": 268 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7075, - "step": 269 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7609, - "step": 270 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6835, - "step": 271 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6958, - "step": 272 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7561, - "step": 273 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 274 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6885, - "step": 275 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7209, - "step": 276 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.8189, - "step": 277 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.8516, - "step": 278 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.649, - "step": 279 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7142, - "step": 280 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7713, - "step": 281 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7112, - "step": 282 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6876, - "step": 283 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6398, - "step": 284 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7238, - "step": 285 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7647, - "step": 286 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6657, - "step": 287 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6503, - "step": 288 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7837, - "step": 289 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6454, - "step": 290 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7443, - "step": 291 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7553, - "step": 292 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7038, - "step": 293 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7083, - "step": 294 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6687, - "step": 295 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 296 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7232, - "step": 297 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6914, - "step": 298 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 299 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 300 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7572, - "step": 301 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6613, - "step": 302 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6429, - "step": 303 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7426, - "step": 304 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7407, - "step": 305 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7487, - "step": 306 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7152, - "step": 307 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7909, - "step": 308 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7102, - "step": 309 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.788, - "step": 310 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7569, - "step": 311 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7237, - "step": 312 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6722, - "step": 313 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6713, - "step": 314 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7267, - "step": 315 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.7173, - "step": 316 - }, - { - "epoch": 0.06, - "learning_rate": 2e-05, - "loss": 0.6927, - "step": 317 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6649, - "step": 318 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.5921, - "step": 319 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 320 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6915, - "step": 321 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6684, - "step": 322 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6895, - "step": 323 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7137, - "step": 324 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 325 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7181, - "step": 326 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7215, - "step": 327 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7143, - "step": 328 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7017, - "step": 329 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 330 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7292, - "step": 331 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7254, - "step": 332 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.678, - "step": 333 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7352, - "step": 334 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7054, - "step": 335 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7679, - "step": 336 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6704, - "step": 337 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6112, - "step": 338 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 339 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6101, - "step": 340 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7185, - "step": 341 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7228, - "step": 342 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.8103, - "step": 343 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6849, - "step": 344 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6996, - "step": 345 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6056, - "step": 346 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6246, - "step": 347 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.64, - "step": 348 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6807, - "step": 349 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7482, - "step": 350 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7594, - "step": 351 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7464, - "step": 352 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 353 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.5938, - "step": 354 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.5647, - "step": 355 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7233, - "step": 356 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6683, - "step": 357 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6731, - "step": 358 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7113, - "step": 359 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6975, - "step": 360 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6961, - "step": 361 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.7218, - "step": 362 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 363 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6757, - "step": 364 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.8026, - "step": 365 - }, - { - "epoch": 0.07, - "learning_rate": 2e-05, - "loss": 0.6789, - "step": 366 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6496, - "step": 367 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7154, - "step": 368 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7145, - "step": 369 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7941, - "step": 370 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.729, - "step": 371 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7, - "step": 372 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6689, - "step": 373 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6966, - "step": 374 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7141, - "step": 375 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6823, - "step": 376 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7261, - "step": 377 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.5853, - "step": 378 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6727, - "step": 379 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7361, - "step": 380 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 381 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6445, - "step": 382 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6538, - "step": 383 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6603, - "step": 384 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6354, - "step": 385 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6385, - "step": 386 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7387, - "step": 387 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7445, - "step": 388 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7223, - "step": 389 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7246, - "step": 390 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7516, - "step": 391 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6528, - "step": 392 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6888, - "step": 393 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6924, - "step": 394 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7548, - "step": 395 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.715, - "step": 396 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6871, - "step": 397 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7206, - "step": 398 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6759, - "step": 399 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7274, - "step": 400 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.734, - "step": 401 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7622, - "step": 402 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.621, - "step": 403 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7987, - "step": 404 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.615, - "step": 405 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7788, - "step": 406 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6893, - "step": 407 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6126, - "step": 408 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7482, - "step": 409 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6323, - "step": 410 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.5427, - "step": 411 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7652, - "step": 412 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.7298, - "step": 413 - }, - { - "epoch": 0.08, - "learning_rate": 2e-05, - "loss": 0.6953, - "step": 414 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7206, - "step": 415 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 416 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7037, - "step": 417 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6637, - "step": 418 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7056, - "step": 419 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7106, - "step": 420 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6892, - "step": 421 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6835, - "step": 422 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6684, - "step": 423 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6629, - "step": 424 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5997, - "step": 425 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5005, - "step": 426 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.8433, - "step": 427 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6478, - "step": 428 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7336, - "step": 429 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6782, - "step": 430 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.8452, - "step": 431 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6738, - "step": 432 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.594, - "step": 433 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6854, - "step": 434 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5772, - "step": 435 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6632, - "step": 436 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7032, - "step": 437 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7648, - "step": 438 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6433, - "step": 439 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6564, - "step": 440 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6607, - "step": 441 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6297, - "step": 442 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7133, - "step": 443 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6954, - "step": 444 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6774, - "step": 445 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.745, - "step": 446 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6741, - "step": 447 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6453, - "step": 448 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6907, - "step": 449 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.652, - "step": 450 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7482, - "step": 451 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6682, - "step": 452 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7049, - "step": 453 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6845, - "step": 454 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.651, - "step": 455 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5878, - "step": 456 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6766, - "step": 457 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.6529, - "step": 458 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.658, - "step": 459 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5985, - "step": 460 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7648, - "step": 461 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.5697, - "step": 462 - }, - { - "epoch": 0.09, - "learning_rate": 2e-05, - "loss": 0.7304, - "step": 463 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6591, - "step": 464 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6808, - "step": 465 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7478, - "step": 466 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.678, - "step": 467 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6345, - "step": 468 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7491, - "step": 469 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7139, - "step": 470 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6806, - "step": 471 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6833, - "step": 472 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6758, - "step": 473 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6757, - "step": 474 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6636, - "step": 475 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7409, - "step": 476 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6294, - "step": 477 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6845, - "step": 478 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6906, - "step": 479 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7524, - "step": 480 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6531, - "step": 481 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7938, - "step": 482 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6079, - "step": 483 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6926, - "step": 484 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6532, - "step": 485 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7488, - "step": 486 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6806, - "step": 487 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7024, - "step": 488 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 489 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6251, - "step": 490 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6486, - "step": 491 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6678, - "step": 492 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7151, - "step": 493 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7141, - "step": 494 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7639, - "step": 495 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6541, - "step": 496 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6812, - "step": 497 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6929, - "step": 498 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6723, - "step": 499 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7187, - "step": 500 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6863, - "step": 501 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7004, - "step": 502 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7588, - "step": 503 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7651, - "step": 504 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6996, - "step": 505 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7957, - "step": 506 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7748, - "step": 507 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7243, - "step": 508 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7035, - "step": 509 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.7437, - "step": 510 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.5604, - "step": 511 - }, - { - "epoch": 0.1, - "learning_rate": 2e-05, - "loss": 0.6613, - "step": 512 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7252, - "step": 513 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.5621, - "step": 514 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.62, - "step": 515 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7899, - "step": 516 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.676, - "step": 517 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6621, - "step": 518 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7237, - "step": 519 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6666, - "step": 520 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.663, - "step": 521 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.5789, - "step": 522 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6196, - "step": 523 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7289, - "step": 524 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7567, - "step": 525 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6741, - "step": 526 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6315, - "step": 527 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 528 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6716, - "step": 529 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6609, - "step": 530 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 531 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7058, - "step": 532 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7332, - "step": 533 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7322, - "step": 534 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7791, - "step": 535 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 536 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6865, - "step": 537 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 538 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6386, - "step": 539 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7055, - "step": 540 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6994, - "step": 541 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6183, - "step": 542 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7084, - "step": 543 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7297, - "step": 544 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 545 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.5763, - "step": 546 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6394, - "step": 547 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7124, - "step": 548 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7095, - "step": 549 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6968, - "step": 550 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7188, - "step": 551 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6456, - "step": 552 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6588, - "step": 553 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7761, - "step": 554 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.734, - "step": 555 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.5238, - "step": 556 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7283, - "step": 557 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6823, - "step": 558 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.7075, - "step": 559 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.701, - "step": 560 - }, - { - "epoch": 0.11, - "learning_rate": 2e-05, - "loss": 0.6799, - "step": 561 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6108, - "step": 562 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6246, - "step": 563 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.8066, - "step": 564 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 565 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6633, - "step": 566 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6635, - "step": 567 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7016, - "step": 568 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6583, - "step": 569 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7364, - "step": 570 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.702, - "step": 571 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6225, - "step": 572 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6941, - "step": 573 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6512, - "step": 574 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7587, - "step": 575 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6767, - "step": 576 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 577 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.5819, - "step": 578 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7464, - "step": 579 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6489, - "step": 580 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6448, - "step": 581 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 582 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7496, - "step": 583 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6479, - "step": 584 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.748, - "step": 585 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7459, - "step": 586 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6365, - "step": 587 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6988, - "step": 588 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6936, - "step": 589 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7366, - "step": 590 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6768, - "step": 591 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6824, - "step": 592 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.634, - "step": 593 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.628, - "step": 594 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7488, - "step": 595 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7293, - "step": 596 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6924, - "step": 597 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6995, - "step": 598 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6338, - "step": 599 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 600 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6893, - "step": 601 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6452, - "step": 602 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7166, - "step": 603 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6728, - "step": 604 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7752, - "step": 605 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.806, - "step": 606 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7193, - "step": 607 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7166, - "step": 608 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.7332, - "step": 609 - }, - { - "epoch": 0.12, - "learning_rate": 2e-05, - "loss": 0.6738, - "step": 610 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7027, - "step": 611 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6611, - "step": 612 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6549, - "step": 613 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6228, - "step": 614 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7228, - "step": 615 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7166, - "step": 616 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6706, - "step": 617 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6759, - "step": 618 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6955, - "step": 619 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6279, - "step": 620 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6504, - "step": 621 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6923, - "step": 622 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6937, - "step": 623 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 624 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 625 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6681, - "step": 626 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.8075, - "step": 627 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6908, - "step": 628 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7097, - "step": 629 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6797, - "step": 630 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6661, - "step": 631 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7454, - "step": 632 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6342, - "step": 633 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6969, - "step": 634 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6676, - "step": 635 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6047, - "step": 636 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6822, - "step": 637 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6806, - "step": 638 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6603, - "step": 639 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 640 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6865, - "step": 641 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.5745, - "step": 642 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7357, - "step": 643 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6073, - "step": 644 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.5909, - "step": 645 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6642, - "step": 646 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7432, - "step": 647 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6398, - "step": 648 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.789, - "step": 649 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6823, - "step": 650 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.678, - "step": 651 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.7414, - "step": 652 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.622, - "step": 653 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6967, - "step": 654 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 655 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6544, - "step": 656 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6875, - "step": 657 - }, - { - "epoch": 0.13, - "learning_rate": 2e-05, - "loss": 0.6241, - "step": 658 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5142, - "step": 659 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6544, - "step": 660 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6933, - "step": 661 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7041, - "step": 662 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.715, - "step": 663 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6322, - "step": 664 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6158, - "step": 665 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7144, - "step": 666 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7337, - "step": 667 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.68, - "step": 668 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6575, - "step": 669 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5375, - "step": 670 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.664, - "step": 671 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6454, - "step": 672 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.608, - "step": 673 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7546, - "step": 674 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 675 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7178, - "step": 676 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7667, - "step": 677 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6609, - "step": 678 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.739, - "step": 679 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 680 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5774, - "step": 681 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6768, - "step": 682 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6116, - "step": 683 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6605, - "step": 684 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7476, - "step": 685 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6337, - "step": 686 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 687 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6237, - "step": 688 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7372, - "step": 689 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.664, - "step": 690 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 691 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.561, - "step": 692 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7864, - "step": 693 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5959, - "step": 694 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6646, - "step": 695 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.743, - "step": 696 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7094, - "step": 697 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6766, - "step": 698 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6628, - "step": 699 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6761, - "step": 700 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6928, - "step": 701 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.7777, - "step": 702 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6324, - "step": 703 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5845, - "step": 704 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6076, - "step": 705 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.5902, - "step": 706 - }, - { - "epoch": 0.14, - "learning_rate": 2e-05, - "loss": 0.6924, - "step": 707 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 708 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7348, - "step": 709 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7262, - "step": 710 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.771, - "step": 711 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6897, - "step": 712 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6493, - "step": 713 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 714 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6856, - "step": 715 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 716 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6572, - "step": 717 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7154, - "step": 718 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.5945, - "step": 719 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7107, - "step": 720 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6668, - "step": 721 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 722 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6594, - "step": 723 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 724 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.5934, - "step": 725 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6615, - "step": 726 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6995, - "step": 727 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6304, - "step": 728 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.577, - "step": 729 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6296, - "step": 730 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 731 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7218, - "step": 732 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6778, - "step": 733 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6475, - "step": 734 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6181, - "step": 735 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7239, - "step": 736 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7164, - "step": 737 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6465, - "step": 738 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.5395, - "step": 739 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7525, - "step": 740 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6447, - "step": 741 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6483, - "step": 742 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6884, - "step": 743 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 744 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6653, - "step": 745 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6186, - "step": 746 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7399, - "step": 747 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6453, - "step": 748 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6097, - "step": 749 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.7957, - "step": 750 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6909, - "step": 751 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6045, - "step": 752 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6879, - "step": 753 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6404, - "step": 754 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 755 - }, - { - "epoch": 0.15, - "learning_rate": 2e-05, - "loss": 0.5486, - "step": 756 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.632, - "step": 757 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6652, - "step": 758 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 759 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7202, - "step": 760 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6751, - "step": 761 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 762 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 763 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6957, - "step": 764 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5908, - "step": 765 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6792, - "step": 766 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6149, - "step": 767 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.674, - "step": 768 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7632, - "step": 769 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7255, - "step": 770 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7662, - "step": 771 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7067, - "step": 772 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6964, - "step": 773 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.693, - "step": 774 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6245, - "step": 775 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 776 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7247, - "step": 777 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 778 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6615, - "step": 779 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7686, - "step": 780 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.655, - "step": 781 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5948, - "step": 782 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6811, - "step": 783 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6051, - "step": 784 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 785 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7052, - "step": 786 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 787 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5936, - "step": 788 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6245, - "step": 789 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5755, - "step": 790 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.646, - "step": 791 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6032, - "step": 792 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7553, - "step": 793 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7255, - "step": 794 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5559, - "step": 795 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6671, - "step": 796 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6871, - "step": 797 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7559, - "step": 798 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7349, - "step": 799 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6544, - "step": 800 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.5861, - "step": 801 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7518, - "step": 802 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6714, - "step": 803 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.7257, - "step": 804 - }, - { - "epoch": 0.16, - "learning_rate": 2e-05, - "loss": 0.6051, - "step": 805 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6422, - "step": 806 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6749, - "step": 807 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.68, - "step": 808 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6228, - "step": 809 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6966, - "step": 810 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6657, - "step": 811 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6009, - "step": 812 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 813 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6816, - "step": 814 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7351, - "step": 815 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7257, - "step": 816 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7073, - "step": 817 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7456, - "step": 818 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 819 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6455, - "step": 820 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6298, - "step": 821 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7739, - "step": 822 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7036, - "step": 823 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6409, - "step": 824 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.5835, - "step": 825 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 826 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6869, - "step": 827 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6987, - "step": 828 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.69, - "step": 829 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6365, - "step": 830 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7188, - "step": 831 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6898, - "step": 832 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6323, - "step": 833 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 834 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7097, - "step": 835 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7226, - "step": 836 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7918, - "step": 837 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6788, - "step": 838 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7387, - "step": 839 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6852, - "step": 840 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7552, - "step": 841 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7141, - "step": 842 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6219, - "step": 843 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7512, - "step": 844 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6824, - "step": 845 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 846 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.5837, - "step": 847 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7348, - "step": 848 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6135, - "step": 849 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6587, - "step": 850 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6061, - "step": 851 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.7178, - "step": 852 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 853 - }, - { - "epoch": 0.17, - "learning_rate": 2e-05, - "loss": 0.6201, - "step": 854 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.769, - "step": 855 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7176, - "step": 856 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6684, - "step": 857 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 858 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6502, - "step": 859 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 860 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 861 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6213, - "step": 862 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6176, - "step": 863 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.705, - "step": 864 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6171, - "step": 865 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6956, - "step": 866 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6325, - "step": 867 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.5795, - "step": 868 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7084, - "step": 869 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7046, - "step": 870 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6679, - "step": 871 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6848, - "step": 872 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.5927, - "step": 873 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6251, - "step": 874 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6128, - "step": 875 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6965, - "step": 876 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.5963, - "step": 877 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6476, - "step": 878 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7532, - "step": 879 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 880 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6451, - "step": 881 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6136, - "step": 882 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7406, - "step": 883 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6558, - "step": 884 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7368, - "step": 885 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6876, - "step": 886 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 887 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6194, - "step": 888 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 889 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6477, - "step": 890 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.5833, - "step": 891 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6238, - "step": 892 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.593, - "step": 893 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6138, - "step": 894 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7079, - "step": 895 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7442, - "step": 896 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6619, - "step": 897 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6592, - "step": 898 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6824, - "step": 899 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.7087, - "step": 900 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6533, - "step": 901 - }, - { - "epoch": 0.18, - "learning_rate": 2e-05, - "loss": 0.6847, - "step": 902 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7431, - "step": 903 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6867, - "step": 904 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.587, - "step": 905 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6416, - "step": 906 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.796, - "step": 907 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6206, - "step": 908 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7399, - "step": 909 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5629, - "step": 910 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 911 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.634, - "step": 912 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6887, - "step": 913 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 914 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5412, - "step": 915 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6012, - "step": 916 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7151, - "step": 917 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6806, - "step": 918 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.823, - "step": 919 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 920 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6273, - "step": 921 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7487, - "step": 922 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7119, - "step": 923 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6995, - "step": 924 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6576, - "step": 925 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6589, - "step": 926 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6055, - "step": 927 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7146, - "step": 928 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5982, - "step": 929 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7858, - "step": 930 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 931 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7226, - "step": 932 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.606, - "step": 933 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5563, - "step": 934 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 935 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.7144, - "step": 936 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 937 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6817, - "step": 938 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 939 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.559, - "step": 940 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5832, - "step": 941 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6869, - "step": 942 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6433, - "step": 943 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6935, - "step": 944 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.5438, - "step": 945 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6716, - "step": 946 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.639, - "step": 947 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6388, - "step": 948 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.4984, - "step": 949 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6068, - "step": 950 - }, - { - "epoch": 0.19, - "learning_rate": 2e-05, - "loss": 0.6877, - "step": 951 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.7352, - "step": 952 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6986, - "step": 953 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6908, - "step": 954 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 955 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 956 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 957 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 958 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6201, - "step": 959 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6184, - "step": 960 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5994, - "step": 961 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6674, - "step": 962 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5404, - "step": 963 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.689, - "step": 964 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6004, - "step": 965 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 966 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5901, - "step": 967 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6555, - "step": 968 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6917, - "step": 969 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6808, - "step": 970 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6102, - "step": 971 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6119, - "step": 972 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5975, - "step": 973 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6739, - "step": 974 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6095, - "step": 975 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 976 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 977 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.7113, - "step": 978 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6284, - "step": 979 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6045, - "step": 980 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6191, - "step": 981 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6484, - "step": 982 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6186, - "step": 983 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.7092, - "step": 984 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6366, - "step": 985 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 986 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.7535, - "step": 987 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6635, - "step": 988 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5427, - "step": 989 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6184, - "step": 990 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6438, - "step": 991 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6358, - "step": 992 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6101, - "step": 993 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.5851, - "step": 994 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6932, - "step": 995 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 996 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6704, - "step": 997 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6197, - "step": 998 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 999 - }, - { - "epoch": 0.2, - "learning_rate": 2e-05, - "loss": 0.6113, - "step": 1000 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6994, - "step": 1001 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.658, - "step": 1002 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6907, - "step": 1003 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7391, - "step": 1004 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7143, - "step": 1005 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6489, - "step": 1006 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 1007 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6398, - "step": 1008 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6041, - "step": 1009 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 1010 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7226, - "step": 1011 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7117, - "step": 1012 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 1013 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6797, - "step": 1014 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.711, - "step": 1015 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6428, - "step": 1016 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7002, - "step": 1017 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6981, - "step": 1018 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6506, - "step": 1019 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6155, - "step": 1020 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.5652, - "step": 1021 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.5125, - "step": 1022 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7397, - "step": 1023 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7486, - "step": 1024 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6793, - "step": 1025 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 1026 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7413, - "step": 1027 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.5978, - "step": 1028 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.662, - "step": 1029 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6291, - "step": 1030 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7377, - "step": 1031 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.5765, - "step": 1032 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6671, - "step": 1033 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6792, - "step": 1034 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6564, - "step": 1035 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7739, - "step": 1036 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6083, - "step": 1037 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6733, - "step": 1038 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6114, - "step": 1039 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6019, - "step": 1040 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 1041 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6238, - "step": 1042 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6901, - "step": 1043 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6328, - "step": 1044 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.7132, - "step": 1045 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6065, - "step": 1046 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6804, - "step": 1047 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 1048 - }, - { - "epoch": 0.21, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 1049 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7145, - "step": 1050 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.557, - "step": 1051 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7561, - "step": 1052 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.698, - "step": 1053 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7152, - "step": 1054 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 1055 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6995, - "step": 1056 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6834, - "step": 1057 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6459, - "step": 1058 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7157, - "step": 1059 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6417, - "step": 1060 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7058, - "step": 1061 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5558, - "step": 1062 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.633, - "step": 1063 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6779, - "step": 1064 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6031, - "step": 1065 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5753, - "step": 1066 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5808, - "step": 1067 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6272, - "step": 1068 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6266, - "step": 1069 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.569, - "step": 1070 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.544, - "step": 1071 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5732, - "step": 1072 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7747, - "step": 1073 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7562, - "step": 1074 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6582, - "step": 1075 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7008, - "step": 1076 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7537, - "step": 1077 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6373, - "step": 1078 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7236, - "step": 1079 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6451, - "step": 1080 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.696, - "step": 1081 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7102, - "step": 1082 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.576, - "step": 1083 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7003, - "step": 1084 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6365, - "step": 1085 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.761, - "step": 1086 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6537, - "step": 1087 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7344, - "step": 1088 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5138, - "step": 1089 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7847, - "step": 1090 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7128, - "step": 1091 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 1092 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 1093 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6408, - "step": 1094 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.7208, - "step": 1095 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 1096 - }, - { - "epoch": 0.22, - "learning_rate": 2e-05, - "loss": 0.5909, - "step": 1097 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6549, - "step": 1098 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.5581, - "step": 1099 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 1100 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6478, - "step": 1101 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6389, - "step": 1102 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.596, - "step": 1103 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6611, - "step": 1104 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6585, - "step": 1105 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6603, - "step": 1106 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6885, - "step": 1107 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 1108 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6683, - "step": 1109 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.7009, - "step": 1110 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.5484, - "step": 1111 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.5533, - "step": 1112 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6838, - "step": 1113 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6347, - "step": 1114 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6776, - "step": 1115 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6996, - "step": 1116 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6821, - "step": 1117 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6139, - "step": 1118 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6726, - "step": 1119 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 1120 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6987, - "step": 1121 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.598, - "step": 1122 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6022, - "step": 1123 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6012, - "step": 1124 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6298, - "step": 1125 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.8205, - "step": 1126 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 1127 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6049, - "step": 1128 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 1129 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.5518, - "step": 1130 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 1131 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.741, - "step": 1132 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6855, - "step": 1133 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6276, - "step": 1134 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6044, - "step": 1135 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.7202, - "step": 1136 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6628, - "step": 1137 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 1138 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.593, - "step": 1139 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.5988, - "step": 1140 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.7152, - "step": 1141 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6734, - "step": 1142 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6044, - "step": 1143 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6555, - "step": 1144 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6538, - "step": 1145 - }, - { - "epoch": 0.23, - "learning_rate": 2e-05, - "loss": 0.6817, - "step": 1146 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7671, - "step": 1147 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6772, - "step": 1148 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7171, - "step": 1149 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7165, - "step": 1150 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6369, - "step": 1151 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.773, - "step": 1152 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.606, - "step": 1153 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7211, - "step": 1154 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6838, - "step": 1155 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6672, - "step": 1156 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6732, - "step": 1157 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7406, - "step": 1158 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6354, - "step": 1159 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.655, - "step": 1160 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.5637, - "step": 1161 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6254, - "step": 1162 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.654, - "step": 1163 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6854, - "step": 1164 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 1165 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6182, - "step": 1166 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.57, - "step": 1167 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6579, - "step": 1168 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6909, - "step": 1169 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6223, - "step": 1170 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6148, - "step": 1171 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6265, - "step": 1172 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.708, - "step": 1173 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 1174 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6738, - "step": 1175 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6833, - "step": 1176 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.5787, - "step": 1177 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.5966, - "step": 1178 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.683, - "step": 1179 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6435, - "step": 1180 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.5218, - "step": 1181 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7322, - "step": 1182 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6499, - "step": 1183 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6833, - "step": 1184 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6515, - "step": 1185 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.7825, - "step": 1186 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6375, - "step": 1187 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6673, - "step": 1188 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 1189 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6932, - "step": 1190 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6658, - "step": 1191 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6586, - "step": 1192 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 1193 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6532, - "step": 1194 - }, - { - "epoch": 0.24, - "learning_rate": 2e-05, - "loss": 0.6575, - "step": 1195 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6407, - "step": 1196 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6089, - "step": 1197 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6457, - "step": 1198 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6274, - "step": 1199 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 1200 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 1201 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7355, - "step": 1202 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7119, - "step": 1203 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7076, - "step": 1204 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.703, - "step": 1205 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 1206 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.5702, - "step": 1207 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7143, - "step": 1208 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.5852, - "step": 1209 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6978, - "step": 1210 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6493, - "step": 1211 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6978, - "step": 1212 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6983, - "step": 1213 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.71, - "step": 1214 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6542, - "step": 1215 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.8028, - "step": 1216 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6388, - "step": 1217 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6126, - "step": 1218 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6492, - "step": 1219 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.5767, - "step": 1220 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 1221 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.5321, - "step": 1222 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6352, - "step": 1223 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6815, - "step": 1224 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7402, - "step": 1225 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6293, - "step": 1226 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6911, - "step": 1227 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 1228 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7317, - "step": 1229 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6767, - "step": 1230 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7053, - "step": 1231 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6056, - "step": 1232 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6263, - "step": 1233 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.7042, - "step": 1234 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 1235 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6299, - "step": 1236 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.4878, - "step": 1237 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6734, - "step": 1238 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6762, - "step": 1239 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6496, - "step": 1240 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6255, - "step": 1241 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6411, - "step": 1242 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6747, - "step": 1243 - }, - { - "epoch": 0.25, - "learning_rate": 2e-05, - "loss": 0.6898, - "step": 1244 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7236, - "step": 1245 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.687, - "step": 1246 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7456, - "step": 1247 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6305, - "step": 1248 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.653, - "step": 1249 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6874, - "step": 1250 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6514, - "step": 1251 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6214, - "step": 1252 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6947, - "step": 1253 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6309, - "step": 1254 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6895, - "step": 1255 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.603, - "step": 1256 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6313, - "step": 1257 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6994, - "step": 1258 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5837, - "step": 1259 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 1260 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5889, - "step": 1261 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 1262 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6514, - "step": 1263 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6616, - "step": 1264 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6304, - "step": 1265 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6669, - "step": 1266 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 1267 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5874, - "step": 1268 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6265, - "step": 1269 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7109, - "step": 1270 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 1271 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 1272 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6728, - "step": 1273 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7756, - "step": 1274 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6301, - "step": 1275 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5923, - "step": 1276 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6509, - "step": 1277 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5422, - "step": 1278 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5187, - "step": 1279 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 1280 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 1281 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7069, - "step": 1282 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.5213, - "step": 1283 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6693, - "step": 1284 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6518, - "step": 1285 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6757, - "step": 1286 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6206, - "step": 1287 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7423, - "step": 1288 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 1289 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6904, - "step": 1290 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7014, - "step": 1291 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.6562, - "step": 1292 - }, - { - "epoch": 0.26, - "learning_rate": 2e-05, - "loss": 0.7273, - "step": 1293 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7376, - "step": 1294 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7053, - "step": 1295 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5962, - "step": 1296 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6576, - "step": 1297 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6618, - "step": 1298 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5752, - "step": 1299 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6483, - "step": 1300 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 1301 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 1302 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 1303 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7663, - "step": 1304 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6261, - "step": 1305 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5822, - "step": 1306 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.59, - "step": 1307 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6189, - "step": 1308 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6359, - "step": 1309 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6897, - "step": 1310 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6452, - "step": 1311 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6726, - "step": 1312 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6968, - "step": 1313 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5979, - "step": 1314 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5337, - "step": 1315 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6642, - "step": 1316 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6893, - "step": 1317 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6525, - "step": 1318 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.691, - "step": 1319 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6667, - "step": 1320 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 1321 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5809, - "step": 1322 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5846, - "step": 1323 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6057, - "step": 1324 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.656, - "step": 1325 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6954, - "step": 1326 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.695, - "step": 1327 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7239, - "step": 1328 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6211, - "step": 1329 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7004, - "step": 1330 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6536, - "step": 1331 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6554, - "step": 1332 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7055, - "step": 1333 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6657, - "step": 1334 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6852, - "step": 1335 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5484, - "step": 1336 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6961, - "step": 1337 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.7264, - "step": 1338 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.5096, - "step": 1339 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.675, - "step": 1340 - }, - { - "epoch": 0.27, - "learning_rate": 2e-05, - "loss": 0.6402, - "step": 1341 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5934, - "step": 1342 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 1343 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 1344 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6009, - "step": 1345 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6399, - "step": 1346 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6858, - "step": 1347 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6678, - "step": 1348 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6964, - "step": 1349 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.577, - "step": 1350 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.7234, - "step": 1351 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5581, - "step": 1352 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.665, - "step": 1353 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6179, - "step": 1354 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6927, - "step": 1355 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5353, - "step": 1356 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6213, - "step": 1357 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6876, - "step": 1358 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5087, - "step": 1359 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6655, - "step": 1360 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6145, - "step": 1361 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6218, - "step": 1362 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6026, - "step": 1363 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6534, - "step": 1364 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.7058, - "step": 1365 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6206, - "step": 1366 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6182, - "step": 1367 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6086, - "step": 1368 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6502, - "step": 1369 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5437, - "step": 1370 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.655, - "step": 1371 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 1372 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5962, - "step": 1373 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6207, - "step": 1374 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.7236, - "step": 1375 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6935, - "step": 1376 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6281, - "step": 1377 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6721, - "step": 1378 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6589, - "step": 1379 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6916, - "step": 1380 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.7272, - "step": 1381 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5616, - "step": 1382 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6615, - "step": 1383 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6086, - "step": 1384 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.666, - "step": 1385 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5401, - "step": 1386 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6872, - "step": 1387 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.609, - "step": 1388 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.5865, - "step": 1389 - }, - { - "epoch": 0.28, - "learning_rate": 2e-05, - "loss": 0.6339, - "step": 1390 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.515, - "step": 1391 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7301, - "step": 1392 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5994, - "step": 1393 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.551, - "step": 1394 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6097, - "step": 1395 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.662, - "step": 1396 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6908, - "step": 1397 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6619, - "step": 1398 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6509, - "step": 1399 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6514, - "step": 1400 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5903, - "step": 1401 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5886, - "step": 1402 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6435, - "step": 1403 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6247, - "step": 1404 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 1405 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6016, - "step": 1406 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6083, - "step": 1407 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 1408 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7085, - "step": 1409 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6556, - "step": 1410 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 1411 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6792, - "step": 1412 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6587, - "step": 1413 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6499, - "step": 1414 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.692, - "step": 1415 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 1416 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5969, - "step": 1417 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 1418 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7259, - "step": 1419 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7329, - "step": 1420 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6502, - "step": 1421 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6075, - "step": 1422 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5913, - "step": 1423 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7233, - "step": 1424 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7057, - "step": 1425 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7174, - "step": 1426 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5809, - "step": 1427 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 1428 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6206, - "step": 1429 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5733, - "step": 1430 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6462, - "step": 1431 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6214, - "step": 1432 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6022, - "step": 1433 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6228, - "step": 1434 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6431, - "step": 1435 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5747, - "step": 1436 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.5762, - "step": 1437 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.7035, - "step": 1438 - }, - { - "epoch": 0.29, - "learning_rate": 2e-05, - "loss": 0.6576, - "step": 1439 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7713, - "step": 1440 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6283, - "step": 1441 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6317, - "step": 1442 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6508, - "step": 1443 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6571, - "step": 1444 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7406, - "step": 1445 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6077, - "step": 1446 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6485, - "step": 1447 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 1448 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7025, - "step": 1449 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7838, - "step": 1450 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7589, - "step": 1451 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7009, - "step": 1452 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.658, - "step": 1453 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6818, - "step": 1454 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6626, - "step": 1455 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6455, - "step": 1456 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6355, - "step": 1457 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 1458 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.5949, - "step": 1459 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6881, - "step": 1460 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6308, - "step": 1461 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6899, - "step": 1462 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 1463 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6879, - "step": 1464 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.5923, - "step": 1465 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6143, - "step": 1466 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7073, - "step": 1467 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6079, - "step": 1468 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6343, - "step": 1469 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.5787, - "step": 1470 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6325, - "step": 1471 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.5616, - "step": 1472 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7077, - "step": 1473 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 1474 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6605, - "step": 1475 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7234, - "step": 1476 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6845, - "step": 1477 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6747, - "step": 1478 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6468, - "step": 1479 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.682, - "step": 1480 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6291, - "step": 1481 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6634, - "step": 1482 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6562, - "step": 1483 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.7508, - "step": 1484 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 1485 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6614, - "step": 1486 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6975, - "step": 1487 - }, - { - "epoch": 0.3, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 1488 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6905, - "step": 1489 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6363, - "step": 1490 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6547, - "step": 1491 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6342, - "step": 1492 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6333, - "step": 1493 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 1494 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6435, - "step": 1495 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 1496 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.7512, - "step": 1497 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 1498 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6007, - "step": 1499 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6914, - "step": 1500 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5848, - "step": 1501 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 1502 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.599, - "step": 1503 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5575, - "step": 1504 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6457, - "step": 1505 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.645, - "step": 1506 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6797, - "step": 1507 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5825, - "step": 1508 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6116, - "step": 1509 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6532, - "step": 1510 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6297, - "step": 1511 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6108, - "step": 1512 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 1513 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5926, - "step": 1514 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 1515 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.7242, - "step": 1516 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 1517 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6214, - "step": 1518 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6657, - "step": 1519 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6452, - "step": 1520 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 1521 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 1522 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6472, - "step": 1523 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.647, - "step": 1524 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6859, - "step": 1525 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5704, - "step": 1526 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6808, - "step": 1527 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.59, - "step": 1528 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6577, - "step": 1529 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 1530 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.7145, - "step": 1531 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5952, - "step": 1532 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6863, - "step": 1533 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.5718, - "step": 1534 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.7134, - "step": 1535 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 1536 - }, - { - "epoch": 0.31, - "learning_rate": 2e-05, - "loss": 0.6195, - "step": 1537 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 1538 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6905, - "step": 1539 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5957, - "step": 1540 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6627, - "step": 1541 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6006, - "step": 1542 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5906, - "step": 1543 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6008, - "step": 1544 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6068, - "step": 1545 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6209, - "step": 1546 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.625, - "step": 1547 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5909, - "step": 1548 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6569, - "step": 1549 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6833, - "step": 1550 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6198, - "step": 1551 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6804, - "step": 1552 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6794, - "step": 1553 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6735, - "step": 1554 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6527, - "step": 1555 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6248, - "step": 1556 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5375, - "step": 1557 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6459, - "step": 1558 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6457, - "step": 1559 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5992, - "step": 1560 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.7108, - "step": 1561 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5396, - "step": 1562 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 1563 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5887, - "step": 1564 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5704, - "step": 1565 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5489, - "step": 1566 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5386, - "step": 1567 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.688, - "step": 1568 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6179, - "step": 1569 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.545, - "step": 1570 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5684, - "step": 1571 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6241, - "step": 1572 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6429, - "step": 1573 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.7045, - "step": 1574 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6688, - "step": 1575 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5272, - "step": 1576 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6869, - "step": 1577 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5506, - "step": 1578 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5607, - "step": 1579 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5614, - "step": 1580 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.7133, - "step": 1581 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 1582 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5873, - "step": 1583 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.5962, - "step": 1584 - }, - { - "epoch": 0.32, - "learning_rate": 2e-05, - "loss": 0.6786, - "step": 1585 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 1586 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.638, - "step": 1587 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6683, - "step": 1588 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5982, - "step": 1589 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5975, - "step": 1590 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 1591 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 1592 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6883, - "step": 1593 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7735, - "step": 1594 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5947, - "step": 1595 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6383, - "step": 1596 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5676, - "step": 1597 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6718, - "step": 1598 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6137, - "step": 1599 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6468, - "step": 1600 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6027, - "step": 1601 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5759, - "step": 1602 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7385, - "step": 1603 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5546, - "step": 1604 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 1605 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6711, - "step": 1606 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 1607 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5952, - "step": 1608 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5699, - "step": 1609 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6292, - "step": 1610 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6202, - "step": 1611 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 1612 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6618, - "step": 1613 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7178, - "step": 1614 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5738, - "step": 1615 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 1616 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6187, - "step": 1617 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5761, - "step": 1618 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5948, - "step": 1619 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6489, - "step": 1620 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6491, - "step": 1621 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 1622 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6231, - "step": 1623 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7144, - "step": 1624 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7462, - "step": 1625 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 1626 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6848, - "step": 1627 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6641, - "step": 1628 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.5558, - "step": 1629 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 1630 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6846, - "step": 1631 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6376, - "step": 1632 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.7226, - "step": 1633 - }, - { - "epoch": 0.33, - "learning_rate": 2e-05, - "loss": 0.6028, - "step": 1634 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6295, - "step": 1635 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 1636 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 1637 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 1638 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6121, - "step": 1639 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6805, - "step": 1640 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6872, - "step": 1641 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6689, - "step": 1642 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6828, - "step": 1643 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6005, - "step": 1644 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.7138, - "step": 1645 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 1646 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6372, - "step": 1647 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.7036, - "step": 1648 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6356, - "step": 1649 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.7281, - "step": 1650 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6995, - "step": 1651 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 1652 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6548, - "step": 1653 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6269, - "step": 1654 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 1655 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5683, - "step": 1656 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6788, - "step": 1657 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 1658 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 1659 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6231, - "step": 1660 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5802, - "step": 1661 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6077, - "step": 1662 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6302, - "step": 1663 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6518, - "step": 1664 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5356, - "step": 1665 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.654, - "step": 1666 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 1667 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6406, - "step": 1668 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.7069, - "step": 1669 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5641, - "step": 1670 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6043, - "step": 1671 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 1672 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5969, - "step": 1673 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6743, - "step": 1674 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5805, - "step": 1675 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6646, - "step": 1676 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.714, - "step": 1677 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6179, - "step": 1678 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6105, - "step": 1679 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5499, - "step": 1680 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 1681 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.7499, - "step": 1682 - }, - { - "epoch": 0.34, - "learning_rate": 2e-05, - "loss": 0.5818, - "step": 1683 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6983, - "step": 1684 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.7198, - "step": 1685 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5627, - "step": 1686 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6066, - "step": 1687 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6413, - "step": 1688 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 1689 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 1690 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5927, - "step": 1691 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5272, - "step": 1692 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5796, - "step": 1693 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6009, - "step": 1694 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6673, - "step": 1695 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5844, - "step": 1696 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6234, - "step": 1697 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 1698 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5545, - "step": 1699 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6506, - "step": 1700 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 1701 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5702, - "step": 1702 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 1703 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6343, - "step": 1704 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6416, - "step": 1705 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6937, - "step": 1706 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6517, - "step": 1707 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5847, - "step": 1708 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 1709 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 1710 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6585, - "step": 1711 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6228, - "step": 1712 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.7174, - "step": 1713 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6961, - "step": 1714 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6366, - "step": 1715 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.599, - "step": 1716 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6542, - "step": 1717 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 1718 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6735, - "step": 1719 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5894, - "step": 1720 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.753, - "step": 1721 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6413, - "step": 1722 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6407, - "step": 1723 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.7195, - "step": 1724 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 1725 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6729, - "step": 1726 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6168, - "step": 1727 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6153, - "step": 1728 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6916, - "step": 1729 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.6961, - "step": 1730 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5611, - "step": 1731 - }, - { - "epoch": 0.35, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 1732 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5948, - "step": 1733 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.636, - "step": 1734 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6116, - "step": 1735 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5252, - "step": 1736 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.665, - "step": 1737 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6611, - "step": 1738 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6141, - "step": 1739 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5872, - "step": 1740 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5905, - "step": 1741 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.7207, - "step": 1742 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6156, - "step": 1743 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6896, - "step": 1744 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6601, - "step": 1745 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6359, - "step": 1746 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.7041, - "step": 1747 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.7286, - "step": 1748 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6977, - "step": 1749 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6971, - "step": 1750 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6611, - "step": 1751 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6263, - "step": 1752 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 1753 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5694, - "step": 1754 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 1755 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6317, - "step": 1756 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6945, - "step": 1757 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6763, - "step": 1758 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6885, - "step": 1759 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6198, - "step": 1760 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6926, - "step": 1761 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 1762 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5807, - "step": 1763 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6043, - "step": 1764 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5971, - "step": 1765 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5754, - "step": 1766 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6933, - "step": 1767 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6025, - "step": 1768 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5577, - "step": 1769 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6955, - "step": 1770 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6202, - "step": 1771 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6438, - "step": 1772 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5862, - "step": 1773 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 1774 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6854, - "step": 1775 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.7397, - "step": 1776 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.5839, - "step": 1777 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6544, - "step": 1778 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6699, - "step": 1779 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.6778, - "step": 1780 - }, - { - "epoch": 0.36, - "learning_rate": 2e-05, - "loss": 0.7119, - "step": 1781 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6345, - "step": 1782 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6685, - "step": 1783 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5623, - "step": 1784 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6061, - "step": 1785 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.546, - "step": 1786 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7122, - "step": 1787 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6772, - "step": 1788 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5978, - "step": 1789 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6832, - "step": 1790 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6285, - "step": 1791 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6344, - "step": 1792 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5718, - "step": 1793 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6017, - "step": 1794 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7037, - "step": 1795 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6462, - "step": 1796 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 1797 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5504, - "step": 1798 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6381, - "step": 1799 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6969, - "step": 1800 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6225, - "step": 1801 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6461, - "step": 1802 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 1803 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6609, - "step": 1804 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5293, - "step": 1805 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6409, - "step": 1806 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 1807 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7463, - "step": 1808 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6452, - "step": 1809 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5809, - "step": 1810 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5799, - "step": 1811 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6301, - "step": 1812 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6275, - "step": 1813 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6616, - "step": 1814 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6747, - "step": 1815 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7339, - "step": 1816 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7122, - "step": 1817 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6681, - "step": 1818 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5774, - "step": 1819 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7313, - "step": 1820 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6683, - "step": 1821 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6839, - "step": 1822 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6108, - "step": 1823 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.618, - "step": 1824 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.638, - "step": 1825 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.688, - "step": 1826 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.6265, - "step": 1827 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.7186, - "step": 1828 - }, - { - "epoch": 0.37, - "learning_rate": 2e-05, - "loss": 0.5223, - "step": 1829 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6627, - "step": 1830 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 1831 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6841, - "step": 1832 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6502, - "step": 1833 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6705, - "step": 1834 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6102, - "step": 1835 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7109, - "step": 1836 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7024, - "step": 1837 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6905, - "step": 1838 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 1839 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5728, - "step": 1840 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6071, - "step": 1841 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.706, - "step": 1842 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7144, - "step": 1843 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6798, - "step": 1844 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5263, - "step": 1845 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7383, - "step": 1846 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.589, - "step": 1847 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6669, - "step": 1848 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6263, - "step": 1849 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7544, - "step": 1850 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 1851 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5154, - "step": 1852 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6572, - "step": 1853 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6953, - "step": 1854 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5159, - "step": 1855 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 1856 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5816, - "step": 1857 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 1858 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6261, - "step": 1859 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7155, - "step": 1860 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.509, - "step": 1861 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 1862 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7086, - "step": 1863 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7016, - "step": 1864 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6006, - "step": 1865 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6259, - "step": 1866 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6828, - "step": 1867 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6994, - "step": 1868 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6875, - "step": 1869 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6061, - "step": 1870 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.598, - "step": 1871 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6003, - "step": 1872 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5545, - "step": 1873 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5903, - "step": 1874 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.5882, - "step": 1875 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6341, - "step": 1876 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.7696, - "step": 1877 - }, - { - "epoch": 0.38, - "learning_rate": 2e-05, - "loss": 0.6644, - "step": 1878 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6849, - "step": 1879 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.5922, - "step": 1880 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.643, - "step": 1881 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6131, - "step": 1882 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6784, - "step": 1883 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6789, - "step": 1884 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6682, - "step": 1885 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6008, - "step": 1886 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6414, - "step": 1887 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6378, - "step": 1888 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6626, - "step": 1889 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.7382, - "step": 1890 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.5741, - "step": 1891 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.678, - "step": 1892 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 1893 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6358, - "step": 1894 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 1895 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6248, - "step": 1896 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6644, - "step": 1897 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.5821, - "step": 1898 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6385, - "step": 1899 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6108, - "step": 1900 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6965, - "step": 1901 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6914, - "step": 1902 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6188, - "step": 1903 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6566, - "step": 1904 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.7075, - "step": 1905 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6389, - "step": 1906 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6284, - "step": 1907 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6064, - "step": 1908 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6151, - "step": 1909 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6825, - "step": 1910 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 1911 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.681, - "step": 1912 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 1913 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6586, - "step": 1914 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6731, - "step": 1915 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.4801, - "step": 1916 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.628, - "step": 1917 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6494, - "step": 1918 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6888, - "step": 1919 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6926, - "step": 1920 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6401, - "step": 1921 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6701, - "step": 1922 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 1923 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6873, - "step": 1924 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.5825, - "step": 1925 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6294, - "step": 1926 - }, - { - "epoch": 0.39, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 1927 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6123, - "step": 1928 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5892, - "step": 1929 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6657, - "step": 1930 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 1931 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 1932 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7679, - "step": 1933 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6638, - "step": 1934 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6732, - "step": 1935 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 1936 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5639, - "step": 1937 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.643, - "step": 1938 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7656, - "step": 1939 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5841, - "step": 1940 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6523, - "step": 1941 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5688, - "step": 1942 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5876, - "step": 1943 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5764, - "step": 1944 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 1945 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 1946 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6446, - "step": 1947 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6212, - "step": 1948 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7197, - "step": 1949 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.657, - "step": 1950 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5624, - "step": 1951 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5896, - "step": 1952 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7028, - "step": 1953 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 1954 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6836, - "step": 1955 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7093, - "step": 1956 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6474, - "step": 1957 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6557, - "step": 1958 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6635, - "step": 1959 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6971, - "step": 1960 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6038, - "step": 1961 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 1962 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5316, - "step": 1963 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5678, - "step": 1964 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 1965 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 1966 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 1967 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.62, - "step": 1968 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6911, - "step": 1969 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6313, - "step": 1970 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5748, - "step": 1971 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.5943, - "step": 1972 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6297, - "step": 1973 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.6868, - "step": 1974 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7116, - "step": 1975 - }, - { - "epoch": 0.4, - "learning_rate": 2e-05, - "loss": 0.7112, - "step": 1976 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7308, - "step": 1977 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6591, - "step": 1978 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7395, - "step": 1979 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6013, - "step": 1980 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6737, - "step": 1981 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5201, - "step": 1982 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7402, - "step": 1983 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 1984 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5985, - "step": 1985 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6374, - "step": 1986 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6407, - "step": 1987 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6123, - "step": 1988 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5937, - "step": 1989 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6523, - "step": 1990 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6047, - "step": 1991 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5947, - "step": 1992 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7118, - "step": 1993 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6934, - "step": 1994 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6448, - "step": 1995 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6076, - "step": 1996 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7217, - "step": 1997 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6117, - "step": 1998 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6281, - "step": 1999 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6746, - "step": 2000 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5876, - "step": 2001 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5878, - "step": 2002 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6636, - "step": 2003 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6614, - "step": 2004 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5251, - "step": 2005 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6993, - "step": 2006 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5564, - "step": 2007 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.7228, - "step": 2008 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6798, - "step": 2009 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5752, - "step": 2010 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6509, - "step": 2011 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6953, - "step": 2012 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6818, - "step": 2013 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6386, - "step": 2014 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6247, - "step": 2015 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5275, - "step": 2016 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5978, - "step": 2017 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5442, - "step": 2018 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6417, - "step": 2019 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 2020 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 2021 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6442, - "step": 2022 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.6447, - "step": 2023 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5637, - "step": 2024 - }, - { - "epoch": 0.41, - "learning_rate": 2e-05, - "loss": 0.5508, - "step": 2025 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5733, - "step": 2026 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6539, - "step": 2027 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6768, - "step": 2028 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6872, - "step": 2029 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5795, - "step": 2030 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5534, - "step": 2031 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6816, - "step": 2032 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.657, - "step": 2033 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6587, - "step": 2034 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6691, - "step": 2035 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6083, - "step": 2036 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.656, - "step": 2037 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6793, - "step": 2038 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6849, - "step": 2039 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.674, - "step": 2040 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6871, - "step": 2041 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.703, - "step": 2042 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.666, - "step": 2043 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5881, - "step": 2044 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 2045 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.618, - "step": 2046 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.585, - "step": 2047 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5989, - "step": 2048 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5504, - "step": 2049 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6311, - "step": 2050 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5961, - "step": 2051 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 2052 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5215, - "step": 2053 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.736, - "step": 2054 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.7164, - "step": 2055 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5367, - "step": 2056 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 2057 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6017, - "step": 2058 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.652, - "step": 2059 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6159, - "step": 2060 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6104, - "step": 2061 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6984, - "step": 2062 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.7222, - "step": 2063 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6733, - "step": 2064 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5981, - "step": 2065 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5314, - "step": 2066 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6724, - "step": 2067 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.7152, - "step": 2068 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6115, - "step": 2069 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6577, - "step": 2070 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5852, - "step": 2071 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.5502, - "step": 2072 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.602, - "step": 2073 - }, - { - "epoch": 0.42, - "learning_rate": 2e-05, - "loss": 0.6898, - "step": 2074 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7251, - "step": 2075 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 2076 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6974, - "step": 2077 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5308, - "step": 2078 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6546, - "step": 2079 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5248, - "step": 2080 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6496, - "step": 2081 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6292, - "step": 2082 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6083, - "step": 2083 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 2084 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5793, - "step": 2085 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6706, - "step": 2086 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6607, - "step": 2087 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6056, - "step": 2088 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6542, - "step": 2089 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6759, - "step": 2090 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6692, - "step": 2091 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5675, - "step": 2092 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7321, - "step": 2093 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5772, - "step": 2094 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6055, - "step": 2095 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6345, - "step": 2096 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6248, - "step": 2097 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7552, - "step": 2098 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6643, - "step": 2099 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.57, - "step": 2100 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6882, - "step": 2101 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6398, - "step": 2102 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6635, - "step": 2103 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6229, - "step": 2104 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6466, - "step": 2105 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6315, - "step": 2106 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7009, - "step": 2107 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6017, - "step": 2108 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6957, - "step": 2109 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.582, - "step": 2110 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6212, - "step": 2111 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.683, - "step": 2112 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6057, - "step": 2113 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6456, - "step": 2114 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 2115 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.654, - "step": 2116 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7344, - "step": 2117 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.7272, - "step": 2118 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6387, - "step": 2119 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6289, - "step": 2120 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6458, - "step": 2121 - }, - { - "epoch": 0.43, - "learning_rate": 2e-05, - "loss": 0.6254, - "step": 2122 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5635, - "step": 2123 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.602, - "step": 2124 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.522, - "step": 2125 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5558, - "step": 2126 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.7041, - "step": 2127 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5763, - "step": 2128 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6749, - "step": 2129 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 2130 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5893, - "step": 2131 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.7274, - "step": 2132 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6426, - "step": 2133 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5457, - "step": 2134 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6438, - "step": 2135 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6089, - "step": 2136 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6307, - "step": 2137 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 2138 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5079, - "step": 2139 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5994, - "step": 2140 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5225, - "step": 2141 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6609, - "step": 2142 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.571, - "step": 2143 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.7049, - "step": 2144 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6804, - "step": 2145 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5623, - "step": 2146 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5496, - "step": 2147 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5998, - "step": 2148 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6527, - "step": 2149 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6207, - "step": 2150 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6734, - "step": 2151 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6653, - "step": 2152 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6343, - "step": 2153 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5883, - "step": 2154 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6738, - "step": 2155 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5899, - "step": 2156 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5956, - "step": 2157 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6279, - "step": 2158 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6692, - "step": 2159 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6036, - "step": 2160 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 2161 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 2162 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.531, - "step": 2163 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6045, - "step": 2164 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6419, - "step": 2165 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 2166 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5903, - "step": 2167 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.5458, - "step": 2168 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 2169 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6799, - "step": 2170 - }, - { - "epoch": 0.44, - "learning_rate": 2e-05, - "loss": 0.6163, - "step": 2171 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6209, - "step": 2172 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6722, - "step": 2173 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6904, - "step": 2174 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 2175 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.7478, - "step": 2176 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5154, - "step": 2177 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6039, - "step": 2178 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.7557, - "step": 2179 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5357, - "step": 2180 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.65, - "step": 2181 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6298, - "step": 2182 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5897, - "step": 2183 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6573, - "step": 2184 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6131, - "step": 2185 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6899, - "step": 2186 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 2187 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.7284, - "step": 2188 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6877, - "step": 2189 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6474, - "step": 2190 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 2191 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5228, - "step": 2192 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6099, - "step": 2193 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5998, - "step": 2194 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6355, - "step": 2195 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5632, - "step": 2196 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5959, - "step": 2197 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5757, - "step": 2198 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6557, - "step": 2199 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5871, - "step": 2200 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5931, - "step": 2201 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6527, - "step": 2202 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6665, - "step": 2203 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.601, - "step": 2204 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6601, - "step": 2205 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6462, - "step": 2206 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6394, - "step": 2207 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 2208 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 2209 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5824, - "step": 2210 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5796, - "step": 2211 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6777, - "step": 2212 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5918, - "step": 2213 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6042, - "step": 2214 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.6243, - "step": 2215 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5726, - "step": 2216 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.5934, - "step": 2217 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.7034, - "step": 2218 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.679, - "step": 2219 - }, - { - "epoch": 0.45, - "learning_rate": 2e-05, - "loss": 0.7103, - "step": 2220 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6551, - "step": 2221 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5996, - "step": 2222 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 2223 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.613, - "step": 2224 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 2225 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6199, - "step": 2226 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6168, - "step": 2227 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6496, - "step": 2228 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5556, - "step": 2229 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 2230 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5311, - "step": 2231 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6479, - "step": 2232 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6141, - "step": 2233 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6307, - "step": 2234 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.557, - "step": 2235 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5734, - "step": 2236 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 2237 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6509, - "step": 2238 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6112, - "step": 2239 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5942, - "step": 2240 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6638, - "step": 2241 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6011, - "step": 2242 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6665, - "step": 2243 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.7399, - "step": 2244 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6689, - "step": 2245 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6717, - "step": 2246 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6041, - "step": 2247 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5348, - "step": 2248 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6339, - "step": 2249 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6756, - "step": 2250 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.7195, - "step": 2251 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.645, - "step": 2252 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 2253 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6714, - "step": 2254 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6538, - "step": 2255 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6256, - "step": 2256 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6114, - "step": 2257 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.7101, - "step": 2258 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6184, - "step": 2259 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 2260 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5681, - "step": 2261 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.4982, - "step": 2262 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6462, - "step": 2263 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5496, - "step": 2264 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5898, - "step": 2265 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6123, - "step": 2266 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.5718, - "step": 2267 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6179, - "step": 2268 - }, - { - "epoch": 0.46, - "learning_rate": 2e-05, - "loss": 0.6371, - "step": 2269 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6981, - "step": 2270 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5284, - "step": 2271 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6835, - "step": 2272 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5876, - "step": 2273 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6186, - "step": 2274 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5773, - "step": 2275 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.521, - "step": 2276 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5708, - "step": 2277 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6213, - "step": 2278 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5823, - "step": 2279 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.674, - "step": 2280 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6071, - "step": 2281 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5759, - "step": 2282 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5898, - "step": 2283 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6078, - "step": 2284 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6493, - "step": 2285 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6432, - "step": 2286 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6356, - "step": 2287 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5564, - "step": 2288 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6686, - "step": 2289 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6642, - "step": 2290 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5947, - "step": 2291 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5797, - "step": 2292 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 2293 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.675, - "step": 2294 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 2295 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6242, - "step": 2296 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5868, - "step": 2297 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5157, - "step": 2298 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6515, - "step": 2299 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6017, - "step": 2300 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 2301 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.654, - "step": 2302 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6957, - "step": 2303 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 2304 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6264, - "step": 2305 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.7318, - "step": 2306 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.7089, - "step": 2307 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5667, - "step": 2308 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5908, - "step": 2309 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6266, - "step": 2310 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.615, - "step": 2311 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 2312 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5995, - "step": 2313 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.5969, - "step": 2314 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6659, - "step": 2315 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 2316 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6474, - "step": 2317 - }, - { - "epoch": 0.47, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 2318 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6218, - "step": 2319 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 2320 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5489, - "step": 2321 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5611, - "step": 2322 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6407, - "step": 2323 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6468, - "step": 2324 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 2325 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.701, - "step": 2326 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5771, - "step": 2327 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.7026, - "step": 2328 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6645, - "step": 2329 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6644, - "step": 2330 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6127, - "step": 2331 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5854, - "step": 2332 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 2333 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6832, - "step": 2334 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6477, - "step": 2335 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6198, - "step": 2336 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6739, - "step": 2337 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6559, - "step": 2338 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6323, - "step": 2339 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5659, - "step": 2340 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 2341 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6338, - "step": 2342 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.7173, - "step": 2343 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5341, - "step": 2344 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6326, - "step": 2345 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.664, - "step": 2346 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6186, - "step": 2347 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6197, - "step": 2348 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 2349 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5033, - "step": 2350 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5595, - "step": 2351 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5711, - "step": 2352 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5944, - "step": 2353 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6512, - "step": 2354 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6746, - "step": 2355 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6385, - "step": 2356 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6252, - "step": 2357 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6149, - "step": 2358 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.8232, - "step": 2359 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.5601, - "step": 2360 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6035, - "step": 2361 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6625, - "step": 2362 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 2363 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6861, - "step": 2364 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6997, - "step": 2365 - }, - { - "epoch": 0.48, - "learning_rate": 2e-05, - "loss": 0.6732, - "step": 2366 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6453, - "step": 2367 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5727, - "step": 2368 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6154, - "step": 2369 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5827, - "step": 2370 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6498, - "step": 2371 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6865, - "step": 2372 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 2373 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5367, - "step": 2374 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 2375 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5727, - "step": 2376 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 2377 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.728, - "step": 2378 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5914, - "step": 2379 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.653, - "step": 2380 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6889, - "step": 2381 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5872, - "step": 2382 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6495, - "step": 2383 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.587, - "step": 2384 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6934, - "step": 2385 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6607, - "step": 2386 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5986, - "step": 2387 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5844, - "step": 2388 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.593, - "step": 2389 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5707, - "step": 2390 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6785, - "step": 2391 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6377, - "step": 2392 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6155, - "step": 2393 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6246, - "step": 2394 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5711, - "step": 2395 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.59, - "step": 2396 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6334, - "step": 2397 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 2398 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6115, - "step": 2399 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6085, - "step": 2400 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.662, - "step": 2401 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 2402 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6868, - "step": 2403 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6633, - "step": 2404 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6526, - "step": 2405 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5685, - "step": 2406 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6836, - "step": 2407 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6667, - "step": 2408 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 2409 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 2410 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.685, - "step": 2411 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6862, - "step": 2412 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5955, - "step": 2413 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 2414 - }, - { - "epoch": 0.49, - "learning_rate": 2e-05, - "loss": 0.5852, - "step": 2415 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.7052, - "step": 2416 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.7438, - "step": 2417 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 2418 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6519, - "step": 2419 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 2420 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 2421 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6609, - "step": 2422 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6042, - "step": 2423 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6448, - "step": 2424 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6376, - "step": 2425 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6378, - "step": 2426 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5565, - "step": 2427 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6392, - "step": 2428 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5278, - "step": 2429 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6372, - "step": 2430 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.7268, - "step": 2431 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.622, - "step": 2432 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 2433 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5826, - "step": 2434 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6632, - "step": 2435 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.647, - "step": 2436 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5847, - "step": 2437 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5638, - "step": 2438 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6642, - "step": 2439 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6594, - "step": 2440 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6305, - "step": 2441 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5733, - "step": 2442 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5285, - "step": 2443 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.7061, - "step": 2444 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.631, - "step": 2445 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6467, - "step": 2446 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 2447 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.662, - "step": 2448 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6797, - "step": 2449 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.555, - "step": 2450 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5928, - "step": 2451 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6215, - "step": 2452 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6448, - "step": 2453 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6445, - "step": 2454 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6551, - "step": 2455 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.6021, - "step": 2456 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5339, - "step": 2457 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5972, - "step": 2458 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.566, - "step": 2459 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5384, - "step": 2460 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5965, - "step": 2461 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5882, - "step": 2462 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.5994, - "step": 2463 - }, - { - "epoch": 0.5, - "learning_rate": 2e-05, - "loss": 0.7526, - "step": 2464 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5372, - "step": 2465 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.528, - "step": 2466 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5972, - "step": 2467 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 2468 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6076, - "step": 2469 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6366, - "step": 2470 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.7078, - "step": 2471 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6699, - "step": 2472 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6969, - "step": 2473 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6449, - "step": 2474 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.509, - "step": 2475 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5836, - "step": 2476 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6106, - "step": 2477 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.587, - "step": 2478 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6541, - "step": 2479 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.652, - "step": 2480 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6392, - "step": 2481 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.4807, - "step": 2482 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5952, - "step": 2483 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5775, - "step": 2484 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6922, - "step": 2485 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6854, - "step": 2486 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.686, - "step": 2487 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.7139, - "step": 2488 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5932, - "step": 2489 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.607, - "step": 2490 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5184, - "step": 2491 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6095, - "step": 2492 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.68, - "step": 2493 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6059, - "step": 2494 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6481, - "step": 2495 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.768, - "step": 2496 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.589, - "step": 2497 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6391, - "step": 2498 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6293, - "step": 2499 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6606, - "step": 2500 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6698, - "step": 2501 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5372, - "step": 2502 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6615, - "step": 2503 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 2504 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6436, - "step": 2505 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6039, - "step": 2506 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5907, - "step": 2507 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 2508 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5945, - "step": 2509 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.5594, - "step": 2510 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6328, - "step": 2511 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.6059, - "step": 2512 - }, - { - "epoch": 0.51, - "learning_rate": 2e-05, - "loss": 0.601, - "step": 2513 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5872, - "step": 2514 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6579, - "step": 2515 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 2516 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7246, - "step": 2517 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5691, - "step": 2518 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6519, - "step": 2519 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6728, - "step": 2520 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5995, - "step": 2521 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5802, - "step": 2522 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7707, - "step": 2523 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7127, - "step": 2524 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6228, - "step": 2525 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6667, - "step": 2526 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 2527 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6909, - "step": 2528 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6213, - "step": 2529 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6344, - "step": 2530 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6914, - "step": 2531 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5493, - "step": 2532 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5979, - "step": 2533 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6285, - "step": 2534 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.66, - "step": 2535 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.668, - "step": 2536 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6359, - "step": 2537 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6576, - "step": 2538 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6721, - "step": 2539 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6477, - "step": 2540 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 2541 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6243, - "step": 2542 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6894, - "step": 2543 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 2544 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6346, - "step": 2545 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6127, - "step": 2546 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6457, - "step": 2547 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.636, - "step": 2548 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7277, - "step": 2549 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5404, - "step": 2550 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5154, - "step": 2551 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6798, - "step": 2552 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 2553 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7097, - "step": 2554 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6131, - "step": 2555 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7336, - "step": 2556 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6266, - "step": 2557 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6505, - "step": 2558 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.7309, - "step": 2559 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.5792, - "step": 2560 - }, - { - "epoch": 0.52, - "learning_rate": 2e-05, - "loss": 0.6298, - "step": 2561 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6225, - "step": 2562 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6557, - "step": 2563 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6578, - "step": 2564 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6742, - "step": 2565 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 2566 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6426, - "step": 2567 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6561, - "step": 2568 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5907, - "step": 2569 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.682, - "step": 2570 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6605, - "step": 2571 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6487, - "step": 2572 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6046, - "step": 2573 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.7092, - "step": 2574 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.7532, - "step": 2575 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.612, - "step": 2576 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6, - "step": 2577 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5703, - "step": 2578 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5537, - "step": 2579 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6666, - "step": 2580 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 2581 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 2582 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6804, - "step": 2583 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6692, - "step": 2584 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.561, - "step": 2585 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5832, - "step": 2586 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 2587 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6052, - "step": 2588 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 2589 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.7056, - "step": 2590 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 2591 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6733, - "step": 2592 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6541, - "step": 2593 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.554, - "step": 2594 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6426, - "step": 2595 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6891, - "step": 2596 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5588, - "step": 2597 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6653, - "step": 2598 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 2599 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5808, - "step": 2600 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 2601 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6819, - "step": 2602 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 2603 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6259, - "step": 2604 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6691, - "step": 2605 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6605, - "step": 2606 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6028, - "step": 2607 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.5948, - "step": 2608 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6433, - "step": 2609 - }, - { - "epoch": 0.53, - "learning_rate": 2e-05, - "loss": 0.6646, - "step": 2610 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5787, - "step": 2611 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5897, - "step": 2612 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5985, - "step": 2613 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6154, - "step": 2614 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6267, - "step": 2615 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6598, - "step": 2616 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6718, - "step": 2617 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6368, - "step": 2618 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6087, - "step": 2619 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6818, - "step": 2620 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6273, - "step": 2621 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5864, - "step": 2622 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.577, - "step": 2623 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6529, - "step": 2624 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5946, - "step": 2625 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6678, - "step": 2626 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.595, - "step": 2627 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 2628 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5782, - "step": 2629 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5456, - "step": 2630 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2631 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6654, - "step": 2632 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6436, - "step": 2633 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6401, - "step": 2634 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6186, - "step": 2635 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 2636 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6747, - "step": 2637 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2638 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5756, - "step": 2639 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 2640 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6442, - "step": 2641 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6372, - "step": 2642 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 2643 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6121, - "step": 2644 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 2645 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6089, - "step": 2646 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6514, - "step": 2647 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6431, - "step": 2648 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6628, - "step": 2649 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5886, - "step": 2650 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 2651 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.585, - "step": 2652 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 2653 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6249, - "step": 2654 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5501, - "step": 2655 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5652, - "step": 2656 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.5429, - "step": 2657 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6649, - "step": 2658 - }, - { - "epoch": 0.54, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 2659 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6933, - "step": 2660 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5742, - "step": 2661 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6893, - "step": 2662 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 2663 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6725, - "step": 2664 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5966, - "step": 2665 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6405, - "step": 2666 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6348, - "step": 2667 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6313, - "step": 2668 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5485, - "step": 2669 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5887, - "step": 2670 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6813, - "step": 2671 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.7084, - "step": 2672 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5974, - "step": 2673 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6155, - "step": 2674 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6129, - "step": 2675 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5948, - "step": 2676 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.676, - "step": 2677 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5447, - "step": 2678 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6383, - "step": 2679 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6223, - "step": 2680 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 2681 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5364, - "step": 2682 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5848, - "step": 2683 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6116, - "step": 2684 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6893, - "step": 2685 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6532, - "step": 2686 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6486, - "step": 2687 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5852, - "step": 2688 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6207, - "step": 2689 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 2690 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6579, - "step": 2691 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6402, - "step": 2692 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5801, - "step": 2693 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6602, - "step": 2694 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6319, - "step": 2695 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6458, - "step": 2696 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5485, - "step": 2697 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.646, - "step": 2698 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6701, - "step": 2699 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6926, - "step": 2700 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.7041, - "step": 2701 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5302, - "step": 2702 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 2703 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 2704 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6509, - "step": 2705 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6937, - "step": 2706 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.6749, - "step": 2707 - }, - { - "epoch": 0.55, - "learning_rate": 2e-05, - "loss": 0.5693, - "step": 2708 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6359, - "step": 2709 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5941, - "step": 2710 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6377, - "step": 2711 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5596, - "step": 2712 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.7149, - "step": 2713 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 2714 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.582, - "step": 2715 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.573, - "step": 2716 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6477, - "step": 2717 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6641, - "step": 2718 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6993, - "step": 2719 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6889, - "step": 2720 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6712, - "step": 2721 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.7548, - "step": 2722 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5614, - "step": 2723 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5537, - "step": 2724 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.563, - "step": 2725 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.7226, - "step": 2726 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6076, - "step": 2727 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.7257, - "step": 2728 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.7209, - "step": 2729 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6338, - "step": 2730 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6143, - "step": 2731 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6878, - "step": 2732 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 2733 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6083, - "step": 2734 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.579, - "step": 2735 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6503, - "step": 2736 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5454, - "step": 2737 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5997, - "step": 2738 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5939, - "step": 2739 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 2740 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6319, - "step": 2741 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5415, - "step": 2742 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5619, - "step": 2743 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 2744 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6238, - "step": 2745 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6223, - "step": 2746 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5888, - "step": 2747 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6702, - "step": 2748 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6637, - "step": 2749 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6916, - "step": 2750 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6381, - "step": 2751 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5832, - "step": 2752 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2753 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5623, - "step": 2754 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 2755 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.5361, - "step": 2756 - }, - { - "epoch": 0.56, - "learning_rate": 2e-05, - "loss": 0.6697, - "step": 2757 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5953, - "step": 2758 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5498, - "step": 2759 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5889, - "step": 2760 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 2761 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6734, - "step": 2762 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5591, - "step": 2763 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5906, - "step": 2764 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7074, - "step": 2765 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6229, - "step": 2766 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5673, - "step": 2767 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6339, - "step": 2768 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6642, - "step": 2769 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.631, - "step": 2770 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6646, - "step": 2771 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5906, - "step": 2772 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 2773 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6653, - "step": 2774 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6955, - "step": 2775 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7395, - "step": 2776 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 2777 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 2778 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.57, - "step": 2779 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7209, - "step": 2780 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5792, - "step": 2781 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7018, - "step": 2782 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5482, - "step": 2783 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6234, - "step": 2784 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 2785 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 2786 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 2787 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5799, - "step": 2788 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 2789 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5751, - "step": 2790 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5916, - "step": 2791 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 2792 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 2793 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6048, - "step": 2794 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 2795 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7095, - "step": 2796 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.7051, - "step": 2797 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6125, - "step": 2798 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5874, - "step": 2799 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6862, - "step": 2800 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6305, - "step": 2801 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6225, - "step": 2802 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6191, - "step": 2803 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5983, - "step": 2804 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.6724, - "step": 2805 - }, - { - "epoch": 0.57, - "learning_rate": 2e-05, - "loss": 0.5826, - "step": 2806 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.4962, - "step": 2807 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6562, - "step": 2808 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 2809 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2810 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.682, - "step": 2811 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5949, - "step": 2812 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6141, - "step": 2813 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6656, - "step": 2814 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6061, - "step": 2815 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5822, - "step": 2816 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.7127, - "step": 2817 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5622, - "step": 2818 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6404, - "step": 2819 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.688, - "step": 2820 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6347, - "step": 2821 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6374, - "step": 2822 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6796, - "step": 2823 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6582, - "step": 2824 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5922, - "step": 2825 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5898, - "step": 2826 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6627, - "step": 2827 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 2828 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5815, - "step": 2829 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5564, - "step": 2830 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5954, - "step": 2831 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 2832 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6667, - "step": 2833 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 2834 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.668, - "step": 2835 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6878, - "step": 2836 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.663, - "step": 2837 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5806, - "step": 2838 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2839 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5877, - "step": 2840 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5908, - "step": 2841 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6167, - "step": 2842 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6475, - "step": 2843 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6229, - "step": 2844 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5924, - "step": 2845 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5439, - "step": 2846 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6261, - "step": 2847 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5962, - "step": 2848 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6447, - "step": 2849 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.637, - "step": 2850 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6479, - "step": 2851 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.665, - "step": 2852 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.6033, - "step": 2853 - }, - { - "epoch": 0.58, - "learning_rate": 2e-05, - "loss": 0.5583, - "step": 2854 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5807, - "step": 2855 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6346, - "step": 2856 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5858, - "step": 2857 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 2858 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6685, - "step": 2859 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6131, - "step": 2860 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.575, - "step": 2861 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5971, - "step": 2862 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6491, - "step": 2863 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.517, - "step": 2864 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 2865 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5841, - "step": 2866 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6437, - "step": 2867 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.684, - "step": 2868 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 2869 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5573, - "step": 2870 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 2871 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5737, - "step": 2872 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 2873 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6292, - "step": 2874 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6976, - "step": 2875 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5643, - "step": 2876 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6781, - "step": 2877 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6664, - "step": 2878 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6022, - "step": 2879 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5809, - "step": 2880 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5691, - "step": 2881 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.7112, - "step": 2882 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6138, - "step": 2883 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6188, - "step": 2884 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6052, - "step": 2885 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.693, - "step": 2886 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6209, - "step": 2887 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6264, - "step": 2888 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5119, - "step": 2889 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.4877, - "step": 2890 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 2891 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5075, - "step": 2892 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6338, - "step": 2893 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6527, - "step": 2894 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6572, - "step": 2895 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 2896 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 2897 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 2898 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6709, - "step": 2899 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6828, - "step": 2900 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5939, - "step": 2901 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.5238, - "step": 2902 - }, - { - "epoch": 0.59, - "learning_rate": 2e-05, - "loss": 0.6681, - "step": 2903 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.653, - "step": 2904 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6447, - "step": 2905 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5814, - "step": 2906 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5896, - "step": 2907 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5722, - "step": 2908 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6446, - "step": 2909 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6457, - "step": 2910 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5332, - "step": 2911 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6352, - "step": 2912 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6174, - "step": 2913 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5535, - "step": 2914 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5556, - "step": 2915 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6433, - "step": 2916 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5279, - "step": 2917 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.7602, - "step": 2918 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6375, - "step": 2919 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5531, - "step": 2920 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6269, - "step": 2921 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5801, - "step": 2922 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.575, - "step": 2923 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.614, - "step": 2924 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6612, - "step": 2925 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5827, - "step": 2926 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5809, - "step": 2927 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6156, - "step": 2928 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6497, - "step": 2929 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6342, - "step": 2930 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6436, - "step": 2931 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6257, - "step": 2932 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6566, - "step": 2933 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.552, - "step": 2934 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.667, - "step": 2935 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6558, - "step": 2936 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5543, - "step": 2937 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.654, - "step": 2938 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5791, - "step": 2939 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5693, - "step": 2940 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6772, - "step": 2941 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6018, - "step": 2942 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5483, - "step": 2943 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5912, - "step": 2944 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.5548, - "step": 2945 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 2946 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6633, - "step": 2947 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6134, - "step": 2948 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 2949 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.58, - "step": 2950 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6196, - "step": 2951 - }, - { - "epoch": 0.6, - "learning_rate": 2e-05, - "loss": 0.6089, - "step": 2952 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6375, - "step": 2953 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6526, - "step": 2954 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 2955 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6873, - "step": 2956 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6049, - "step": 2957 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6496, - "step": 2958 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5999, - "step": 2959 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5646, - "step": 2960 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6138, - "step": 2961 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6828, - "step": 2962 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6962, - "step": 2963 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.7002, - "step": 2964 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.649, - "step": 2965 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.595, - "step": 2966 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6428, - "step": 2967 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6445, - "step": 2968 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5838, - "step": 2969 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5802, - "step": 2970 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5558, - "step": 2971 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6317, - "step": 2972 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6669, - "step": 2973 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.64, - "step": 2974 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6432, - "step": 2975 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5538, - "step": 2976 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5424, - "step": 2977 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6296, - "step": 2978 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6735, - "step": 2979 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6836, - "step": 2980 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6532, - "step": 2981 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.7099, - "step": 2982 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 2983 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6404, - "step": 2984 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6428, - "step": 2985 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5748, - "step": 2986 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6477, - "step": 2987 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6639, - "step": 2988 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6019, - "step": 2989 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 2990 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6307, - "step": 2991 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 2992 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6348, - "step": 2993 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5943, - "step": 2994 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.7219, - "step": 2995 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.5684, - "step": 2996 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 2997 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6196, - "step": 2998 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6001, - "step": 2999 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.73, - "step": 3000 - }, - { - "epoch": 0.61, - "learning_rate": 2e-05, - "loss": 0.6626, - "step": 3001 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6548, - "step": 3002 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6231, - "step": 3003 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6317, - "step": 3004 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6098, - "step": 3005 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5282, - "step": 3006 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6268, - "step": 3007 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.562, - "step": 3008 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5521, - "step": 3009 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6476, - "step": 3010 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5945, - "step": 3011 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6244, - "step": 3012 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6936, - "step": 3013 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.552, - "step": 3014 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 3015 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6332, - "step": 3016 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.715, - "step": 3017 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.613, - "step": 3018 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5982, - "step": 3019 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6818, - "step": 3020 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6225, - "step": 3021 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6441, - "step": 3022 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6441, - "step": 3023 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6982, - "step": 3024 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.592, - "step": 3025 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 3026 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5687, - "step": 3027 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 3028 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6371, - "step": 3029 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6404, - "step": 3030 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 3031 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 3032 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6621, - "step": 3033 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5951, - "step": 3034 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6407, - "step": 3035 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6584, - "step": 3036 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 3037 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5785, - "step": 3038 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5695, - "step": 3039 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5846, - "step": 3040 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5634, - "step": 3041 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5879, - "step": 3042 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.66, - "step": 3043 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6465, - "step": 3044 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5377, - "step": 3045 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6763, - "step": 3046 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.6369, - "step": 3047 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.7336, - "step": 3048 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.5884, - "step": 3049 - }, - { - "epoch": 0.62, - "learning_rate": 2e-05, - "loss": 0.576, - "step": 3050 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5018, - "step": 3051 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5866, - "step": 3052 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.601, - "step": 3053 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.56, - "step": 3054 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5853, - "step": 3055 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.673, - "step": 3056 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5726, - "step": 3057 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.7053, - "step": 3058 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5819, - "step": 3059 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.7131, - "step": 3060 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6461, - "step": 3061 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6543, - "step": 3062 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 3063 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5768, - "step": 3064 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.7152, - "step": 3065 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.573, - "step": 3066 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6162, - "step": 3067 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 3068 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5854, - "step": 3069 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5876, - "step": 3070 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5715, - "step": 3071 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5328, - "step": 3072 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6704, - "step": 3073 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6475, - "step": 3074 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5733, - "step": 3075 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5768, - "step": 3076 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.574, - "step": 3077 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6771, - "step": 3078 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.621, - "step": 3079 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6634, - "step": 3080 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6122, - "step": 3081 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6032, - "step": 3082 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6549, - "step": 3083 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6205, - "step": 3084 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6944, - "step": 3085 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6567, - "step": 3086 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.54, - "step": 3087 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.584, - "step": 3088 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 3089 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.7397, - "step": 3090 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.5997, - "step": 3091 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6544, - "step": 3092 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6611, - "step": 3093 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 3094 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.543, - "step": 3095 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6246, - "step": 3096 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6373, - "step": 3097 - }, - { - "epoch": 0.63, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 3098 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6588, - "step": 3099 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.683, - "step": 3100 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6588, - "step": 3101 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6261, - "step": 3102 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6205, - "step": 3103 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6988, - "step": 3104 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6254, - "step": 3105 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.7105, - "step": 3106 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6727, - "step": 3107 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6823, - "step": 3108 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.662, - "step": 3109 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6327, - "step": 3110 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5635, - "step": 3111 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5977, - "step": 3112 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 3113 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5913, - "step": 3114 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6536, - "step": 3115 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6834, - "step": 3116 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.661, - "step": 3117 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6314, - "step": 3118 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.564, - "step": 3119 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5826, - "step": 3120 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.659, - "step": 3121 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5927, - "step": 3122 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5881, - "step": 3123 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.7185, - "step": 3124 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6515, - "step": 3125 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6201, - "step": 3126 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 3127 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5592, - "step": 3128 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5791, - "step": 3129 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6122, - "step": 3130 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6873, - "step": 3131 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5922, - "step": 3132 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.586, - "step": 3133 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 3134 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6058, - "step": 3135 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6262, - "step": 3136 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5972, - "step": 3137 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6637, - "step": 3138 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6744, - "step": 3139 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6583, - "step": 3140 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6925, - "step": 3141 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.615, - "step": 3142 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6145, - "step": 3143 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6636, - "step": 3144 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 3145 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.6638, - "step": 3146 - }, - { - "epoch": 0.64, - "learning_rate": 2e-05, - "loss": 0.5544, - "step": 3147 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5855, - "step": 3148 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6845, - "step": 3149 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6332, - "step": 3150 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 3151 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6695, - "step": 3152 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5802, - "step": 3153 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6395, - "step": 3154 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6467, - "step": 3155 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.552, - "step": 3156 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6074, - "step": 3157 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5949, - "step": 3158 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 3159 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6151, - "step": 3160 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.666, - "step": 3161 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5923, - "step": 3162 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6071, - "step": 3163 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5807, - "step": 3164 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5796, - "step": 3165 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5917, - "step": 3166 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.7181, - "step": 3167 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.7303, - "step": 3168 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5746, - "step": 3169 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5799, - "step": 3170 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5459, - "step": 3171 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5648, - "step": 3172 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6756, - "step": 3173 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 3174 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.563, - "step": 3175 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 3176 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6575, - "step": 3177 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6162, - "step": 3178 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6003, - "step": 3179 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5867, - "step": 3180 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.615, - "step": 3181 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5885, - "step": 3182 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6793, - "step": 3183 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5712, - "step": 3184 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6018, - "step": 3185 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6031, - "step": 3186 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 3187 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6778, - "step": 3188 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.653, - "step": 3189 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6807, - "step": 3190 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6326, - "step": 3191 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 3192 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5701, - "step": 3193 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6245, - "step": 3194 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.5378, - "step": 3195 - }, - { - "epoch": 0.65, - "learning_rate": 2e-05, - "loss": 0.6243, - "step": 3196 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.593, - "step": 3197 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5684, - "step": 3198 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5958, - "step": 3199 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6335, - "step": 3200 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.598, - "step": 3201 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6614, - "step": 3202 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5647, - "step": 3203 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5798, - "step": 3204 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5828, - "step": 3205 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6701, - "step": 3206 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 3207 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6064, - "step": 3208 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5923, - "step": 3209 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6193, - "step": 3210 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6716, - "step": 3211 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6523, - "step": 3212 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6838, - "step": 3213 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5597, - "step": 3214 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 3215 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6458, - "step": 3216 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5371, - "step": 3217 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.618, - "step": 3218 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5904, - "step": 3219 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6092, - "step": 3220 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.623, - "step": 3221 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6739, - "step": 3222 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.623, - "step": 3223 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6892, - "step": 3224 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5872, - "step": 3225 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5195, - "step": 3226 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5945, - "step": 3227 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6269, - "step": 3228 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.7308, - "step": 3229 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6847, - "step": 3230 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.7009, - "step": 3231 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6055, - "step": 3232 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6149, - "step": 3233 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 3234 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5657, - "step": 3235 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6557, - "step": 3236 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 3237 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5901, - "step": 3238 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.606, - "step": 3239 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.7035, - "step": 3240 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.5786, - "step": 3241 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6128, - "step": 3242 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.613, - "step": 3243 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.7325, - "step": 3244 - }, - { - "epoch": 0.66, - "learning_rate": 2e-05, - "loss": 0.6015, - "step": 3245 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6549, - "step": 3246 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5526, - "step": 3247 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5932, - "step": 3248 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5813, - "step": 3249 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5902, - "step": 3250 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5815, - "step": 3251 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6674, - "step": 3252 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6566, - "step": 3253 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5853, - "step": 3254 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6627, - "step": 3255 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 3256 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5825, - "step": 3257 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5821, - "step": 3258 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5938, - "step": 3259 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5578, - "step": 3260 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5464, - "step": 3261 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.7192, - "step": 3262 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6788, - "step": 3263 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.4343, - "step": 3264 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.8037, - "step": 3265 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5383, - "step": 3266 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 3267 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6292, - "step": 3268 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5852, - "step": 3269 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6135, - "step": 3270 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.709, - "step": 3271 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.716, - "step": 3272 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6312, - "step": 3273 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6192, - "step": 3274 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5404, - "step": 3275 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5871, - "step": 3276 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6241, - "step": 3277 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5949, - "step": 3278 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6358, - "step": 3279 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6145, - "step": 3280 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5875, - "step": 3281 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 3282 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5507, - "step": 3283 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 3284 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6508, - "step": 3285 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6409, - "step": 3286 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5428, - "step": 3287 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6007, - "step": 3288 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5093, - "step": 3289 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.5981, - "step": 3290 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.7142, - "step": 3291 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6049, - "step": 3292 - }, - { - "epoch": 0.67, - "learning_rate": 2e-05, - "loss": 0.6766, - "step": 3293 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5471, - "step": 3294 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5858, - "step": 3295 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6009, - "step": 3296 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6134, - "step": 3297 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6585, - "step": 3298 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.4991, - "step": 3299 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.552, - "step": 3300 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6072, - "step": 3301 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 3302 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6592, - "step": 3303 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6129, - "step": 3304 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6053, - "step": 3305 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6896, - "step": 3306 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5797, - "step": 3307 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5925, - "step": 3308 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.68, - "step": 3309 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 3310 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6473, - "step": 3311 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6133, - "step": 3312 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6133, - "step": 3313 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5463, - "step": 3314 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5623, - "step": 3315 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6884, - "step": 3316 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.4935, - "step": 3317 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6539, - "step": 3318 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5346, - "step": 3319 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5581, - "step": 3320 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5712, - "step": 3321 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 3322 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6208, - "step": 3323 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6842, - "step": 3324 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.7104, - "step": 3325 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6709, - "step": 3326 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5245, - "step": 3327 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.4702, - "step": 3328 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6378, - "step": 3329 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6187, - "step": 3330 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5984, - "step": 3331 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5959, - "step": 3332 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6424, - "step": 3333 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6286, - "step": 3334 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6067, - "step": 3335 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 3336 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6336, - "step": 3337 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5324, - "step": 3338 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5682, - "step": 3339 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 3340 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5308, - "step": 3341 - }, - { - "epoch": 0.68, - "learning_rate": 2e-05, - "loss": 0.5294, - "step": 3342 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 3343 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6625, - "step": 3344 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6601, - "step": 3345 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.7123, - "step": 3346 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6007, - "step": 3347 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6428, - "step": 3348 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 3349 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 3350 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 3351 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6585, - "step": 3352 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6568, - "step": 3353 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5992, - "step": 3354 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5271, - "step": 3355 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5888, - "step": 3356 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6004, - "step": 3357 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6257, - "step": 3358 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.529, - "step": 3359 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 3360 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.7126, - "step": 3361 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6145, - "step": 3362 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5767, - "step": 3363 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5563, - "step": 3364 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5996, - "step": 3365 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5536, - "step": 3366 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6363, - "step": 3367 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.7099, - "step": 3368 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6449, - "step": 3369 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.612, - "step": 3370 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 3371 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6883, - "step": 3372 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5474, - "step": 3373 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5486, - "step": 3374 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5697, - "step": 3375 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5692, - "step": 3376 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5983, - "step": 3377 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 3378 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.51, - "step": 3379 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5485, - "step": 3380 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5779, - "step": 3381 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6084, - "step": 3382 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6798, - "step": 3383 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6685, - "step": 3384 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.5769, - "step": 3385 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.58, - "step": 3386 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6136, - "step": 3387 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6381, - "step": 3388 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6428, - "step": 3389 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6795, - "step": 3390 - }, - { - "epoch": 0.69, - "learning_rate": 2e-05, - "loss": 0.6354, - "step": 3391 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6454, - "step": 3392 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6008, - "step": 3393 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 3394 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6648, - "step": 3395 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5928, - "step": 3396 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6264, - "step": 3397 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6625, - "step": 3398 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 3399 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6549, - "step": 3400 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6139, - "step": 3401 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6232, - "step": 3402 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6473, - "step": 3403 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5778, - "step": 3404 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6365, - "step": 3405 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6061, - "step": 3406 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6395, - "step": 3407 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5831, - "step": 3408 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6119, - "step": 3409 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.613, - "step": 3410 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6758, - "step": 3411 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5957, - "step": 3412 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6629, - "step": 3413 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5447, - "step": 3414 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.639, - "step": 3415 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 3416 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.629, - "step": 3417 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.655, - "step": 3418 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6065, - "step": 3419 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5559, - "step": 3420 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6733, - "step": 3421 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5442, - "step": 3422 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6429, - "step": 3423 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 3424 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6819, - "step": 3425 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6105, - "step": 3426 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 3427 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6816, - "step": 3428 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6884, - "step": 3429 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6066, - "step": 3430 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6652, - "step": 3431 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5702, - "step": 3432 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5443, - "step": 3433 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6195, - "step": 3434 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.5429, - "step": 3435 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6298, - "step": 3436 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6244, - "step": 3437 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6402, - "step": 3438 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6688, - "step": 3439 - }, - { - "epoch": 0.7, - "learning_rate": 2e-05, - "loss": 0.6463, - "step": 3440 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.625, - "step": 3441 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5165, - "step": 3442 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 3443 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6797, - "step": 3444 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5861, - "step": 3445 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 3446 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.7004, - "step": 3447 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6922, - "step": 3448 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.547, - "step": 3449 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6619, - "step": 3450 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5535, - "step": 3451 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6374, - "step": 3452 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6431, - "step": 3453 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6551, - "step": 3454 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 3455 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6492, - "step": 3456 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6384, - "step": 3457 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.639, - "step": 3458 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6942, - "step": 3459 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 3460 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 3461 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5702, - "step": 3462 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5538, - "step": 3463 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 3464 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.638, - "step": 3465 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5969, - "step": 3466 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6677, - "step": 3467 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6094, - "step": 3468 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 3469 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5963, - "step": 3470 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6107, - "step": 3471 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6161, - "step": 3472 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5314, - "step": 3473 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6155, - "step": 3474 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5914, - "step": 3475 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6295, - "step": 3476 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5754, - "step": 3477 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5012, - "step": 3478 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6202, - "step": 3479 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6301, - "step": 3480 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6458, - "step": 3481 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5961, - "step": 3482 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6038, - "step": 3483 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6566, - "step": 3484 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.5977, - "step": 3485 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6312, - "step": 3486 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6846, - "step": 3487 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6677, - "step": 3488 - }, - { - "epoch": 0.71, - "learning_rate": 2e-05, - "loss": 0.6379, - "step": 3489 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5807, - "step": 3490 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6527, - "step": 3491 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5324, - "step": 3492 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5484, - "step": 3493 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6442, - "step": 3494 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6943, - "step": 3495 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5931, - "step": 3496 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 3497 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 3498 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 3499 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 3500 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5716, - "step": 3501 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.665, - "step": 3502 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6531, - "step": 3503 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6812, - "step": 3504 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 3505 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6479, - "step": 3506 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6869, - "step": 3507 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6371, - "step": 3508 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6238, - "step": 3509 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 3510 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6063, - "step": 3511 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6956, - "step": 3512 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.7127, - "step": 3513 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5648, - "step": 3514 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5347, - "step": 3515 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6906, - "step": 3516 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6183, - "step": 3517 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5987, - "step": 3518 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6806, - "step": 3519 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6368, - "step": 3520 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6301, - "step": 3521 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6359, - "step": 3522 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 3523 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6032, - "step": 3524 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6776, - "step": 3525 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6685, - "step": 3526 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5659, - "step": 3527 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5944, - "step": 3528 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6067, - "step": 3529 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5963, - "step": 3530 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5745, - "step": 3531 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5965, - "step": 3532 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6526, - "step": 3533 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6619, - "step": 3534 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.5674, - "step": 3535 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6876, - "step": 3536 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 3537 - }, - { - "epoch": 0.72, - "learning_rate": 2e-05, - "loss": 0.6867, - "step": 3538 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5649, - "step": 3539 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5193, - "step": 3540 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5854, - "step": 3541 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6202, - "step": 3542 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6229, - "step": 3543 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5644, - "step": 3544 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6489, - "step": 3545 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5582, - "step": 3546 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5405, - "step": 3547 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.569, - "step": 3548 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6118, - "step": 3549 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6537, - "step": 3550 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 3551 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.7067, - "step": 3552 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5973, - "step": 3553 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6272, - "step": 3554 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5975, - "step": 3555 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5831, - "step": 3556 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 3557 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6044, - "step": 3558 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5448, - "step": 3559 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5289, - "step": 3560 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6013, - "step": 3561 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5952, - "step": 3562 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6306, - "step": 3563 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 3564 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 3565 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6734, - "step": 3566 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6052, - "step": 3567 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5564, - "step": 3568 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5522, - "step": 3569 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5821, - "step": 3570 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5911, - "step": 3571 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6348, - "step": 3572 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 3573 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6791, - "step": 3574 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5989, - "step": 3575 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.507, - "step": 3576 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6673, - "step": 3577 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5401, - "step": 3578 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5465, - "step": 3579 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6245, - "step": 3580 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.6143, - "step": 3581 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 3582 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5164, - "step": 3583 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.694, - "step": 3584 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5606, - "step": 3585 - }, - { - "epoch": 0.73, - "learning_rate": 2e-05, - "loss": 0.5453, - "step": 3586 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6041, - "step": 3587 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6074, - "step": 3588 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6136, - "step": 3589 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.569, - "step": 3590 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5716, - "step": 3591 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5751, - "step": 3592 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 3593 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5651, - "step": 3594 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6391, - "step": 3595 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5731, - "step": 3596 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6217, - "step": 3597 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 3598 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 3599 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5644, - "step": 3600 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6432, - "step": 3601 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6135, - "step": 3602 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5216, - "step": 3603 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6109, - "step": 3604 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6024, - "step": 3605 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5845, - "step": 3606 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6072, - "step": 3607 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6152, - "step": 3608 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.7051, - "step": 3609 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 3610 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.54, - "step": 3611 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5309, - "step": 3612 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5471, - "step": 3613 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5654, - "step": 3614 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.626, - "step": 3615 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 3616 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6164, - "step": 3617 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 3618 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6586, - "step": 3619 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6465, - "step": 3620 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6012, - "step": 3621 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6205, - "step": 3622 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 3623 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 3624 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6494, - "step": 3625 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5892, - "step": 3626 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5742, - "step": 3627 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5856, - "step": 3628 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6798, - "step": 3629 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.603, - "step": 3630 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.575, - "step": 3631 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6801, - "step": 3632 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5739, - "step": 3633 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.6666, - "step": 3634 - }, - { - "epoch": 0.74, - "learning_rate": 2e-05, - "loss": 0.5584, - "step": 3635 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5898, - "step": 3636 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 3637 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6021, - "step": 3638 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5795, - "step": 3639 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5843, - "step": 3640 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5536, - "step": 3641 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6476, - "step": 3642 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5506, - "step": 3643 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5981, - "step": 3644 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5513, - "step": 3645 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5042, - "step": 3646 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 3647 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 3648 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6694, - "step": 3649 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6043, - "step": 3650 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 3651 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5012, - "step": 3652 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5496, - "step": 3653 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6119, - "step": 3654 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6112, - "step": 3655 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6143, - "step": 3656 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.615, - "step": 3657 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6603, - "step": 3658 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.586, - "step": 3659 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6417, - "step": 3660 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6163, - "step": 3661 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6387, - "step": 3662 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5936, - "step": 3663 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6679, - "step": 3664 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.7115, - "step": 3665 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5151, - "step": 3666 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6315, - "step": 3667 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6287, - "step": 3668 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6188, - "step": 3669 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 3670 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.7096, - "step": 3671 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5356, - "step": 3672 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.4695, - "step": 3673 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6327, - "step": 3674 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6131, - "step": 3675 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6533, - "step": 3676 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5611, - "step": 3677 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5943, - "step": 3678 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5938, - "step": 3679 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5753, - "step": 3680 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5946, - "step": 3681 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.5761, - "step": 3682 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.6673, - "step": 3683 - }, - { - "epoch": 0.75, - "learning_rate": 2e-05, - "loss": 0.606, - "step": 3684 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5625, - "step": 3685 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 3686 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.7037, - "step": 3687 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5647, - "step": 3688 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6821, - "step": 3689 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5694, - "step": 3690 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.4828, - "step": 3691 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5693, - "step": 3692 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6126, - "step": 3693 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6763, - "step": 3694 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5499, - "step": 3695 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5814, - "step": 3696 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6729, - "step": 3697 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6566, - "step": 3698 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5922, - "step": 3699 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6842, - "step": 3700 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 3701 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5867, - "step": 3702 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5665, - "step": 3703 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5711, - "step": 3704 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6282, - "step": 3705 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5862, - "step": 3706 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5773, - "step": 3707 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5526, - "step": 3708 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.7218, - "step": 3709 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6357, - "step": 3710 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5648, - "step": 3711 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6194, - "step": 3712 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6307, - "step": 3713 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5892, - "step": 3714 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6125, - "step": 3715 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.7214, - "step": 3716 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5968, - "step": 3717 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6651, - "step": 3718 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.7007, - "step": 3719 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5946, - "step": 3720 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6793, - "step": 3721 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.704, - "step": 3722 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5808, - "step": 3723 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6115, - "step": 3724 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6164, - "step": 3725 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 3726 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6259, - "step": 3727 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.679, - "step": 3728 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.61, - "step": 3729 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.5947, - "step": 3730 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 3731 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.6276, - "step": 3732 - }, - { - "epoch": 0.76, - "learning_rate": 2e-05, - "loss": 0.687, - "step": 3733 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.611, - "step": 3734 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6894, - "step": 3735 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.712, - "step": 3736 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6045, - "step": 3737 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6358, - "step": 3738 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5887, - "step": 3739 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5817, - "step": 3740 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6625, - "step": 3741 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5447, - "step": 3742 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5796, - "step": 3743 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6503, - "step": 3744 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5494, - "step": 3745 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 3746 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5591, - "step": 3747 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6855, - "step": 3748 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.4923, - "step": 3749 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5541, - "step": 3750 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 3751 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6438, - "step": 3752 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 3753 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5657, - "step": 3754 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 3755 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6389, - "step": 3756 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6387, - "step": 3757 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6451, - "step": 3758 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5505, - "step": 3759 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 3760 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6157, - "step": 3761 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5746, - "step": 3762 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.558, - "step": 3763 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6815, - "step": 3764 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6381, - "step": 3765 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6157, - "step": 3766 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6117, - "step": 3767 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6398, - "step": 3768 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.7021, - "step": 3769 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6888, - "step": 3770 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6157, - "step": 3771 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.633, - "step": 3772 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.605, - "step": 3773 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6358, - "step": 3774 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6074, - "step": 3775 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 3776 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5642, - "step": 3777 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.5975, - "step": 3778 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.4607, - "step": 3779 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6857, - "step": 3780 - }, - { - "epoch": 0.77, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 3781 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5655, - "step": 3782 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 3783 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6373, - "step": 3784 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6252, - "step": 3785 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 3786 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6498, - "step": 3787 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6089, - "step": 3788 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6771, - "step": 3789 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6159, - "step": 3790 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 3791 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5603, - "step": 3792 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6312, - "step": 3793 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5586, - "step": 3794 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6326, - "step": 3795 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5804, - "step": 3796 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6352, - "step": 3797 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6674, - "step": 3798 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5453, - "step": 3799 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6044, - "step": 3800 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6062, - "step": 3801 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5371, - "step": 3802 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5649, - "step": 3803 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.625, - "step": 3804 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.617, - "step": 3805 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6592, - "step": 3806 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 3807 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6455, - "step": 3808 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5451, - "step": 3809 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6372, - "step": 3810 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5905, - "step": 3811 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5741, - "step": 3812 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6209, - "step": 3813 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5791, - "step": 3814 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.653, - "step": 3815 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5351, - "step": 3816 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6055, - "step": 3817 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6215, - "step": 3818 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.624, - "step": 3819 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5875, - "step": 3820 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5985, - "step": 3821 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.7211, - "step": 3822 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5556, - "step": 3823 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6247, - "step": 3824 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5799, - "step": 3825 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6025, - "step": 3826 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5863, - "step": 3827 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 3828 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.5747, - "step": 3829 - }, - { - "epoch": 0.78, - "learning_rate": 2e-05, - "loss": 0.6057, - "step": 3830 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.7293, - "step": 3831 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5988, - "step": 3832 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6193, - "step": 3833 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5534, - "step": 3834 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6062, - "step": 3835 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.588, - "step": 3836 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.4574, - "step": 3837 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6094, - "step": 3838 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.647, - "step": 3839 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6233, - "step": 3840 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.573, - "step": 3841 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6151, - "step": 3842 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5484, - "step": 3843 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6184, - "step": 3844 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6789, - "step": 3845 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6593, - "step": 3846 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5609, - "step": 3847 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 3848 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6248, - "step": 3849 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6541, - "step": 3850 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5882, - "step": 3851 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.7065, - "step": 3852 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5349, - "step": 3853 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5571, - "step": 3854 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 3855 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5837, - "step": 3856 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5988, - "step": 3857 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6805, - "step": 3858 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5797, - "step": 3859 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6564, - "step": 3860 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6533, - "step": 3861 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6649, - "step": 3862 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6709, - "step": 3863 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6573, - "step": 3864 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6475, - "step": 3865 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.657, - "step": 3866 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6695, - "step": 3867 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5583, - "step": 3868 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6046, - "step": 3869 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.587, - "step": 3870 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5909, - "step": 3871 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6441, - "step": 3872 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6201, - "step": 3873 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5909, - "step": 3874 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5653, - "step": 3875 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.614, - "step": 3876 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6106, - "step": 3877 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.6668, - "step": 3878 - }, - { - "epoch": 0.79, - "learning_rate": 2e-05, - "loss": 0.5382, - "step": 3879 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6425, - "step": 3880 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.7068, - "step": 3881 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6155, - "step": 3882 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6723, - "step": 3883 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5281, - "step": 3884 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 3885 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5299, - "step": 3886 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6508, - "step": 3887 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.633, - "step": 3888 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 3889 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6599, - "step": 3890 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5811, - "step": 3891 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5517, - "step": 3892 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.4848, - "step": 3893 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5616, - "step": 3894 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6025, - "step": 3895 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6786, - "step": 3896 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5682, - "step": 3897 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6446, - "step": 3898 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.596, - "step": 3899 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5781, - "step": 3900 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5954, - "step": 3901 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5125, - "step": 3902 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5073, - "step": 3903 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.666, - "step": 3904 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6736, - "step": 3905 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.47, - "step": 3906 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 3907 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 3908 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6884, - "step": 3909 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6169, - "step": 3910 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.7529, - "step": 3911 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5379, - "step": 3912 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6711, - "step": 3913 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6408, - "step": 3914 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6214, - "step": 3915 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5489, - "step": 3916 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 3917 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5726, - "step": 3918 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6436, - "step": 3919 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5838, - "step": 3920 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.683, - "step": 3921 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5193, - "step": 3922 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 3923 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5643, - "step": 3924 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.6541, - "step": 3925 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5398, - "step": 3926 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5622, - "step": 3927 - }, - { - "epoch": 0.8, - "learning_rate": 2e-05, - "loss": 0.5335, - "step": 3928 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5797, - "step": 3929 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.661, - "step": 3930 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5651, - "step": 3931 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6695, - "step": 3932 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6493, - "step": 3933 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6046, - "step": 3934 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5653, - "step": 3935 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6341, - "step": 3936 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5787, - "step": 3937 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5378, - "step": 3938 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.688, - "step": 3939 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5817, - "step": 3940 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6691, - "step": 3941 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 3942 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5873, - "step": 3943 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5285, - "step": 3944 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 3945 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.608, - "step": 3946 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.607, - "step": 3947 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5981, - "step": 3948 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 3949 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 3950 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5575, - "step": 3951 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.55, - "step": 3952 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.543, - "step": 3953 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6144, - "step": 3954 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6517, - "step": 3955 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6003, - "step": 3956 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6624, - "step": 3957 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 3958 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 3959 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6647, - "step": 3960 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6988, - "step": 3961 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5701, - "step": 3962 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6085, - "step": 3963 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5719, - "step": 3964 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6883, - "step": 3965 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6274, - "step": 3966 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 3967 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6938, - "step": 3968 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5196, - "step": 3969 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.684, - "step": 3970 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5512, - "step": 3971 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6927, - "step": 3972 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5495, - "step": 3973 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6468, - "step": 3974 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.613, - "step": 3975 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.5456, - "step": 3976 - }, - { - "epoch": 0.81, - "learning_rate": 2e-05, - "loss": 0.6284, - "step": 3977 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5922, - "step": 3978 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5779, - "step": 3979 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5684, - "step": 3980 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5655, - "step": 3981 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6997, - "step": 3982 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6162, - "step": 3983 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6693, - "step": 3984 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6635, - "step": 3985 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6037, - "step": 3986 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 3987 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5842, - "step": 3988 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5631, - "step": 3989 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5701, - "step": 3990 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 3991 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.577, - "step": 3992 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5734, - "step": 3993 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6357, - "step": 3994 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6468, - "step": 3995 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.49, - "step": 3996 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 3997 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5371, - "step": 3998 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6332, - "step": 3999 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.555, - "step": 4000 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6331, - "step": 4001 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6693, - "step": 4002 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.635, - "step": 4003 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6136, - "step": 4004 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.565, - "step": 4005 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5399, - "step": 4006 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5599, - "step": 4007 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5876, - "step": 4008 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5421, - "step": 4009 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 4010 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6064, - "step": 4011 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6632, - "step": 4012 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.7319, - "step": 4013 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5757, - "step": 4014 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6011, - "step": 4015 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5888, - "step": 4016 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5435, - "step": 4017 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5355, - "step": 4018 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5495, - "step": 4019 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6674, - "step": 4020 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5591, - "step": 4021 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 4022 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.5656, - "step": 4023 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.6178, - "step": 4024 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.559, - "step": 4025 - }, - { - "epoch": 0.82, - "learning_rate": 2e-05, - "loss": 0.4813, - "step": 4026 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6016, - "step": 4027 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6218, - "step": 4028 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5613, - "step": 4029 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.4975, - "step": 4030 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6303, - "step": 4031 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.599, - "step": 4032 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6142, - "step": 4033 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6719, - "step": 4034 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6078, - "step": 4035 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5391, - "step": 4036 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5077, - "step": 4037 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.7024, - "step": 4038 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6028, - "step": 4039 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5561, - "step": 4040 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5469, - "step": 4041 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5355, - "step": 4042 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6058, - "step": 4043 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.641, - "step": 4044 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6027, - "step": 4045 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.4733, - "step": 4046 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.7286, - "step": 4047 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5613, - "step": 4048 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6596, - "step": 4049 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.597, - "step": 4050 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5383, - "step": 4051 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.553, - "step": 4052 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6838, - "step": 4053 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6226, - "step": 4054 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.4999, - "step": 4055 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.629, - "step": 4056 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.679, - "step": 4057 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6342, - "step": 4058 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.605, - "step": 4059 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6102, - "step": 4060 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6757, - "step": 4061 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5571, - "step": 4062 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6634, - "step": 4063 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5999, - "step": 4064 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5978, - "step": 4065 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5565, - "step": 4066 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.575, - "step": 4067 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6521, - "step": 4068 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5512, - "step": 4069 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5357, - "step": 4070 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5525, - "step": 4071 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6235, - "step": 4072 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.5479, - "step": 4073 - }, - { - "epoch": 0.83, - "learning_rate": 2e-05, - "loss": 0.6273, - "step": 4074 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.677, - "step": 4075 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5708, - "step": 4076 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6406, - "step": 4077 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6173, - "step": 4078 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6028, - "step": 4079 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6256, - "step": 4080 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6563, - "step": 4081 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6042, - "step": 4082 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6174, - "step": 4083 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6079, - "step": 4084 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5333, - "step": 4085 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6029, - "step": 4086 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6342, - "step": 4087 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 4088 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5715, - "step": 4089 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6441, - "step": 4090 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6678, - "step": 4091 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5942, - "step": 4092 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5778, - "step": 4093 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.692, - "step": 4094 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5643, - "step": 4095 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6485, - "step": 4096 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.7089, - "step": 4097 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6837, - "step": 4098 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 4099 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.634, - "step": 4100 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6918, - "step": 4101 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6328, - "step": 4102 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5701, - "step": 4103 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.652, - "step": 4104 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5546, - "step": 4105 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6435, - "step": 4106 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6313, - "step": 4107 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6309, - "step": 4108 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5813, - "step": 4109 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 4110 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.659, - "step": 4111 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6154, - "step": 4112 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5884, - "step": 4113 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.525, - "step": 4114 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6665, - "step": 4115 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6216, - "step": 4116 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.639, - "step": 4117 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5696, - "step": 4118 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5548, - "step": 4119 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6433, - "step": 4120 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.5863, - "step": 4121 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.7223, - "step": 4122 - }, - { - "epoch": 0.84, - "learning_rate": 2e-05, - "loss": 0.6096, - "step": 4123 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5814, - "step": 4124 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5835, - "step": 4125 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5647, - "step": 4126 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.576, - "step": 4127 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6289, - "step": 4128 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 4129 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6157, - "step": 4130 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5995, - "step": 4131 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5605, - "step": 4132 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5866, - "step": 4133 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6016, - "step": 4134 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6072, - "step": 4135 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6563, - "step": 4136 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5947, - "step": 4137 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6337, - "step": 4138 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6204, - "step": 4139 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6803, - "step": 4140 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6035, - "step": 4141 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5759, - "step": 4142 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 4143 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5576, - "step": 4144 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5597, - "step": 4145 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.7477, - "step": 4146 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6953, - "step": 4147 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 4148 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5807, - "step": 4149 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5636, - "step": 4150 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6678, - "step": 4151 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6833, - "step": 4152 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 4153 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5465, - "step": 4154 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6047, - "step": 4155 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6618, - "step": 4156 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.7298, - "step": 4157 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5789, - "step": 4158 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5309, - "step": 4159 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5671, - "step": 4160 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5538, - "step": 4161 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5796, - "step": 4162 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5595, - "step": 4163 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6395, - "step": 4164 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.559, - "step": 4165 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6839, - "step": 4166 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6601, - "step": 4167 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.6387, - "step": 4168 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5988, - "step": 4169 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.65, - "step": 4170 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5725, - "step": 4171 - }, - { - "epoch": 0.85, - "learning_rate": 2e-05, - "loss": 0.5666, - "step": 4172 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.616, - "step": 4173 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6305, - "step": 4174 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5719, - "step": 4175 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6254, - "step": 4176 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.7095, - "step": 4177 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 4178 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6879, - "step": 4179 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6649, - "step": 4180 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6036, - "step": 4181 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5659, - "step": 4182 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5383, - "step": 4183 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5586, - "step": 4184 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5431, - "step": 4185 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5591, - "step": 4186 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5751, - "step": 4187 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5921, - "step": 4188 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6077, - "step": 4189 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6577, - "step": 4190 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6707, - "step": 4191 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5961, - "step": 4192 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.4638, - "step": 4193 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5874, - "step": 4194 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6801, - "step": 4195 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5602, - "step": 4196 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6365, - "step": 4197 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6591, - "step": 4198 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6714, - "step": 4199 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5628, - "step": 4200 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6108, - "step": 4201 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5874, - "step": 4202 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5855, - "step": 4203 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5459, - "step": 4204 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6074, - "step": 4205 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6409, - "step": 4206 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6123, - "step": 4207 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5645, - "step": 4208 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5573, - "step": 4209 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6585, - "step": 4210 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5542, - "step": 4211 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5787, - "step": 4212 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.536, - "step": 4213 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5914, - "step": 4214 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.53, - "step": 4215 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6188, - "step": 4216 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6052, - "step": 4217 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6353, - "step": 4218 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6335, - "step": 4219 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.6217, - "step": 4220 - }, - { - "epoch": 0.86, - "learning_rate": 2e-05, - "loss": 0.5908, - "step": 4221 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5665, - "step": 4222 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6829, - "step": 4223 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5474, - "step": 4224 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5387, - "step": 4225 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6337, - "step": 4226 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6429, - "step": 4227 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.715, - "step": 4228 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6197, - "step": 4229 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 4230 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6435, - "step": 4231 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6377, - "step": 4232 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6367, - "step": 4233 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5542, - "step": 4234 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5082, - "step": 4235 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.579, - "step": 4236 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6706, - "step": 4237 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6625, - "step": 4238 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6301, - "step": 4239 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6041, - "step": 4240 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5276, - "step": 4241 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5685, - "step": 4242 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6455, - "step": 4243 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6776, - "step": 4244 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5905, - "step": 4245 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6283, - "step": 4246 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5842, - "step": 4247 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5851, - "step": 4248 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6255, - "step": 4249 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6482, - "step": 4250 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5419, - "step": 4251 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 4252 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5386, - "step": 4253 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6369, - "step": 4254 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6353, - "step": 4255 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5896, - "step": 4256 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6716, - "step": 4257 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6187, - "step": 4258 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5429, - "step": 4259 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5477, - "step": 4260 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5411, - "step": 4261 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6343, - "step": 4262 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5244, - "step": 4263 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 4264 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5322, - "step": 4265 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.5372, - "step": 4266 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6341, - "step": 4267 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.709, - "step": 4268 - }, - { - "epoch": 0.87, - "learning_rate": 2e-05, - "loss": 0.6882, - "step": 4269 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6511, - "step": 4270 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5582, - "step": 4271 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6074, - "step": 4272 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5744, - "step": 4273 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 4274 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 4275 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6505, - "step": 4276 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.596, - "step": 4277 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6412, - "step": 4278 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6555, - "step": 4279 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6795, - "step": 4280 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6337, - "step": 4281 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5629, - "step": 4282 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5698, - "step": 4283 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6249, - "step": 4284 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6141, - "step": 4285 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5976, - "step": 4286 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6795, - "step": 4287 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6518, - "step": 4288 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6353, - "step": 4289 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6723, - "step": 4290 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6058, - "step": 4291 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5662, - "step": 4292 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6322, - "step": 4293 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6236, - "step": 4294 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6665, - "step": 4295 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6469, - "step": 4296 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5708, - "step": 4297 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5439, - "step": 4298 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5605, - "step": 4299 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 4300 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5906, - "step": 4301 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5714, - "step": 4302 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5815, - "step": 4303 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6516, - "step": 4304 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5403, - "step": 4305 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 4306 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5707, - "step": 4307 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5873, - "step": 4308 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5661, - "step": 4309 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 4310 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.621, - "step": 4311 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.4797, - "step": 4312 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6633, - "step": 4313 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5965, - "step": 4314 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6262, - "step": 4315 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.5295, - "step": 4316 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.6689, - "step": 4317 - }, - { - "epoch": 0.88, - "learning_rate": 2e-05, - "loss": 0.593, - "step": 4318 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6348, - "step": 4319 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6173, - "step": 4320 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5907, - "step": 4321 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5963, - "step": 4322 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6712, - "step": 4323 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6277, - "step": 4324 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6129, - "step": 4325 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6528, - "step": 4326 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6503, - "step": 4327 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6255, - "step": 4328 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 4329 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6531, - "step": 4330 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6829, - "step": 4331 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5554, - "step": 4332 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5325, - "step": 4333 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6573, - "step": 4334 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6753, - "step": 4335 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6459, - "step": 4336 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5409, - "step": 4337 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5179, - "step": 4338 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6239, - "step": 4339 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6014, - "step": 4340 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5492, - "step": 4341 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6663, - "step": 4342 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6357, - "step": 4343 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.7364, - "step": 4344 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6366, - "step": 4345 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5993, - "step": 4346 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5582, - "step": 4347 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6006, - "step": 4348 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6727, - "step": 4349 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6499, - "step": 4350 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5838, - "step": 4351 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6558, - "step": 4352 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.68, - "step": 4353 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5557, - "step": 4354 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5904, - "step": 4355 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5664, - "step": 4356 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5791, - "step": 4357 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6195, - "step": 4358 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6608, - "step": 4359 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.566, - "step": 4360 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6246, - "step": 4361 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6501, - "step": 4362 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.5765, - "step": 4363 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.741, - "step": 4364 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 4365 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6439, - "step": 4366 - }, - { - "epoch": 0.89, - "learning_rate": 2e-05, - "loss": 0.6481, - "step": 4367 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6224, - "step": 4368 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6156, - "step": 4369 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6243, - "step": 4370 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5776, - "step": 4371 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.64, - "step": 4372 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5732, - "step": 4373 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5689, - "step": 4374 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6299, - "step": 4375 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6452, - "step": 4376 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5829, - "step": 4377 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6078, - "step": 4378 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.4973, - "step": 4379 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6389, - "step": 4380 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.558, - "step": 4381 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6321, - "step": 4382 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6436, - "step": 4383 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6167, - "step": 4384 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6185, - "step": 4385 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.63, - "step": 4386 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 4387 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5969, - "step": 4388 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5597, - "step": 4389 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6137, - "step": 4390 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.7454, - "step": 4391 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5971, - "step": 4392 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6091, - "step": 4393 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5651, - "step": 4394 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6745, - "step": 4395 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5889, - "step": 4396 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5834, - "step": 4397 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6309, - "step": 4398 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6002, - "step": 4399 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.737, - "step": 4400 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5552, - "step": 4401 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6768, - "step": 4402 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6113, - "step": 4403 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5195, - "step": 4404 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.669, - "step": 4405 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6204, - "step": 4406 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.7081, - "step": 4407 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5974, - "step": 4408 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5905, - "step": 4409 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6946, - "step": 4410 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 4411 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5615, - "step": 4412 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5789, - "step": 4413 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.6121, - "step": 4414 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.5406, - "step": 4415 - }, - { - "epoch": 0.9, - "learning_rate": 2e-05, - "loss": 0.4948, - "step": 4416 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6526, - "step": 4417 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6015, - "step": 4418 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.702, - "step": 4419 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6573, - "step": 4420 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.7501, - "step": 4421 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5406, - "step": 4422 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6559, - "step": 4423 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6287, - "step": 4424 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.496, - "step": 4425 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6012, - "step": 4426 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6053, - "step": 4427 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6341, - "step": 4428 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5132, - "step": 4429 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6218, - "step": 4430 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5348, - "step": 4431 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6706, - "step": 4432 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6721, - "step": 4433 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6051, - "step": 4434 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.7203, - "step": 4435 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6449, - "step": 4436 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5438, - "step": 4437 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6211, - "step": 4438 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.596, - "step": 4439 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5393, - "step": 4440 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5145, - "step": 4441 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 4442 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6981, - "step": 4443 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5199, - "step": 4444 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6415, - "step": 4445 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 4446 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 4447 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5861, - "step": 4448 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 4449 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5775, - "step": 4450 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5834, - "step": 4451 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5834, - "step": 4452 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6434, - "step": 4453 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.56, - "step": 4454 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6149, - "step": 4455 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6583, - "step": 4456 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6279, - "step": 4457 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 4458 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6297, - "step": 4459 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.7014, - "step": 4460 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6463, - "step": 4461 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.6291, - "step": 4462 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5959, - "step": 4463 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5352, - "step": 4464 - }, - { - "epoch": 0.91, - "learning_rate": 2e-05, - "loss": 0.5732, - "step": 4465 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.627, - "step": 4466 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5971, - "step": 4467 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5547, - "step": 4468 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6143, - "step": 4469 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.631, - "step": 4470 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6417, - "step": 4471 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5334, - "step": 4472 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6259, - "step": 4473 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.554, - "step": 4474 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5855, - "step": 4475 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6156, - "step": 4476 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5693, - "step": 4477 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5914, - "step": 4478 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6146, - "step": 4479 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5753, - "step": 4480 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6446, - "step": 4481 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5942, - "step": 4482 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5912, - "step": 4483 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.4966, - "step": 4484 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6351, - "step": 4485 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5843, - "step": 4486 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6198, - "step": 4487 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.7116, - "step": 4488 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6266, - "step": 4489 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6026, - "step": 4490 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5574, - "step": 4491 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5679, - "step": 4492 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6174, - "step": 4493 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 4494 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6249, - "step": 4495 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6303, - "step": 4496 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5765, - "step": 4497 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6048, - "step": 4498 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6416, - "step": 4499 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6854, - "step": 4500 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.586, - "step": 4501 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5201, - "step": 4502 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5415, - "step": 4503 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5716, - "step": 4504 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6321, - "step": 4505 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.659, - "step": 4506 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5279, - "step": 4507 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.673, - "step": 4508 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6266, - "step": 4509 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.5991, - "step": 4510 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6172, - "step": 4511 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 4512 - }, - { - "epoch": 0.92, - "learning_rate": 2e-05, - "loss": 0.595, - "step": 4513 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.607, - "step": 4514 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6775, - "step": 4515 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5981, - "step": 4516 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5781, - "step": 4517 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5633, - "step": 4518 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6591, - "step": 4519 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5942, - "step": 4520 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5734, - "step": 4521 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5458, - "step": 4522 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5864, - "step": 4523 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5616, - "step": 4524 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6279, - "step": 4525 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6507, - "step": 4526 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.624, - "step": 4527 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5919, - "step": 4528 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6614, - "step": 4529 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5872, - "step": 4530 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.4877, - "step": 4531 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6525, - "step": 4532 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6065, - "step": 4533 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5562, - "step": 4534 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.674, - "step": 4535 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6709, - "step": 4536 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5504, - "step": 4537 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6456, - "step": 4538 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5118, - "step": 4539 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6362, - "step": 4540 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6114, - "step": 4541 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6567, - "step": 4542 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6574, - "step": 4543 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.579, - "step": 4544 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5393, - "step": 4545 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6462, - "step": 4546 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6184, - "step": 4547 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5999, - "step": 4548 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5848, - "step": 4549 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5842, - "step": 4550 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6361, - "step": 4551 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6924, - "step": 4552 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6219, - "step": 4553 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5295, - "step": 4554 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6471, - "step": 4555 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5958, - "step": 4556 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6783, - "step": 4557 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5265, - "step": 4558 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.5631, - "step": 4559 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.605, - "step": 4560 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6456, - "step": 4561 - }, - { - "epoch": 0.93, - "learning_rate": 2e-05, - "loss": 0.6183, - "step": 4562 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5736, - "step": 4563 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 4564 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5795, - "step": 4565 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.673, - "step": 4566 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.4915, - "step": 4567 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6211, - "step": 4568 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6006, - "step": 4569 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.4995, - "step": 4570 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6289, - "step": 4571 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6116, - "step": 4572 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5097, - "step": 4573 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5866, - "step": 4574 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 4575 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6181, - "step": 4576 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6257, - "step": 4577 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5587, - "step": 4578 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.7104, - "step": 4579 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5542, - "step": 4580 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6336, - "step": 4581 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5548, - "step": 4582 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6153, - "step": 4583 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.631, - "step": 4584 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5737, - "step": 4585 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5705, - "step": 4586 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6562, - "step": 4587 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6962, - "step": 4588 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5109, - "step": 4589 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5824, - "step": 4590 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5797, - "step": 4591 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5415, - "step": 4592 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6317, - "step": 4593 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5898, - "step": 4594 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6227, - "step": 4595 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5939, - "step": 4596 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6068, - "step": 4597 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6093, - "step": 4598 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6253, - "step": 4599 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6117, - "step": 4600 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6229, - "step": 4601 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6026, - "step": 4602 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5452, - "step": 4603 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5712, - "step": 4604 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6403, - "step": 4605 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6085, - "step": 4606 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5862, - "step": 4607 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.5701, - "step": 4608 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6238, - "step": 4609 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6145, - "step": 4610 - }, - { - "epoch": 0.94, - "learning_rate": 2e-05, - "loss": 0.6448, - "step": 4611 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6039, - "step": 4612 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5986, - "step": 4613 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6861, - "step": 4614 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6112, - "step": 4615 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.694, - "step": 4616 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5905, - "step": 4617 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.605, - "step": 4618 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5697, - "step": 4619 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5651, - "step": 4620 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5855, - "step": 4621 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6121, - "step": 4622 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.644, - "step": 4623 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5704, - "step": 4624 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5518, - "step": 4625 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5524, - "step": 4626 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6465, - "step": 4627 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6072, - "step": 4628 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6712, - "step": 4629 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6478, - "step": 4630 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5607, - "step": 4631 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5079, - "step": 4632 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.658, - "step": 4633 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.7082, - "step": 4634 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5791, - "step": 4635 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5786, - "step": 4636 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6252, - "step": 4637 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6237, - "step": 4638 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5462, - "step": 4639 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6617, - "step": 4640 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6151, - "step": 4641 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6818, - "step": 4642 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5723, - "step": 4643 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6182, - "step": 4644 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.543, - "step": 4645 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.575, - "step": 4646 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5755, - "step": 4647 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5941, - "step": 4648 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5977, - "step": 4649 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6381, - "step": 4650 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5512, - "step": 4651 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.6887, - "step": 4652 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5757, - "step": 4653 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5233, - "step": 4654 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.56, - "step": 4655 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.566, - "step": 4656 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5725, - "step": 4657 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.614, - "step": 4658 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.664, - "step": 4659 - }, - { - "epoch": 0.95, - "learning_rate": 2e-05, - "loss": 0.5345, - "step": 4660 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6823, - "step": 4661 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5768, - "step": 4662 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6055, - "step": 4663 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6173, - "step": 4664 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6827, - "step": 4665 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6375, - "step": 4666 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6597, - "step": 4667 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5602, - "step": 4668 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6644, - "step": 4669 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5891, - "step": 4670 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6139, - "step": 4671 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.697, - "step": 4672 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.7692, - "step": 4673 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.683, - "step": 4674 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6307, - "step": 4675 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6418, - "step": 4676 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6683, - "step": 4677 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5902, - "step": 4678 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6385, - "step": 4679 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5645, - "step": 4680 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6161, - "step": 4681 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6634, - "step": 4682 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5843, - "step": 4683 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5907, - "step": 4684 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5615, - "step": 4685 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5665, - "step": 4686 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6746, - "step": 4687 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6728, - "step": 4688 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.666, - "step": 4689 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5893, - "step": 4690 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5986, - "step": 4691 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.7005, - "step": 4692 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6328, - "step": 4693 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.638, - "step": 4694 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6329, - "step": 4695 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 4696 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5653, - "step": 4697 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.6691, - "step": 4698 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5067, - "step": 4699 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5752, - "step": 4700 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5952, - "step": 4701 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5345, - "step": 4702 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.639, - "step": 4703 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5814, - "step": 4704 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 4705 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5985, - "step": 4706 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5597, - "step": 4707 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5978, - "step": 4708 - }, - { - "epoch": 0.96, - "learning_rate": 2e-05, - "loss": 0.5725, - "step": 4709 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5765, - "step": 4710 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6758, - "step": 4711 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6072, - "step": 4712 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5197, - "step": 4713 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6786, - "step": 4714 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5533, - "step": 4715 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5495, - "step": 4716 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5783, - "step": 4717 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6472, - "step": 4718 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5661, - "step": 4719 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5303, - "step": 4720 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6364, - "step": 4721 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6046, - "step": 4722 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6728, - "step": 4723 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5788, - "step": 4724 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6164, - "step": 4725 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5957, - "step": 4726 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6658, - "step": 4727 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6626, - "step": 4728 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5293, - "step": 4729 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5462, - "step": 4730 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5349, - "step": 4731 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6505, - "step": 4732 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.7188, - "step": 4733 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5675, - "step": 4734 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6455, - "step": 4735 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5844, - "step": 4736 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5644, - "step": 4737 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6029, - "step": 4738 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5033, - "step": 4739 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5983, - "step": 4740 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6603, - "step": 4741 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6113, - "step": 4742 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6928, - "step": 4743 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6715, - "step": 4744 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.7285, - "step": 4745 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6696, - "step": 4746 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5165, - "step": 4747 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6057, - "step": 4748 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5255, - "step": 4749 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6008, - "step": 4750 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.6768, - "step": 4751 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5739, - "step": 4752 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5833, - "step": 4753 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5645, - "step": 4754 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5499, - "step": 4755 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5576, - "step": 4756 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.661, - "step": 4757 - }, - { - "epoch": 0.97, - "learning_rate": 2e-05, - "loss": 0.5004, - "step": 4758 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5859, - "step": 4759 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5893, - "step": 4760 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5938, - "step": 4761 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6117, - "step": 4762 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6536, - "step": 4763 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5582, - "step": 4764 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 4765 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6276, - "step": 4766 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6195, - "step": 4767 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6575, - "step": 4768 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5986, - "step": 4769 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6489, - "step": 4770 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5977, - "step": 4771 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5896, - "step": 4772 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6461, - "step": 4773 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6319, - "step": 4774 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6122, - "step": 4775 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6703, - "step": 4776 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6662, - "step": 4777 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5998, - "step": 4778 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6152, - "step": 4779 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5429, - "step": 4780 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5821, - "step": 4781 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6579, - "step": 4782 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6439, - "step": 4783 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5492, - "step": 4784 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6689, - "step": 4785 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6124, - "step": 4786 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5029, - "step": 4787 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5958, - "step": 4788 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6394, - "step": 4789 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6396, - "step": 4790 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.584, - "step": 4791 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5479, - "step": 4792 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6877, - "step": 4793 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5568, - "step": 4794 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6488, - "step": 4795 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6088, - "step": 4796 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6036, - "step": 4797 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5758, - "step": 4798 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5596, - "step": 4799 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6285, - "step": 4800 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6203, - "step": 4801 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.7356, - "step": 4802 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6755, - "step": 4803 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.509, - "step": 4804 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.6415, - "step": 4805 - }, - { - "epoch": 0.98, - "learning_rate": 2e-05, - "loss": 0.5815, - "step": 4806 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5529, - "step": 4807 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6138, - "step": 4808 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5841, - "step": 4809 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5966, - "step": 4810 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5951, - "step": 4811 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5818, - "step": 4812 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6076, - "step": 4813 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.607, - "step": 4814 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5769, - "step": 4815 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5722, - "step": 4816 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.7201, - "step": 4817 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5908, - "step": 4818 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5917, - "step": 4819 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5334, - "step": 4820 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5806, - "step": 4821 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5423, - "step": 4822 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5782, - "step": 4823 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5723, - "step": 4824 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6919, - "step": 4825 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5555, - "step": 4826 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5007, - "step": 4827 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.538, - "step": 4828 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6166, - "step": 4829 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6288, - "step": 4830 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5901, - "step": 4831 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6122, - "step": 4832 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6161, - "step": 4833 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5593, - "step": 4834 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6373, - "step": 4835 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6423, - "step": 4836 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5792, - "step": 4837 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6187, - "step": 4838 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6221, - "step": 4839 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6764, - "step": 4840 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5291, - "step": 4841 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6438, - "step": 4842 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6103, - "step": 4843 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6524, - "step": 4844 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6252, - "step": 4845 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.581, - "step": 4846 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6495, - "step": 4847 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6774, - "step": 4848 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6417, - "step": 4849 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6021, - "step": 4850 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6082, - "step": 4851 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6331, - "step": 4852 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.5518, - "step": 4853 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.6389, - "step": 4854 - }, - { - "epoch": 0.99, - "learning_rate": 2e-05, - "loss": 0.7318, - "step": 4855 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6669, - "step": 4856 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6708, - "step": 4857 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.619, - "step": 4858 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6405, - "step": 4859 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6397, - "step": 4860 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.584, - "step": 4861 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6565, - "step": 4862 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6563, - "step": 4863 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6581, - "step": 4864 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6099, - "step": 4865 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6325, - "step": 4866 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6476, - "step": 4867 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5762, - "step": 4868 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6442, - "step": 4869 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5866, - "step": 4870 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5471, - "step": 4871 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5917, - "step": 4872 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6165, - "step": 4873 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6619, - "step": 4874 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6168, - "step": 4875 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5354, - "step": 4876 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5867, - "step": 4877 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6209, - "step": 4878 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.6006, - "step": 4879 - }, - { - "epoch": 1.0, - "learning_rate": 2e-05, - "loss": 0.5971, - "step": 4880 - }, - { - "epoch": 1.0, - "step": 4880, - "total_flos": 4747483064107008.0, - "train_loss": 0.639445250456939, - "train_runtime": 46854.6547, - "train_samples_per_second": 13.331, - "train_steps_per_second": 0.104 - } - ], - "logging_steps": 1.0, - "max_steps": 4880, - "num_train_epochs": 1, - "save_steps": 50000, - "total_flos": 4747483064107008.0, - "trial_name": null, - "trial_params": null -}