{ "best_metric": 0.7393630146980286, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.3508771929824561, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017543859649122807, "grad_norm": 0.7406291961669922, "learning_rate": 1.009e-05, "loss": 0.9063, "step": 1 }, { "epoch": 0.0017543859649122807, "eval_loss": 1.0510913133621216, "eval_runtime": 27.8319, "eval_samples_per_second": 8.623, "eval_steps_per_second": 2.156, "step": 1 }, { "epoch": 0.0035087719298245615, "grad_norm": 1.0678048133850098, "learning_rate": 2.018e-05, "loss": 1.1931, "step": 2 }, { "epoch": 0.005263157894736842, "grad_norm": 1.1864534616470337, "learning_rate": 3.027e-05, "loss": 1.0979, "step": 3 }, { "epoch": 0.007017543859649123, "grad_norm": 0.9908481240272522, "learning_rate": 4.036e-05, "loss": 0.7717, "step": 4 }, { "epoch": 0.008771929824561403, "grad_norm": 1.750254511833191, "learning_rate": 5.045e-05, "loss": 1.1904, "step": 5 }, { "epoch": 0.010526315789473684, "grad_norm": 0.7105154395103455, "learning_rate": 6.054e-05, "loss": 0.629, "step": 6 }, { "epoch": 0.012280701754385965, "grad_norm": 0.9075063467025757, "learning_rate": 7.062999999999999e-05, "loss": 0.8961, "step": 7 }, { "epoch": 0.014035087719298246, "grad_norm": 0.9498921036720276, "learning_rate": 8.072e-05, "loss": 0.9649, "step": 8 }, { "epoch": 0.015789473684210527, "grad_norm": 1.1364835500717163, "learning_rate": 9.081e-05, "loss": 1.0493, "step": 9 }, { "epoch": 0.017543859649122806, "grad_norm": 1.0802795886993408, "learning_rate": 0.0001009, "loss": 1.0153, "step": 10 }, { "epoch": 0.01929824561403509, "grad_norm": 0.9688495993614197, "learning_rate": 0.00010036894736842106, "loss": 0.6098, "step": 11 }, { "epoch": 0.021052631578947368, "grad_norm": 1.2151799201965332, "learning_rate": 9.98378947368421e-05, "loss": 1.0982, "step": 12 }, { "epoch": 0.02280701754385965, "grad_norm": 1.1714990139007568, "learning_rate": 9.930684210526315e-05, "loss": 1.0194, "step": 13 }, { "epoch": 0.02456140350877193, "grad_norm": 0.905537486076355, "learning_rate": 9.877578947368421e-05, "loss": 0.6032, "step": 14 }, { "epoch": 0.02631578947368421, "grad_norm": 1.3678743839263916, "learning_rate": 9.824473684210527e-05, "loss": 1.2738, "step": 15 }, { "epoch": 0.028070175438596492, "grad_norm": 1.3796780109405518, "learning_rate": 9.771368421052632e-05, "loss": 1.0135, "step": 16 }, { "epoch": 0.02982456140350877, "grad_norm": 0.8758544921875, "learning_rate": 9.718263157894736e-05, "loss": 0.7037, "step": 17 }, { "epoch": 0.031578947368421054, "grad_norm": 0.7311310768127441, "learning_rate": 9.665157894736842e-05, "loss": 0.6875, "step": 18 }, { "epoch": 0.03333333333333333, "grad_norm": 1.0072100162506104, "learning_rate": 9.612052631578948e-05, "loss": 0.8314, "step": 19 }, { "epoch": 0.03508771929824561, "grad_norm": 0.8759172558784485, "learning_rate": 9.558947368421052e-05, "loss": 0.7827, "step": 20 }, { "epoch": 0.03684210526315789, "grad_norm": 0.7492383718490601, "learning_rate": 9.505842105263159e-05, "loss": 0.6755, "step": 21 }, { "epoch": 0.03859649122807018, "grad_norm": 0.7113670706748962, "learning_rate": 9.452736842105263e-05, "loss": 0.661, "step": 22 }, { "epoch": 0.04035087719298246, "grad_norm": 0.6615825891494751, "learning_rate": 9.399631578947368e-05, "loss": 0.6872, "step": 23 }, { "epoch": 0.042105263157894736, "grad_norm": 1.0192131996154785, "learning_rate": 9.346526315789474e-05, "loss": 0.9075, "step": 24 }, { "epoch": 0.043859649122807015, "grad_norm": 0.992385745048523, "learning_rate": 9.293421052631578e-05, "loss": 0.8013, "step": 25 }, { "epoch": 0.0456140350877193, "grad_norm": 0.7488397359848022, "learning_rate": 9.240315789473684e-05, "loss": 0.602, "step": 26 }, { "epoch": 0.04736842105263158, "grad_norm": 0.5196430087089539, "learning_rate": 9.18721052631579e-05, "loss": 0.4694, "step": 27 }, { "epoch": 0.04912280701754386, "grad_norm": 0.6897857189178467, "learning_rate": 9.134105263157895e-05, "loss": 0.7644, "step": 28 }, { "epoch": 0.05087719298245614, "grad_norm": 0.7681694030761719, "learning_rate": 9.081e-05, "loss": 0.7093, "step": 29 }, { "epoch": 0.05263157894736842, "grad_norm": 0.9528352618217468, "learning_rate": 9.027894736842105e-05, "loss": 0.726, "step": 30 }, { "epoch": 0.054385964912280704, "grad_norm": 0.6168146729469299, "learning_rate": 8.97478947368421e-05, "loss": 0.5074, "step": 31 }, { "epoch": 0.056140350877192984, "grad_norm": 0.8572965860366821, "learning_rate": 8.921684210526316e-05, "loss": 0.8495, "step": 32 }, { "epoch": 0.05789473684210526, "grad_norm": 0.7506121397018433, "learning_rate": 8.86857894736842e-05, "loss": 0.9085, "step": 33 }, { "epoch": 0.05964912280701754, "grad_norm": 0.8023539185523987, "learning_rate": 8.815473684210527e-05, "loss": 0.7338, "step": 34 }, { "epoch": 0.06140350877192982, "grad_norm": 0.6991697549819946, "learning_rate": 8.762368421052631e-05, "loss": 0.7113, "step": 35 }, { "epoch": 0.06315789473684211, "grad_norm": 0.7761762142181396, "learning_rate": 8.709263157894737e-05, "loss": 0.6322, "step": 36 }, { "epoch": 0.06491228070175438, "grad_norm": 0.9230353832244873, "learning_rate": 8.656157894736843e-05, "loss": 0.7582, "step": 37 }, { "epoch": 0.06666666666666667, "grad_norm": 0.6606146693229675, "learning_rate": 8.603052631578947e-05, "loss": 0.6742, "step": 38 }, { "epoch": 0.06842105263157895, "grad_norm": 0.8233667612075806, "learning_rate": 8.549947368421052e-05, "loss": 0.5869, "step": 39 }, { "epoch": 0.07017543859649122, "grad_norm": 0.7692704796791077, "learning_rate": 8.496842105263158e-05, "loss": 0.6732, "step": 40 }, { "epoch": 0.07192982456140351, "grad_norm": 0.8292697072029114, "learning_rate": 8.443736842105264e-05, "loss": 0.7863, "step": 41 }, { "epoch": 0.07368421052631578, "grad_norm": 0.8596307635307312, "learning_rate": 8.390631578947369e-05, "loss": 0.7988, "step": 42 }, { "epoch": 0.07543859649122807, "grad_norm": 0.792542576789856, "learning_rate": 8.337526315789473e-05, "loss": 0.708, "step": 43 }, { "epoch": 0.07719298245614035, "grad_norm": 0.7675974369049072, "learning_rate": 8.284421052631579e-05, "loss": 0.5697, "step": 44 }, { "epoch": 0.07894736842105263, "grad_norm": 0.6914715766906738, "learning_rate": 8.231315789473685e-05, "loss": 0.4748, "step": 45 }, { "epoch": 0.08070175438596491, "grad_norm": 1.0996037721633911, "learning_rate": 8.178210526315789e-05, "loss": 0.9406, "step": 46 }, { "epoch": 0.0824561403508772, "grad_norm": 0.9779611229896545, "learning_rate": 8.125105263157894e-05, "loss": 0.7495, "step": 47 }, { "epoch": 0.08421052631578947, "grad_norm": 1.2376444339752197, "learning_rate": 8.072e-05, "loss": 1.2518, "step": 48 }, { "epoch": 0.08596491228070176, "grad_norm": 1.0767101049423218, "learning_rate": 8.018894736842106e-05, "loss": 0.8389, "step": 49 }, { "epoch": 0.08771929824561403, "grad_norm": 1.3052878379821777, "learning_rate": 7.965789473684211e-05, "loss": 1.0287, "step": 50 }, { "epoch": 0.08771929824561403, "eval_loss": 0.8099061250686646, "eval_runtime": 28.2437, "eval_samples_per_second": 8.497, "eval_steps_per_second": 2.124, "step": 50 }, { "epoch": 0.08947368421052632, "grad_norm": 1.1818331480026245, "learning_rate": 7.912684210526315e-05, "loss": 1.1752, "step": 51 }, { "epoch": 0.0912280701754386, "grad_norm": 0.9281390905380249, "learning_rate": 7.859578947368421e-05, "loss": 0.9196, "step": 52 }, { "epoch": 0.09298245614035087, "grad_norm": 1.2918709516525269, "learning_rate": 7.806473684210527e-05, "loss": 1.4252, "step": 53 }, { "epoch": 0.09473684210526316, "grad_norm": 0.6728716492652893, "learning_rate": 7.753368421052631e-05, "loss": 0.7743, "step": 54 }, { "epoch": 0.09649122807017543, "grad_norm": 0.6830320954322815, "learning_rate": 7.700263157894738e-05, "loss": 0.7888, "step": 55 }, { "epoch": 0.09824561403508772, "grad_norm": 0.8492221832275391, "learning_rate": 7.647157894736842e-05, "loss": 0.7649, "step": 56 }, { "epoch": 0.1, "grad_norm": 0.7507942914962769, "learning_rate": 7.594052631578948e-05, "loss": 0.7795, "step": 57 }, { "epoch": 0.10175438596491228, "grad_norm": 0.6988322734832764, "learning_rate": 7.540947368421053e-05, "loss": 0.5944, "step": 58 }, { "epoch": 0.10350877192982456, "grad_norm": 0.7897641062736511, "learning_rate": 7.487842105263157e-05, "loss": 0.8777, "step": 59 }, { "epoch": 0.10526315789473684, "grad_norm": 0.7114337682723999, "learning_rate": 7.434736842105263e-05, "loss": 0.8136, "step": 60 }, { "epoch": 0.10701754385964912, "grad_norm": 0.5905416011810303, "learning_rate": 7.381631578947368e-05, "loss": 0.6229, "step": 61 }, { "epoch": 0.10877192982456141, "grad_norm": 0.6030896306037903, "learning_rate": 7.328526315789474e-05, "loss": 0.7214, "step": 62 }, { "epoch": 0.11052631578947368, "grad_norm": 0.7862070202827454, "learning_rate": 7.27542105263158e-05, "loss": 0.8534, "step": 63 }, { "epoch": 0.11228070175438597, "grad_norm": 0.6742895841598511, "learning_rate": 7.222315789473684e-05, "loss": 0.6744, "step": 64 }, { "epoch": 0.11403508771929824, "grad_norm": 0.5670256614685059, "learning_rate": 7.16921052631579e-05, "loss": 0.7197, "step": 65 }, { "epoch": 0.11578947368421053, "grad_norm": 0.8699775338172913, "learning_rate": 7.116105263157895e-05, "loss": 0.8541, "step": 66 }, { "epoch": 0.11754385964912281, "grad_norm": 0.5569133758544922, "learning_rate": 7.062999999999999e-05, "loss": 0.4119, "step": 67 }, { "epoch": 0.11929824561403508, "grad_norm": 0.6450591683387756, "learning_rate": 7.009894736842106e-05, "loss": 0.6812, "step": 68 }, { "epoch": 0.12105263157894737, "grad_norm": 0.8254157900810242, "learning_rate": 6.95678947368421e-05, "loss": 0.9211, "step": 69 }, { "epoch": 0.12280701754385964, "grad_norm": 0.8246232867240906, "learning_rate": 6.903684210526316e-05, "loss": 0.8237, "step": 70 }, { "epoch": 0.12456140350877193, "grad_norm": 0.6372646689414978, "learning_rate": 6.850578947368422e-05, "loss": 0.7131, "step": 71 }, { "epoch": 0.12631578947368421, "grad_norm": 0.7007615566253662, "learning_rate": 6.797473684210526e-05, "loss": 0.7478, "step": 72 }, { "epoch": 0.1280701754385965, "grad_norm": 0.6046174168586731, "learning_rate": 6.744368421052631e-05, "loss": 0.6193, "step": 73 }, { "epoch": 0.12982456140350876, "grad_norm": 0.5702415108680725, "learning_rate": 6.691263157894736e-05, "loss": 0.6184, "step": 74 }, { "epoch": 0.13157894736842105, "grad_norm": 0.6641340851783752, "learning_rate": 6.638157894736843e-05, "loss": 0.7208, "step": 75 }, { "epoch": 0.13333333333333333, "grad_norm": 0.6358048319816589, "learning_rate": 6.585052631578948e-05, "loss": 0.6867, "step": 76 }, { "epoch": 0.13508771929824562, "grad_norm": 0.684662401676178, "learning_rate": 6.531947368421052e-05, "loss": 0.7349, "step": 77 }, { "epoch": 0.1368421052631579, "grad_norm": 0.6469242572784424, "learning_rate": 6.478842105263158e-05, "loss": 0.6503, "step": 78 }, { "epoch": 0.13859649122807016, "grad_norm": 0.7363973259925842, "learning_rate": 6.425736842105264e-05, "loss": 0.9963, "step": 79 }, { "epoch": 0.14035087719298245, "grad_norm": 0.6783320903778076, "learning_rate": 6.372631578947368e-05, "loss": 0.7758, "step": 80 }, { "epoch": 0.14210526315789473, "grad_norm": 0.604201078414917, "learning_rate": 6.319526315789473e-05, "loss": 0.7565, "step": 81 }, { "epoch": 0.14385964912280702, "grad_norm": 0.7265191674232483, "learning_rate": 6.266421052631579e-05, "loss": 0.6596, "step": 82 }, { "epoch": 0.1456140350877193, "grad_norm": 0.6445674300193787, "learning_rate": 6.213315789473685e-05, "loss": 0.6451, "step": 83 }, { "epoch": 0.14736842105263157, "grad_norm": 0.6166771650314331, "learning_rate": 6.16021052631579e-05, "loss": 0.6186, "step": 84 }, { "epoch": 0.14912280701754385, "grad_norm": 0.737983226776123, "learning_rate": 6.107105263157894e-05, "loss": 0.7986, "step": 85 }, { "epoch": 0.15087719298245614, "grad_norm": 0.665922999382019, "learning_rate": 6.054e-05, "loss": 0.6777, "step": 86 }, { "epoch": 0.15263157894736842, "grad_norm": 0.7293619513511658, "learning_rate": 6.000894736842105e-05, "loss": 0.6561, "step": 87 }, { "epoch": 0.1543859649122807, "grad_norm": 0.8867078423500061, "learning_rate": 5.94778947368421e-05, "loss": 0.848, "step": 88 }, { "epoch": 0.156140350877193, "grad_norm": 0.7068512439727783, "learning_rate": 5.894684210526316e-05, "loss": 0.6779, "step": 89 }, { "epoch": 0.15789473684210525, "grad_norm": 0.8587491512298584, "learning_rate": 5.841578947368421e-05, "loss": 0.8993, "step": 90 }, { "epoch": 0.15964912280701754, "grad_norm": 0.7978894114494324, "learning_rate": 5.7884736842105265e-05, "loss": 0.7329, "step": 91 }, { "epoch": 0.16140350877192983, "grad_norm": 0.7457124590873718, "learning_rate": 5.7353684210526314e-05, "loss": 0.6159, "step": 92 }, { "epoch": 0.1631578947368421, "grad_norm": 0.601140022277832, "learning_rate": 5.6822631578947364e-05, "loss": 0.5636, "step": 93 }, { "epoch": 0.1649122807017544, "grad_norm": 0.7705742120742798, "learning_rate": 5.629157894736842e-05, "loss": 0.8532, "step": 94 }, { "epoch": 0.16666666666666666, "grad_norm": 0.7511420249938965, "learning_rate": 5.576052631578948e-05, "loss": 0.7187, "step": 95 }, { "epoch": 0.16842105263157894, "grad_norm": 0.8569889068603516, "learning_rate": 5.522947368421053e-05, "loss": 0.84, "step": 96 }, { "epoch": 0.17017543859649123, "grad_norm": 0.6718824505805969, "learning_rate": 5.469842105263158e-05, "loss": 0.4859, "step": 97 }, { "epoch": 0.17192982456140352, "grad_norm": 0.7785717844963074, "learning_rate": 5.416736842105263e-05, "loss": 0.7726, "step": 98 }, { "epoch": 0.1736842105263158, "grad_norm": 0.9549532532691956, "learning_rate": 5.3636315789473685e-05, "loss": 0.9155, "step": 99 }, { "epoch": 0.17543859649122806, "grad_norm": 1.5034849643707275, "learning_rate": 5.3105263157894734e-05, "loss": 0.9611, "step": 100 }, { "epoch": 0.17543859649122806, "eval_loss": 0.7681017518043518, "eval_runtime": 28.5725, "eval_samples_per_second": 8.4, "eval_steps_per_second": 2.1, "step": 100 }, { "epoch": 0.17719298245614035, "grad_norm": 0.741050660610199, "learning_rate": 5.257421052631578e-05, "loss": 0.8367, "step": 101 }, { "epoch": 0.17894736842105263, "grad_norm": 1.0446927547454834, "learning_rate": 5.2043157894736846e-05, "loss": 1.1629, "step": 102 }, { "epoch": 0.18070175438596492, "grad_norm": 0.6213659644126892, "learning_rate": 5.1512105263157895e-05, "loss": 0.7145, "step": 103 }, { "epoch": 0.1824561403508772, "grad_norm": 1.1013575792312622, "learning_rate": 5.098105263157895e-05, "loss": 1.0192, "step": 104 }, { "epoch": 0.18421052631578946, "grad_norm": 0.6862013339996338, "learning_rate": 5.045e-05, "loss": 0.8154, "step": 105 }, { "epoch": 0.18596491228070175, "grad_norm": 0.6277929544448853, "learning_rate": 4.991894736842105e-05, "loss": 0.6772, "step": 106 }, { "epoch": 0.18771929824561404, "grad_norm": 0.7700167894363403, "learning_rate": 4.9387894736842105e-05, "loss": 0.8308, "step": 107 }, { "epoch": 0.18947368421052632, "grad_norm": 0.6632503271102905, "learning_rate": 4.885684210526316e-05, "loss": 0.7842, "step": 108 }, { "epoch": 0.1912280701754386, "grad_norm": 0.7114025950431824, "learning_rate": 4.832578947368421e-05, "loss": 0.7336, "step": 109 }, { "epoch": 0.19298245614035087, "grad_norm": 0.6157619953155518, "learning_rate": 4.779473684210526e-05, "loss": 0.768, "step": 110 }, { "epoch": 0.19473684210526315, "grad_norm": 0.710568904876709, "learning_rate": 4.7263684210526315e-05, "loss": 0.9034, "step": 111 }, { "epoch": 0.19649122807017544, "grad_norm": 0.7637466192245483, "learning_rate": 4.673263157894737e-05, "loss": 0.6161, "step": 112 }, { "epoch": 0.19824561403508772, "grad_norm": 0.569848358631134, "learning_rate": 4.620157894736842e-05, "loss": 0.6611, "step": 113 }, { "epoch": 0.2, "grad_norm": 0.787581741809845, "learning_rate": 4.5670526315789475e-05, "loss": 0.7389, "step": 114 }, { "epoch": 0.20175438596491227, "grad_norm": 0.6908506751060486, "learning_rate": 4.5139473684210524e-05, "loss": 0.7028, "step": 115 }, { "epoch": 0.20350877192982456, "grad_norm": 0.5628468990325928, "learning_rate": 4.460842105263158e-05, "loss": 0.5894, "step": 116 }, { "epoch": 0.20526315789473684, "grad_norm": 0.5309147238731384, "learning_rate": 4.4077368421052636e-05, "loss": 0.6172, "step": 117 }, { "epoch": 0.20701754385964913, "grad_norm": 0.6175889372825623, "learning_rate": 4.3546315789473685e-05, "loss": 0.6526, "step": 118 }, { "epoch": 0.20877192982456141, "grad_norm": 0.7518534064292908, "learning_rate": 4.3015263157894734e-05, "loss": 0.712, "step": 119 }, { "epoch": 0.21052631578947367, "grad_norm": 0.5883734226226807, "learning_rate": 4.248421052631579e-05, "loss": 0.5457, "step": 120 }, { "epoch": 0.21228070175438596, "grad_norm": 0.6398406028747559, "learning_rate": 4.1953157894736846e-05, "loss": 0.5305, "step": 121 }, { "epoch": 0.21403508771929824, "grad_norm": 0.6730632185935974, "learning_rate": 4.1422105263157895e-05, "loss": 0.8646, "step": 122 }, { "epoch": 0.21578947368421053, "grad_norm": 0.5818577408790588, "learning_rate": 4.0891052631578944e-05, "loss": 0.6427, "step": 123 }, { "epoch": 0.21754385964912282, "grad_norm": 0.6870962381362915, "learning_rate": 4.036e-05, "loss": 0.7576, "step": 124 }, { "epoch": 0.21929824561403508, "grad_norm": 0.5730745196342468, "learning_rate": 3.9828947368421056e-05, "loss": 0.652, "step": 125 }, { "epoch": 0.22105263157894736, "grad_norm": 0.6954720616340637, "learning_rate": 3.9297894736842105e-05, "loss": 0.7137, "step": 126 }, { "epoch": 0.22280701754385965, "grad_norm": 0.586402177810669, "learning_rate": 3.8766842105263154e-05, "loss": 0.5587, "step": 127 }, { "epoch": 0.22456140350877193, "grad_norm": 0.5629393458366394, "learning_rate": 3.823578947368421e-05, "loss": 0.7004, "step": 128 }, { "epoch": 0.22631578947368422, "grad_norm": 0.8045310974121094, "learning_rate": 3.7704736842105265e-05, "loss": 0.8988, "step": 129 }, { "epoch": 0.22807017543859648, "grad_norm": 0.6091334819793701, "learning_rate": 3.7173684210526315e-05, "loss": 0.5314, "step": 130 }, { "epoch": 0.22982456140350876, "grad_norm": 0.6041271090507507, "learning_rate": 3.664263157894737e-05, "loss": 0.6393, "step": 131 }, { "epoch": 0.23157894736842105, "grad_norm": 0.5708930492401123, "learning_rate": 3.611157894736842e-05, "loss": 0.6395, "step": 132 }, { "epoch": 0.23333333333333334, "grad_norm": 0.6349872350692749, "learning_rate": 3.5580526315789475e-05, "loss": 0.6754, "step": 133 }, { "epoch": 0.23508771929824562, "grad_norm": 0.7760201096534729, "learning_rate": 3.504947368421053e-05, "loss": 0.7591, "step": 134 }, { "epoch": 0.23684210526315788, "grad_norm": 0.607395350933075, "learning_rate": 3.451842105263158e-05, "loss": 0.512, "step": 135 }, { "epoch": 0.23859649122807017, "grad_norm": 0.595882773399353, "learning_rate": 3.398736842105263e-05, "loss": 0.6204, "step": 136 }, { "epoch": 0.24035087719298245, "grad_norm": 0.63569176197052, "learning_rate": 3.345631578947368e-05, "loss": 0.6152, "step": 137 }, { "epoch": 0.24210526315789474, "grad_norm": 0.9270498156547546, "learning_rate": 3.292526315789474e-05, "loss": 0.7149, "step": 138 }, { "epoch": 0.24385964912280703, "grad_norm": 0.9119054079055786, "learning_rate": 3.239421052631579e-05, "loss": 1.0173, "step": 139 }, { "epoch": 0.24561403508771928, "grad_norm": 0.6205770373344421, "learning_rate": 3.186315789473684e-05, "loss": 0.5314, "step": 140 }, { "epoch": 0.24736842105263157, "grad_norm": 0.6443336009979248, "learning_rate": 3.1332105263157895e-05, "loss": 0.7873, "step": 141 }, { "epoch": 0.24912280701754386, "grad_norm": 0.9933227896690369, "learning_rate": 3.080105263157895e-05, "loss": 0.8881, "step": 142 }, { "epoch": 0.25087719298245614, "grad_norm": 0.6606805920600891, "learning_rate": 3.027e-05, "loss": 0.6213, "step": 143 }, { "epoch": 0.25263157894736843, "grad_norm": 0.7721325159072876, "learning_rate": 2.973894736842105e-05, "loss": 0.6419, "step": 144 }, { "epoch": 0.2543859649122807, "grad_norm": 0.7559982538223267, "learning_rate": 2.9207894736842105e-05, "loss": 0.7017, "step": 145 }, { "epoch": 0.256140350877193, "grad_norm": 0.8087080717086792, "learning_rate": 2.8676842105263157e-05, "loss": 0.7934, "step": 146 }, { "epoch": 0.2578947368421053, "grad_norm": 0.7131542563438416, "learning_rate": 2.814578947368421e-05, "loss": 0.689, "step": 147 }, { "epoch": 0.2596491228070175, "grad_norm": 0.9132384061813354, "learning_rate": 2.7614736842105266e-05, "loss": 0.8236, "step": 148 }, { "epoch": 0.2614035087719298, "grad_norm": 0.9250985980033875, "learning_rate": 2.7083684210526315e-05, "loss": 0.7521, "step": 149 }, { "epoch": 0.2631578947368421, "grad_norm": 0.8993288278579712, "learning_rate": 2.6552631578947367e-05, "loss": 0.8465, "step": 150 }, { "epoch": 0.2631578947368421, "eval_loss": 0.7511516809463501, "eval_runtime": 28.1659, "eval_samples_per_second": 8.521, "eval_steps_per_second": 2.13, "step": 150 }, { "epoch": 0.2649122807017544, "grad_norm": 0.47015851736068726, "learning_rate": 2.6021578947368423e-05, "loss": 0.6779, "step": 151 }, { "epoch": 0.26666666666666666, "grad_norm": 0.6141353845596313, "learning_rate": 2.5490526315789475e-05, "loss": 0.7003, "step": 152 }, { "epoch": 0.26842105263157895, "grad_norm": 0.9018178582191467, "learning_rate": 2.4959473684210524e-05, "loss": 1.0953, "step": 153 }, { "epoch": 0.27017543859649124, "grad_norm": 0.7489861845970154, "learning_rate": 2.442842105263158e-05, "loss": 0.8608, "step": 154 }, { "epoch": 0.2719298245614035, "grad_norm": 0.7168894410133362, "learning_rate": 2.389736842105263e-05, "loss": 0.7412, "step": 155 }, { "epoch": 0.2736842105263158, "grad_norm": 0.8422032594680786, "learning_rate": 2.3366315789473685e-05, "loss": 0.873, "step": 156 }, { "epoch": 0.2754385964912281, "grad_norm": 0.6963294744491577, "learning_rate": 2.2835263157894738e-05, "loss": 0.7918, "step": 157 }, { "epoch": 0.2771929824561403, "grad_norm": 0.4546716511249542, "learning_rate": 2.230421052631579e-05, "loss": 0.5057, "step": 158 }, { "epoch": 0.2789473684210526, "grad_norm": 0.4820166230201721, "learning_rate": 2.1773157894736843e-05, "loss": 0.5477, "step": 159 }, { "epoch": 0.2807017543859649, "grad_norm": 0.7093713879585266, "learning_rate": 2.1242105263157895e-05, "loss": 0.7208, "step": 160 }, { "epoch": 0.2824561403508772, "grad_norm": 0.6953532695770264, "learning_rate": 2.0711052631578947e-05, "loss": 0.6844, "step": 161 }, { "epoch": 0.28421052631578947, "grad_norm": 0.8025092482566833, "learning_rate": 2.018e-05, "loss": 0.9494, "step": 162 }, { "epoch": 0.28596491228070176, "grad_norm": 0.6826035380363464, "learning_rate": 1.9648947368421052e-05, "loss": 1.0678, "step": 163 }, { "epoch": 0.28771929824561404, "grad_norm": 0.7237564325332642, "learning_rate": 1.9117894736842105e-05, "loss": 0.764, "step": 164 }, { "epoch": 0.2894736842105263, "grad_norm": 0.7669388651847839, "learning_rate": 1.8586842105263157e-05, "loss": 0.7991, "step": 165 }, { "epoch": 0.2912280701754386, "grad_norm": 0.7776060700416565, "learning_rate": 1.805578947368421e-05, "loss": 0.7634, "step": 166 }, { "epoch": 0.2929824561403509, "grad_norm": 0.6191375255584717, "learning_rate": 1.7524736842105266e-05, "loss": 0.6987, "step": 167 }, { "epoch": 0.29473684210526313, "grad_norm": 0.8354077339172363, "learning_rate": 1.6993684210526315e-05, "loss": 0.968, "step": 168 }, { "epoch": 0.2964912280701754, "grad_norm": 0.9358921647071838, "learning_rate": 1.646263157894737e-05, "loss": 0.8361, "step": 169 }, { "epoch": 0.2982456140350877, "grad_norm": 0.7853288054466248, "learning_rate": 1.593157894736842e-05, "loss": 0.8393, "step": 170 }, { "epoch": 0.3, "grad_norm": 0.9581964612007141, "learning_rate": 1.5400526315789475e-05, "loss": 1.0107, "step": 171 }, { "epoch": 0.3017543859649123, "grad_norm": 0.7966198325157166, "learning_rate": 1.4869473684210524e-05, "loss": 0.7629, "step": 172 }, { "epoch": 0.30350877192982456, "grad_norm": 0.7841195464134216, "learning_rate": 1.4338421052631579e-05, "loss": 0.7958, "step": 173 }, { "epoch": 0.30526315789473685, "grad_norm": 0.4999608099460602, "learning_rate": 1.3807368421052633e-05, "loss": 0.5932, "step": 174 }, { "epoch": 0.30701754385964913, "grad_norm": 0.6105174422264099, "learning_rate": 1.3276315789473684e-05, "loss": 0.7267, "step": 175 }, { "epoch": 0.3087719298245614, "grad_norm": 0.5984802842140198, "learning_rate": 1.2745263157894738e-05, "loss": 0.575, "step": 176 }, { "epoch": 0.3105263157894737, "grad_norm": 0.7803645730018616, "learning_rate": 1.221421052631579e-05, "loss": 0.7659, "step": 177 }, { "epoch": 0.312280701754386, "grad_norm": 0.5337862372398376, "learning_rate": 1.1683157894736843e-05, "loss": 0.664, "step": 178 }, { "epoch": 0.3140350877192982, "grad_norm": 0.6948061585426331, "learning_rate": 1.1152105263157895e-05, "loss": 0.7925, "step": 179 }, { "epoch": 0.3157894736842105, "grad_norm": 0.6513640284538269, "learning_rate": 1.0621052631578948e-05, "loss": 0.7287, "step": 180 }, { "epoch": 0.3175438596491228, "grad_norm": 0.6046849489212036, "learning_rate": 1.009e-05, "loss": 0.5544, "step": 181 }, { "epoch": 0.3192982456140351, "grad_norm": 0.5044527649879456, "learning_rate": 9.558947368421052e-06, "loss": 0.5837, "step": 182 }, { "epoch": 0.32105263157894737, "grad_norm": 0.6072579026222229, "learning_rate": 9.027894736842105e-06, "loss": 0.7566, "step": 183 }, { "epoch": 0.32280701754385965, "grad_norm": 0.7336714267730713, "learning_rate": 8.496842105263157e-06, "loss": 0.756, "step": 184 }, { "epoch": 0.32456140350877194, "grad_norm": 0.7087790369987488, "learning_rate": 7.96578947368421e-06, "loss": 0.8286, "step": 185 }, { "epoch": 0.3263157894736842, "grad_norm": 0.7009425759315491, "learning_rate": 7.434736842105262e-06, "loss": 0.7483, "step": 186 }, { "epoch": 0.3280701754385965, "grad_norm": 0.678546667098999, "learning_rate": 6.903684210526316e-06, "loss": 0.7917, "step": 187 }, { "epoch": 0.3298245614035088, "grad_norm": 0.6305289268493652, "learning_rate": 6.372631578947369e-06, "loss": 0.6682, "step": 188 }, { "epoch": 0.33157894736842103, "grad_norm": 0.8548862934112549, "learning_rate": 5.841578947368421e-06, "loss": 1.0413, "step": 189 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6873423457145691, "learning_rate": 5.310526315789474e-06, "loss": 0.5938, "step": 190 }, { "epoch": 0.3350877192982456, "grad_norm": 0.8765286803245544, "learning_rate": 4.779473684210526e-06, "loss": 0.7684, "step": 191 }, { "epoch": 0.3368421052631579, "grad_norm": 0.7501645088195801, "learning_rate": 4.248421052631579e-06, "loss": 0.8901, "step": 192 }, { "epoch": 0.3385964912280702, "grad_norm": 0.580901563167572, "learning_rate": 3.717368421052631e-06, "loss": 0.4722, "step": 193 }, { "epoch": 0.34035087719298246, "grad_norm": 0.8076489567756653, "learning_rate": 3.1863157894736844e-06, "loss": 0.7184, "step": 194 }, { "epoch": 0.34210526315789475, "grad_norm": 0.7457717061042786, "learning_rate": 2.655263157894737e-06, "loss": 0.7367, "step": 195 }, { "epoch": 0.34385964912280703, "grad_norm": 0.9256778359413147, "learning_rate": 2.1242105263157893e-06, "loss": 0.8355, "step": 196 }, { "epoch": 0.3456140350877193, "grad_norm": 0.8798757195472717, "learning_rate": 1.5931578947368422e-06, "loss": 0.8276, "step": 197 }, { "epoch": 0.3473684210526316, "grad_norm": 0.6897732615470886, "learning_rate": 1.0621052631578947e-06, "loss": 0.6065, "step": 198 }, { "epoch": 0.34912280701754383, "grad_norm": 0.8041809797286987, "learning_rate": 5.310526315789473e-07, "loss": 0.7118, "step": 199 }, { "epoch": 0.3508771929824561, "grad_norm": 1.1839923858642578, "learning_rate": 0.0, "loss": 0.8041, "step": 200 }, { "epoch": 0.3508771929824561, "eval_loss": 0.7393630146980286, "eval_runtime": 28.1896, "eval_samples_per_second": 8.514, "eval_steps_per_second": 2.128, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.381227984125952e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }