{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999194392975107, "eval_steps": 500, "global_step": 6206, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 12.935441478144865, "learning_rate": 5.3475935828877005e-08, "loss": 1.4455, "step": 1 }, { "epoch": 0.0, "grad_norm": 14.7622354944256, "learning_rate": 1.0695187165775401e-07, "loss": 1.4577, "step": 2 }, { "epoch": 0.0, "grad_norm": 12.745943568788293, "learning_rate": 1.6042780748663104e-07, "loss": 1.3129, "step": 3 }, { "epoch": 0.0, "grad_norm": 13.9302655173877, "learning_rate": 2.1390374331550802e-07, "loss": 1.473, "step": 4 }, { "epoch": 0.0, "grad_norm": 12.694705289156342, "learning_rate": 2.6737967914438503e-07, "loss": 1.4385, "step": 5 }, { "epoch": 0.0, "grad_norm": 12.720888398162117, "learning_rate": 3.208556149732621e-07, "loss": 1.4653, "step": 6 }, { "epoch": 0.0, "grad_norm": 12.812943011118795, "learning_rate": 3.7433155080213904e-07, "loss": 1.3959, "step": 7 }, { "epoch": 0.0, "grad_norm": 6.821988569940946, "learning_rate": 4.2780748663101604e-07, "loss": 0.7875, "step": 8 }, { "epoch": 0.0, "grad_norm": 12.182259847820657, "learning_rate": 4.812834224598931e-07, "loss": 1.5014, "step": 9 }, { "epoch": 0.0, "grad_norm": 12.026573876602576, "learning_rate": 5.347593582887701e-07, "loss": 1.4184, "step": 10 }, { "epoch": 0.0, "grad_norm": 11.672855803818452, "learning_rate": 5.882352941176471e-07, "loss": 1.5295, "step": 11 }, { "epoch": 0.0, "grad_norm": 6.5219344415093214, "learning_rate": 6.417112299465242e-07, "loss": 0.7443, "step": 12 }, { "epoch": 0.0, "grad_norm": 9.822765399632402, "learning_rate": 6.951871657754011e-07, "loss": 1.4695, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.572331406231754, "learning_rate": 7.486631016042781e-07, "loss": 1.3112, "step": 14 }, { "epoch": 0.0, "grad_norm": 8.50054132155673, "learning_rate": 8.021390374331551e-07, "loss": 1.3738, "step": 15 }, { "epoch": 0.0, "grad_norm": 7.490496835595879, "learning_rate": 8.556149732620321e-07, "loss": 1.362, "step": 16 }, { "epoch": 0.0, "grad_norm": 7.8960541730994915, "learning_rate": 9.090909090909091e-07, "loss": 1.1374, "step": 17 }, { "epoch": 0.0, "grad_norm": 7.612965836506855, "learning_rate": 9.625668449197862e-07, "loss": 1.1762, "step": 18 }, { "epoch": 0.0, "grad_norm": 5.233528018489697, "learning_rate": 1.0160427807486633e-06, "loss": 1.1174, "step": 19 }, { "epoch": 0.0, "grad_norm": 4.816254368834543, "learning_rate": 1.0695187165775401e-06, "loss": 1.1701, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.997004404002938, "learning_rate": 1.1229946524064172e-06, "loss": 1.105, "step": 21 }, { "epoch": 0.0, "grad_norm": 4.25413563828559, "learning_rate": 1.1764705882352942e-06, "loss": 1.1117, "step": 22 }, { "epoch": 0.0, "grad_norm": 5.254001012967801, "learning_rate": 1.2299465240641713e-06, "loss": 0.6391, "step": 23 }, { "epoch": 0.0, "grad_norm": 3.8588334655369434, "learning_rate": 1.2834224598930483e-06, "loss": 1.1637, "step": 24 }, { "epoch": 0.0, "grad_norm": 3.2963170527733574, "learning_rate": 1.3368983957219254e-06, "loss": 1.042, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.282541430114072, "learning_rate": 1.3903743315508022e-06, "loss": 0.6116, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.7293564908374353, "learning_rate": 1.4438502673796793e-06, "loss": 0.9941, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.824192721701949, "learning_rate": 1.4973262032085562e-06, "loss": 1.0038, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.8125014928177339, "learning_rate": 1.5508021390374334e-06, "loss": 1.0333, "step": 29 }, { "epoch": 0.0, "grad_norm": 1.7184486102384333, "learning_rate": 1.6042780748663103e-06, "loss": 0.952, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.8407635577341634, "learning_rate": 1.6577540106951873e-06, "loss": 0.9006, "step": 31 }, { "epoch": 0.01, "grad_norm": 1.703457385982276, "learning_rate": 1.7112299465240642e-06, "loss": 0.8891, "step": 32 }, { "epoch": 0.01, "grad_norm": 1.6214136372997996, "learning_rate": 1.7647058823529414e-06, "loss": 0.9676, "step": 33 }, { "epoch": 0.01, "grad_norm": 1.5284451424310843, "learning_rate": 1.8181818181818183e-06, "loss": 0.9191, "step": 34 }, { "epoch": 0.01, "grad_norm": 1.433591334843516, "learning_rate": 1.8716577540106954e-06, "loss": 0.8436, "step": 35 }, { "epoch": 0.01, "grad_norm": 1.4811324547013833, "learning_rate": 1.9251336898395724e-06, "loss": 0.8871, "step": 36 }, { "epoch": 0.01, "grad_norm": 1.6693464083370484, "learning_rate": 1.9786096256684497e-06, "loss": 0.9509, "step": 37 }, { "epoch": 0.01, "grad_norm": 1.5227273449265926, "learning_rate": 2.0320855614973265e-06, "loss": 0.9546, "step": 38 }, { "epoch": 0.01, "grad_norm": 1.4756603584692545, "learning_rate": 2.0855614973262034e-06, "loss": 0.9534, "step": 39 }, { "epoch": 0.01, "grad_norm": 1.4729605341959016, "learning_rate": 2.1390374331550802e-06, "loss": 0.8574, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.4308867149250293, "learning_rate": 2.1925133689839575e-06, "loss": 0.8769, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.2843916639976503, "learning_rate": 2.2459893048128343e-06, "loss": 0.7959, "step": 42 }, { "epoch": 0.01, "grad_norm": 1.3303200637988284, "learning_rate": 2.2994652406417116e-06, "loss": 0.7755, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.3981022102143286, "learning_rate": 2.3529411764705885e-06, "loss": 0.8467, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.4070369115048074, "learning_rate": 2.4064171122994653e-06, "loss": 0.8869, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.3826852752536778, "learning_rate": 2.4598930481283426e-06, "loss": 0.8539, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.2994263971268318, "learning_rate": 2.5133689839572194e-06, "loss": 0.8226, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.4029755203913823, "learning_rate": 2.5668449197860967e-06, "loss": 0.8063, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.4640857513245897, "learning_rate": 2.6203208556149735e-06, "loss": 0.8219, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.3400580068888166, "learning_rate": 2.673796791443851e-06, "loss": 0.7591, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.4271256402645964, "learning_rate": 2.7272727272727272e-06, "loss": 0.8773, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.2593254014363953, "learning_rate": 2.7807486631016045e-06, "loss": 0.7871, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.3018443477097152, "learning_rate": 2.8342245989304818e-06, "loss": 0.8449, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.3182813983078345, "learning_rate": 2.8877005347593586e-06, "loss": 0.7531, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.315026501483241, "learning_rate": 2.9411764705882355e-06, "loss": 0.731, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.2645000378266102, "learning_rate": 2.9946524064171123e-06, "loss": 0.8118, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.3939481835820742, "learning_rate": 3.0481283422459896e-06, "loss": 0.8107, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.3722036495691252, "learning_rate": 3.101604278074867e-06, "loss": 0.7851, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.4013348097015585, "learning_rate": 3.1550802139037433e-06, "loss": 0.8707, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.3945354126187168, "learning_rate": 3.2085561497326205e-06, "loss": 0.7455, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.3615958450470937, "learning_rate": 3.262032085561498e-06, "loss": 0.8429, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.3363841680546134, "learning_rate": 3.3155080213903747e-06, "loss": 0.8495, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.3891357284948158, "learning_rate": 3.368983957219252e-06, "loss": 0.9195, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.271946386715015, "learning_rate": 3.4224598930481284e-06, "loss": 0.749, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.219667288144616, "learning_rate": 3.4759358288770056e-06, "loss": 0.6372, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.2601525896049124, "learning_rate": 3.529411764705883e-06, "loss": 0.7609, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.4258547561284391, "learning_rate": 3.5828877005347597e-06, "loss": 0.8498, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.3384436860521522, "learning_rate": 3.6363636363636366e-06, "loss": 0.7567, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.237169349467392, "learning_rate": 3.6898395721925134e-06, "loss": 0.7743, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.2628384486054505, "learning_rate": 3.7433155080213907e-06, "loss": 0.7615, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.3039911911016937, "learning_rate": 3.796791443850268e-06, "loss": 0.7763, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.4536181475187755, "learning_rate": 3.850267379679145e-06, "loss": 0.8278, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.3389444939627722, "learning_rate": 3.903743315508022e-06, "loss": 0.7661, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.2578830820664857, "learning_rate": 3.957219251336899e-06, "loss": 0.7756, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.2616404615407286, "learning_rate": 4.010695187165775e-06, "loss": 0.725, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.2731557167858414, "learning_rate": 4.064171122994653e-06, "loss": 0.6998, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.3484428672133917, "learning_rate": 4.11764705882353e-06, "loss": 0.7932, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.2909478782594066, "learning_rate": 4.171122994652407e-06, "loss": 0.7236, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.7994965665573495, "learning_rate": 4.224598930481284e-06, "loss": 0.4944, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.5323738271913132, "learning_rate": 4.2780748663101604e-06, "loss": 0.837, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.2559640178659703, "learning_rate": 4.331550802139038e-06, "loss": 0.7898, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.2156880135548962, "learning_rate": 4.385026737967915e-06, "loss": 0.7338, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.2886913112014688, "learning_rate": 4.438502673796792e-06, "loss": 0.7038, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.273326354954758, "learning_rate": 4.491978609625669e-06, "loss": 0.7561, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.2916258313295492, "learning_rate": 4.5454545454545455e-06, "loss": 0.7931, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.2597583771598537, "learning_rate": 4.598930481283423e-06, "loss": 0.6588, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.2920208680762029, "learning_rate": 4.6524064171123e-06, "loss": 0.7691, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.2720819534402383, "learning_rate": 4.705882352941177e-06, "loss": 0.8344, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.2826249153403808, "learning_rate": 4.759358288770054e-06, "loss": 0.6944, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.1577666803150262, "learning_rate": 4.812834224598931e-06, "loss": 0.728, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.2905971202346285, "learning_rate": 4.866310160427808e-06, "loss": 0.6942, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.3635752155344887, "learning_rate": 4.919786096256685e-06, "loss": 0.7546, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.3662328636967387, "learning_rate": 4.973262032085562e-06, "loss": 0.7836, "step": 93 }, { "epoch": 0.02, "grad_norm": 1.4707915804076896, "learning_rate": 5.026737967914439e-06, "loss": 0.7372, "step": 94 }, { "epoch": 0.02, "grad_norm": 1.2849509484569301, "learning_rate": 5.0802139037433165e-06, "loss": 0.7776, "step": 95 }, { "epoch": 0.02, "grad_norm": 1.428058767538607, "learning_rate": 5.133689839572193e-06, "loss": 0.8066, "step": 96 }, { "epoch": 0.02, "grad_norm": 1.615922362802789, "learning_rate": 5.187165775401069e-06, "loss": 0.8649, "step": 97 }, { "epoch": 0.02, "grad_norm": 1.319482758837517, "learning_rate": 5.240641711229947e-06, "loss": 0.8037, "step": 98 }, { "epoch": 0.02, "grad_norm": 0.6801244635264085, "learning_rate": 5.294117647058824e-06, "loss": 0.481, "step": 99 }, { "epoch": 0.02, "grad_norm": 1.408729969060582, "learning_rate": 5.347593582887702e-06, "loss": 0.7608, "step": 100 }, { "epoch": 0.02, "grad_norm": 1.517835000841765, "learning_rate": 5.4010695187165785e-06, "loss": 0.8009, "step": 101 }, { "epoch": 0.02, "grad_norm": 1.3021444696294928, "learning_rate": 5.4545454545454545e-06, "loss": 0.7761, "step": 102 }, { "epoch": 0.02, "grad_norm": 1.3926050404746577, "learning_rate": 5.508021390374332e-06, "loss": 0.7868, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.6832263550818355, "learning_rate": 5.561497326203209e-06, "loss": 0.4697, "step": 104 }, { "epoch": 0.02, "grad_norm": 1.3721074757572662, "learning_rate": 5.614973262032086e-06, "loss": 0.7854, "step": 105 }, { "epoch": 0.02, "grad_norm": 1.3098264456149247, "learning_rate": 5.6684491978609635e-06, "loss": 0.6518, "step": 106 }, { "epoch": 0.02, "grad_norm": 1.3750556327247605, "learning_rate": 5.7219251336898395e-06, "loss": 0.7788, "step": 107 }, { "epoch": 0.02, "grad_norm": 1.368039595492315, "learning_rate": 5.775401069518717e-06, "loss": 0.7205, "step": 108 }, { "epoch": 0.02, "grad_norm": 1.3117106157324974, "learning_rate": 5.828877005347594e-06, "loss": 0.7234, "step": 109 }, { "epoch": 0.02, "grad_norm": 1.4473178635658313, "learning_rate": 5.882352941176471e-06, "loss": 0.7516, "step": 110 }, { "epoch": 0.02, "grad_norm": 1.396316738422691, "learning_rate": 5.935828877005349e-06, "loss": 0.8079, "step": 111 }, { "epoch": 0.02, "grad_norm": 1.1715901374545499, "learning_rate": 5.989304812834225e-06, "loss": 0.5798, "step": 112 }, { "epoch": 0.02, "grad_norm": 1.388524502842756, "learning_rate": 6.0427807486631015e-06, "loss": 0.7906, "step": 113 }, { "epoch": 0.02, "grad_norm": 1.2982019855974833, "learning_rate": 6.096256684491979e-06, "loss": 0.7176, "step": 114 }, { "epoch": 0.02, "grad_norm": 1.338159896784674, "learning_rate": 6.149732620320856e-06, "loss": 0.7134, "step": 115 }, { "epoch": 0.02, "grad_norm": 1.2613799535421117, "learning_rate": 6.203208556149734e-06, "loss": 0.7265, "step": 116 }, { "epoch": 0.02, "grad_norm": 1.472526413330938, "learning_rate": 6.25668449197861e-06, "loss": 0.8516, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.3388925592171055, "learning_rate": 6.3101604278074865e-06, "loss": 0.7541, "step": 118 }, { "epoch": 0.02, "grad_norm": 1.3693232759630902, "learning_rate": 6.363636363636364e-06, "loss": 0.7319, "step": 119 }, { "epoch": 0.02, "grad_norm": 1.217116268775871, "learning_rate": 6.417112299465241e-06, "loss": 0.6994, "step": 120 }, { "epoch": 0.02, "grad_norm": 1.383252591520476, "learning_rate": 6.470588235294119e-06, "loss": 0.7199, "step": 121 }, { "epoch": 0.02, "grad_norm": 1.2247793941666851, "learning_rate": 6.524064171122996e-06, "loss": 0.7144, "step": 122 }, { "epoch": 0.02, "grad_norm": 1.2817725807213516, "learning_rate": 6.577540106951872e-06, "loss": 0.7443, "step": 123 }, { "epoch": 0.02, "grad_norm": 1.2541881712292917, "learning_rate": 6.631016042780749e-06, "loss": 0.7827, "step": 124 }, { "epoch": 0.02, "grad_norm": 1.4904141052411564, "learning_rate": 6.684491978609626e-06, "loss": 0.785, "step": 125 }, { "epoch": 0.02, "grad_norm": 1.455037889026677, "learning_rate": 6.737967914438504e-06, "loss": 0.7385, "step": 126 }, { "epoch": 0.02, "grad_norm": 1.3587712236420801, "learning_rate": 6.791443850267381e-06, "loss": 0.7268, "step": 127 }, { "epoch": 0.02, "grad_norm": 1.3989330276607206, "learning_rate": 6.844919786096257e-06, "loss": 0.7274, "step": 128 }, { "epoch": 0.02, "grad_norm": 1.7253566527353816, "learning_rate": 6.898395721925134e-06, "loss": 0.7787, "step": 129 }, { "epoch": 0.02, "grad_norm": 1.296949252584265, "learning_rate": 6.951871657754011e-06, "loss": 0.6882, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.2826085284747382, "learning_rate": 7.005347593582889e-06, "loss": 0.7839, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.8080921523697212, "learning_rate": 7.058823529411766e-06, "loss": 0.4443, "step": 132 }, { "epoch": 0.02, "grad_norm": 1.3800525896378077, "learning_rate": 7.112299465240642e-06, "loss": 0.7355, "step": 133 }, { "epoch": 0.02, "grad_norm": 1.363840912423529, "learning_rate": 7.1657754010695195e-06, "loss": 0.6729, "step": 134 }, { "epoch": 0.02, "grad_norm": 1.2649157806773565, "learning_rate": 7.219251336898396e-06, "loss": 0.7261, "step": 135 }, { "epoch": 0.02, "grad_norm": 1.3214263402465884, "learning_rate": 7.272727272727273e-06, "loss": 0.7623, "step": 136 }, { "epoch": 0.02, "grad_norm": 1.288570252898323, "learning_rate": 7.326203208556151e-06, "loss": 0.7265, "step": 137 }, { "epoch": 0.02, "grad_norm": 1.2363944963995177, "learning_rate": 7.379679144385027e-06, "loss": 0.655, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.4156804170487287, "learning_rate": 7.433155080213904e-06, "loss": 0.7961, "step": 139 }, { "epoch": 0.02, "grad_norm": 1.2384359160180567, "learning_rate": 7.486631016042781e-06, "loss": 0.7878, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.3774744960531309, "learning_rate": 7.540106951871658e-06, "loss": 0.8762, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.2386418951519893, "learning_rate": 7.593582887700536e-06, "loss": 0.7585, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.3251484755237808, "learning_rate": 7.647058823529411e-06, "loss": 0.7012, "step": 143 }, { "epoch": 0.02, "grad_norm": 1.3869648836737731, "learning_rate": 7.70053475935829e-06, "loss": 0.7384, "step": 144 }, { "epoch": 0.02, "grad_norm": 1.2839819282631588, "learning_rate": 7.754010695187166e-06, "loss": 0.6685, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.380634963113264, "learning_rate": 7.807486631016043e-06, "loss": 0.7416, "step": 146 }, { "epoch": 0.02, "grad_norm": 1.3516287254672021, "learning_rate": 7.86096256684492e-06, "loss": 0.6661, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.476190394846508, "learning_rate": 7.914438502673799e-06, "loss": 0.8217, "step": 148 }, { "epoch": 0.02, "grad_norm": 1.3277349424446394, "learning_rate": 7.967914438502674e-06, "loss": 0.7373, "step": 149 }, { "epoch": 0.02, "grad_norm": 1.3610384964315203, "learning_rate": 8.02139037433155e-06, "loss": 0.7429, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.1415142233931699, "learning_rate": 8.07486631016043e-06, "loss": 0.7019, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.3150801069322091, "learning_rate": 8.128342245989306e-06, "loss": 0.6895, "step": 152 }, { "epoch": 0.02, "grad_norm": 1.0193872623524336, "learning_rate": 8.181818181818183e-06, "loss": 0.4849, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.4244765728424607, "learning_rate": 8.23529411764706e-06, "loss": 0.7747, "step": 154 }, { "epoch": 0.02, "grad_norm": 1.3007303504370338, "learning_rate": 8.288770053475937e-06, "loss": 0.763, "step": 155 }, { "epoch": 0.03, "grad_norm": 1.2697650681744665, "learning_rate": 8.342245989304813e-06, "loss": 0.7737, "step": 156 }, { "epoch": 0.03, "grad_norm": 1.3963893836361554, "learning_rate": 8.39572192513369e-06, "loss": 0.7515, "step": 157 }, { "epoch": 0.03, "grad_norm": 1.3363467946483825, "learning_rate": 8.449197860962567e-06, "loss": 0.7604, "step": 158 }, { "epoch": 0.03, "grad_norm": 1.3274245686332713, "learning_rate": 8.502673796791444e-06, "loss": 0.8, "step": 159 }, { "epoch": 0.03, "grad_norm": 0.7717008133929879, "learning_rate": 8.556149732620321e-06, "loss": 0.5089, "step": 160 }, { "epoch": 0.03, "grad_norm": 1.291861767521327, "learning_rate": 8.609625668449198e-06, "loss": 0.7105, "step": 161 }, { "epoch": 0.03, "grad_norm": 1.2952891407984455, "learning_rate": 8.663101604278076e-06, "loss": 0.6938, "step": 162 }, { "epoch": 0.03, "grad_norm": 1.3635723537878723, "learning_rate": 8.716577540106953e-06, "loss": 0.7749, "step": 163 }, { "epoch": 0.03, "grad_norm": 1.4160132017337088, "learning_rate": 8.77005347593583e-06, "loss": 0.768, "step": 164 }, { "epoch": 0.03, "grad_norm": 1.3509135795582823, "learning_rate": 8.823529411764707e-06, "loss": 0.7451, "step": 165 }, { "epoch": 0.03, "grad_norm": 1.351803385991898, "learning_rate": 8.877005347593584e-06, "loss": 0.7246, "step": 166 }, { "epoch": 0.03, "grad_norm": 1.218080048051328, "learning_rate": 8.93048128342246e-06, "loss": 0.7194, "step": 167 }, { "epoch": 0.03, "grad_norm": 1.2640725233899797, "learning_rate": 8.983957219251337e-06, "loss": 0.6985, "step": 168 }, { "epoch": 0.03, "grad_norm": 1.2762845164377183, "learning_rate": 9.037433155080214e-06, "loss": 0.6798, "step": 169 }, { "epoch": 0.03, "grad_norm": 1.2810621410476162, "learning_rate": 9.090909090909091e-06, "loss": 0.6819, "step": 170 }, { "epoch": 0.03, "grad_norm": 1.3779930981716855, "learning_rate": 9.144385026737968e-06, "loss": 0.7396, "step": 171 }, { "epoch": 0.03, "grad_norm": 1.3778386495198012, "learning_rate": 9.197860962566846e-06, "loss": 0.7001, "step": 172 }, { "epoch": 0.03, "grad_norm": 1.4089047112988118, "learning_rate": 9.251336898395723e-06, "loss": 0.7553, "step": 173 }, { "epoch": 0.03, "grad_norm": 1.2853824849437605, "learning_rate": 9.3048128342246e-06, "loss": 0.7321, "step": 174 }, { "epoch": 0.03, "grad_norm": 1.3138606515454636, "learning_rate": 9.358288770053477e-06, "loss": 0.6608, "step": 175 }, { "epoch": 0.03, "grad_norm": 1.3648407254095352, "learning_rate": 9.411764705882354e-06, "loss": 0.7672, "step": 176 }, { "epoch": 0.03, "grad_norm": 1.3653948567640994, "learning_rate": 9.46524064171123e-06, "loss": 0.7931, "step": 177 }, { "epoch": 0.03, "grad_norm": 1.3093774998215235, "learning_rate": 9.518716577540108e-06, "loss": 0.7334, "step": 178 }, { "epoch": 0.03, "grad_norm": 1.273571084537043, "learning_rate": 9.572192513368986e-06, "loss": 0.656, "step": 179 }, { "epoch": 0.03, "grad_norm": 1.5505377475596862, "learning_rate": 9.625668449197861e-06, "loss": 0.8257, "step": 180 }, { "epoch": 0.03, "grad_norm": 1.5729070011195827, "learning_rate": 9.679144385026738e-06, "loss": 0.8037, "step": 181 }, { "epoch": 0.03, "grad_norm": 1.4547553138374314, "learning_rate": 9.732620320855617e-06, "loss": 0.478, "step": 182 }, { "epoch": 0.03, "grad_norm": 1.2513348841143141, "learning_rate": 9.786096256684493e-06, "loss": 0.6765, "step": 183 }, { "epoch": 0.03, "grad_norm": 1.3627686835443136, "learning_rate": 9.83957219251337e-06, "loss": 0.7556, "step": 184 }, { "epoch": 0.03, "grad_norm": 0.774634246259936, "learning_rate": 9.893048128342247e-06, "loss": 0.4749, "step": 185 }, { "epoch": 0.03, "grad_norm": 1.397399587792225, "learning_rate": 9.946524064171124e-06, "loss": 0.7883, "step": 186 }, { "epoch": 0.03, "grad_norm": 1.367808901959227, "learning_rate": 1e-05, "loss": 0.7913, "step": 187 }, { "epoch": 0.03, "grad_norm": 1.25082090581326, "learning_rate": 9.999999318931088e-06, "loss": 0.6764, "step": 188 }, { "epoch": 0.03, "grad_norm": 1.331214831242888, "learning_rate": 9.999997275724535e-06, "loss": 0.7612, "step": 189 }, { "epoch": 0.03, "grad_norm": 1.4249493714320949, "learning_rate": 9.999993870380897e-06, "loss": 0.6491, "step": 190 }, { "epoch": 0.03, "grad_norm": 1.1990182316559028, "learning_rate": 9.999989102901105e-06, "loss": 0.7281, "step": 191 }, { "epoch": 0.03, "grad_norm": 1.3438234474302149, "learning_rate": 9.999982973286455e-06, "loss": 0.7064, "step": 192 }, { "epoch": 0.03, "grad_norm": 1.2564493552885914, "learning_rate": 9.999975481538618e-06, "loss": 0.6813, "step": 193 }, { "epoch": 0.03, "grad_norm": 1.272763795139657, "learning_rate": 9.999966627659635e-06, "loss": 0.6266, "step": 194 }, { "epoch": 0.03, "grad_norm": 1.2134451906590265, "learning_rate": 9.999956411651916e-06, "loss": 0.6801, "step": 195 }, { "epoch": 0.03, "grad_norm": 1.3448442838532517, "learning_rate": 9.999944833518248e-06, "loss": 0.6677, "step": 196 }, { "epoch": 0.03, "grad_norm": 1.3835673263159791, "learning_rate": 9.999931893261783e-06, "loss": 0.8111, "step": 197 }, { "epoch": 0.03, "grad_norm": 1.4606603551203912, "learning_rate": 9.999917590886046e-06, "loss": 0.7676, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.374405927822037, "learning_rate": 9.999901926394932e-06, "loss": 0.7414, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.3730441365852943, "learning_rate": 9.99988489979271e-06, "loss": 0.5905, "step": 200 }, { "epoch": 0.03, "grad_norm": 1.3257743411629859, "learning_rate": 9.999866511084021e-06, "loss": 0.7766, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.1854186441967431, "learning_rate": 9.999846760273873e-06, "loss": 0.7184, "step": 202 }, { "epoch": 0.03, "grad_norm": 1.3439684911943945, "learning_rate": 9.999825647367643e-06, "loss": 0.7614, "step": 203 }, { "epoch": 0.03, "grad_norm": 1.335934880678688, "learning_rate": 9.999803172371088e-06, "loss": 0.786, "step": 204 }, { "epoch": 0.03, "grad_norm": 1.4174458318046552, "learning_rate": 9.999779335290328e-06, "loss": 0.7289, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.8889855242307784, "learning_rate": 9.999754136131855e-06, "loss": 0.491, "step": 206 }, { "epoch": 0.03, "grad_norm": 1.2834334506221448, "learning_rate": 9.999727574902538e-06, "loss": 0.6817, "step": 207 }, { "epoch": 0.03, "grad_norm": 1.3013454465054761, "learning_rate": 9.999699651609611e-06, "loss": 0.7477, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.2342997822910966, "learning_rate": 9.999670366260682e-06, "loss": 0.7318, "step": 209 }, { "epoch": 0.03, "grad_norm": 1.4344589829959105, "learning_rate": 9.999639718863728e-06, "loss": 0.743, "step": 210 }, { "epoch": 0.03, "grad_norm": 1.1760729067482503, "learning_rate": 9.9996077094271e-06, "loss": 0.7114, "step": 211 }, { "epoch": 0.03, "grad_norm": 1.2317896448799757, "learning_rate": 9.999574337959514e-06, "loss": 0.7212, "step": 212 }, { "epoch": 0.03, "grad_norm": 1.387643872824135, "learning_rate": 9.999539604470068e-06, "loss": 0.7768, "step": 213 }, { "epoch": 0.03, "grad_norm": 1.5577030539795724, "learning_rate": 9.999503508968218e-06, "loss": 0.7212, "step": 214 }, { "epoch": 0.03, "grad_norm": 1.2210243091792707, "learning_rate": 9.9994660514638e-06, "loss": 0.6915, "step": 215 }, { "epoch": 0.03, "grad_norm": 1.275254204000143, "learning_rate": 9.99942723196702e-06, "loss": 0.7326, "step": 216 }, { "epoch": 0.03, "grad_norm": 1.2571978728202617, "learning_rate": 9.999387050488451e-06, "loss": 0.5764, "step": 217 }, { "epoch": 0.04, "grad_norm": 1.1723890682837121, "learning_rate": 9.99934550703904e-06, "loss": 0.6662, "step": 218 }, { "epoch": 0.04, "grad_norm": 1.225134245438625, "learning_rate": 9.999302601630106e-06, "loss": 0.7359, "step": 219 }, { "epoch": 0.04, "grad_norm": 1.2198619424501254, "learning_rate": 9.999258334273338e-06, "loss": 0.6699, "step": 220 }, { "epoch": 0.04, "grad_norm": 0.8867795004181716, "learning_rate": 9.999212704980792e-06, "loss": 0.4979, "step": 221 }, { "epoch": 0.04, "grad_norm": 1.398722762579555, "learning_rate": 9.999165713764902e-06, "loss": 0.7379, "step": 222 }, { "epoch": 0.04, "grad_norm": 1.5025694669167649, "learning_rate": 9.999117360638469e-06, "loss": 0.7499, "step": 223 }, { "epoch": 0.04, "grad_norm": 1.497817607359326, "learning_rate": 9.999067645614666e-06, "loss": 0.8253, "step": 224 }, { "epoch": 0.04, "grad_norm": 1.1717524039805163, "learning_rate": 9.999016568707036e-06, "loss": 0.7384, "step": 225 }, { "epoch": 0.04, "grad_norm": 0.6730693707993181, "learning_rate": 9.998964129929493e-06, "loss": 0.4905, "step": 226 }, { "epoch": 0.04, "grad_norm": 1.2642786064229417, "learning_rate": 9.998910329296322e-06, "loss": 0.789, "step": 227 }, { "epoch": 0.04, "grad_norm": 1.2876519614802493, "learning_rate": 9.998855166822186e-06, "loss": 0.7156, "step": 228 }, { "epoch": 0.04, "grad_norm": 1.3026579801479032, "learning_rate": 9.998798642522105e-06, "loss": 0.7563, "step": 229 }, { "epoch": 0.04, "grad_norm": 1.5017436593551474, "learning_rate": 9.998740756411483e-06, "loss": 0.7207, "step": 230 }, { "epoch": 0.04, "grad_norm": 1.266400053072259, "learning_rate": 9.998681508506087e-06, "loss": 0.648, "step": 231 }, { "epoch": 0.04, "grad_norm": 1.1864505088783366, "learning_rate": 9.998620898822059e-06, "loss": 0.6951, "step": 232 }, { "epoch": 0.04, "grad_norm": 1.3855125754652082, "learning_rate": 9.998558927375909e-06, "loss": 0.6922, "step": 233 }, { "epoch": 0.04, "grad_norm": 1.2175747229109015, "learning_rate": 9.998495594184523e-06, "loss": 0.6695, "step": 234 }, { "epoch": 0.04, "grad_norm": 1.276390781773472, "learning_rate": 9.998430899265152e-06, "loss": 0.6748, "step": 235 }, { "epoch": 0.04, "grad_norm": 1.2407184959699031, "learning_rate": 9.998364842635422e-06, "loss": 0.6653, "step": 236 }, { "epoch": 0.04, "grad_norm": 1.2556164713929225, "learning_rate": 9.998297424313327e-06, "loss": 0.6495, "step": 237 }, { "epoch": 0.04, "grad_norm": 1.4368173970617737, "learning_rate": 9.998228644317235e-06, "loss": 0.77, "step": 238 }, { "epoch": 0.04, "grad_norm": 1.3309990949955843, "learning_rate": 9.998158502665884e-06, "loss": 0.7261, "step": 239 }, { "epoch": 0.04, "grad_norm": 1.2526749234192738, "learning_rate": 9.99808699937838e-06, "loss": 0.701, "step": 240 }, { "epoch": 0.04, "grad_norm": 1.3372315522059617, "learning_rate": 9.998014134474207e-06, "loss": 0.6976, "step": 241 }, { "epoch": 0.04, "grad_norm": 1.3471462521866626, "learning_rate": 9.997939907973212e-06, "loss": 0.7213, "step": 242 }, { "epoch": 0.04, "grad_norm": 1.2306654806999384, "learning_rate": 9.997864319895616e-06, "loss": 0.7087, "step": 243 }, { "epoch": 0.04, "grad_norm": 0.858364792336332, "learning_rate": 9.997787370262012e-06, "loss": 0.4779, "step": 244 }, { "epoch": 0.04, "grad_norm": 0.76013363356788, "learning_rate": 9.997709059093364e-06, "loss": 0.4854, "step": 245 }, { "epoch": 0.04, "grad_norm": 1.2988792592854377, "learning_rate": 9.997629386411006e-06, "loss": 0.7282, "step": 246 }, { "epoch": 0.04, "grad_norm": 1.4542539405542785, "learning_rate": 9.997548352236644e-06, "loss": 0.7274, "step": 247 }, { "epoch": 0.04, "grad_norm": 1.320286546988568, "learning_rate": 9.99746595659235e-06, "loss": 0.7365, "step": 248 }, { "epoch": 0.04, "grad_norm": 1.309978405328875, "learning_rate": 9.997382199500577e-06, "loss": 0.7659, "step": 249 }, { "epoch": 0.04, "grad_norm": 1.1707539596309837, "learning_rate": 9.997297080984136e-06, "loss": 0.71, "step": 250 }, { "epoch": 0.04, "grad_norm": 1.3798793183963456, "learning_rate": 9.997210601066218e-06, "loss": 0.6374, "step": 251 }, { "epoch": 0.04, "grad_norm": 2.4811551224463737, "learning_rate": 9.997122759770386e-06, "loss": 0.7478, "step": 252 }, { "epoch": 0.04, "grad_norm": 1.3058697719571093, "learning_rate": 9.997033557120567e-06, "loss": 0.7421, "step": 253 }, { "epoch": 0.04, "grad_norm": 1.244869644724416, "learning_rate": 9.996942993141063e-06, "loss": 0.6853, "step": 254 }, { "epoch": 0.04, "grad_norm": 1.450253976156751, "learning_rate": 9.996851067856546e-06, "loss": 0.519, "step": 255 }, { "epoch": 0.04, "grad_norm": 1.1749160681368627, "learning_rate": 9.996757781292058e-06, "loss": 0.6864, "step": 256 }, { "epoch": 0.04, "grad_norm": 1.5588788302091203, "learning_rate": 9.996663133473017e-06, "loss": 0.6967, "step": 257 }, { "epoch": 0.04, "grad_norm": 1.3148715392780708, "learning_rate": 9.996567124425201e-06, "loss": 0.6698, "step": 258 }, { "epoch": 0.04, "grad_norm": 1.3213244835498843, "learning_rate": 9.996469754174772e-06, "loss": 0.7199, "step": 259 }, { "epoch": 0.04, "grad_norm": 1.2947213163042959, "learning_rate": 9.996371022748251e-06, "loss": 0.7128, "step": 260 }, { "epoch": 0.04, "grad_norm": 1.3156315734077115, "learning_rate": 9.99627093017254e-06, "loss": 0.6567, "step": 261 }, { "epoch": 0.04, "grad_norm": 1.3371295027852197, "learning_rate": 9.996169476474902e-06, "loss": 0.75, "step": 262 }, { "epoch": 0.04, "grad_norm": 1.2616705578376781, "learning_rate": 9.996066661682981e-06, "loss": 0.6927, "step": 263 }, { "epoch": 0.04, "grad_norm": 1.3571985209697057, "learning_rate": 9.995962485824783e-06, "loss": 0.7451, "step": 264 }, { "epoch": 0.04, "grad_norm": 1.230141529118125, "learning_rate": 9.995856948928688e-06, "loss": 0.7215, "step": 265 }, { "epoch": 0.04, "grad_norm": 1.2267964009778596, "learning_rate": 9.99575005102345e-06, "loss": 0.651, "step": 266 }, { "epoch": 0.04, "grad_norm": 1.2887761207584736, "learning_rate": 9.99564179213819e-06, "loss": 0.7504, "step": 267 }, { "epoch": 0.04, "grad_norm": 1.2962040637810222, "learning_rate": 9.995532172302399e-06, "loss": 0.6948, "step": 268 }, { "epoch": 0.04, "grad_norm": 1.2769936535782778, "learning_rate": 9.995421191545942e-06, "loss": 0.7075, "step": 269 }, { "epoch": 0.04, "grad_norm": 1.4286561027893085, "learning_rate": 9.995308849899052e-06, "loss": 0.8251, "step": 270 }, { "epoch": 0.04, "grad_norm": 1.3072936066939465, "learning_rate": 9.995195147392335e-06, "loss": 0.7475, "step": 271 }, { "epoch": 0.04, "grad_norm": 1.2369123118560394, "learning_rate": 9.995080084056767e-06, "loss": 0.6701, "step": 272 }, { "epoch": 0.04, "grad_norm": 1.2085389329504574, "learning_rate": 9.994963659923695e-06, "loss": 0.6256, "step": 273 }, { "epoch": 0.04, "grad_norm": 1.1643315502173501, "learning_rate": 9.994845875024834e-06, "loss": 0.6593, "step": 274 }, { "epoch": 0.04, "grad_norm": 1.286554009304387, "learning_rate": 9.994726729392272e-06, "loss": 0.7334, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.8743332726561144, "learning_rate": 9.99460622305847e-06, "loss": 0.5115, "step": 276 }, { "epoch": 0.04, "grad_norm": 1.2680546273089717, "learning_rate": 9.994484356056256e-06, "loss": 0.7543, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.7943082300961309, "learning_rate": 9.994361128418828e-06, "loss": 0.505, "step": 278 }, { "epoch": 0.04, "grad_norm": 1.2025177145253436, "learning_rate": 9.99423654017976e-06, "loss": 0.6858, "step": 279 }, { "epoch": 0.05, "grad_norm": 1.3822213681884803, "learning_rate": 9.99411059137299e-06, "loss": 0.7706, "step": 280 }, { "epoch": 0.05, "grad_norm": 1.2644026740402285, "learning_rate": 9.993983282032831e-06, "loss": 0.6906, "step": 281 }, { "epoch": 0.05, "grad_norm": 1.3744922785419906, "learning_rate": 9.993854612193967e-06, "loss": 0.7457, "step": 282 }, { "epoch": 0.05, "grad_norm": 1.2012060722321076, "learning_rate": 9.993724581891451e-06, "loss": 0.6845, "step": 283 }, { "epoch": 0.05, "grad_norm": 1.2314007641045293, "learning_rate": 9.993593191160704e-06, "loss": 0.682, "step": 284 }, { "epoch": 0.05, "grad_norm": 1.1716279672060668, "learning_rate": 9.993460440037525e-06, "loss": 0.7052, "step": 285 }, { "epoch": 0.05, "grad_norm": 1.2840643927613349, "learning_rate": 9.993326328558076e-06, "loss": 0.6667, "step": 286 }, { "epoch": 0.05, "grad_norm": 1.2553897101143476, "learning_rate": 9.993190856758892e-06, "loss": 0.6474, "step": 287 }, { "epoch": 0.05, "grad_norm": 1.2342484627628807, "learning_rate": 9.99305402467688e-06, "loss": 0.7231, "step": 288 }, { "epoch": 0.05, "grad_norm": 1.2506166392693183, "learning_rate": 9.99291583234932e-06, "loss": 0.7158, "step": 289 }, { "epoch": 0.05, "grad_norm": 1.2799363415300542, "learning_rate": 9.992776279813854e-06, "loss": 0.7628, "step": 290 }, { "epoch": 0.05, "grad_norm": 1.319625011570069, "learning_rate": 9.992635367108505e-06, "loss": 0.7417, "step": 291 }, { "epoch": 0.05, "grad_norm": 1.0637254628784878, "learning_rate": 9.992493094271657e-06, "loss": 0.6807, "step": 292 }, { "epoch": 0.05, "grad_norm": 1.2890203754510048, "learning_rate": 9.992349461342073e-06, "loss": 0.7077, "step": 293 }, { "epoch": 0.05, "grad_norm": 1.2287410655645907, "learning_rate": 9.992204468358879e-06, "loss": 0.7278, "step": 294 }, { "epoch": 0.05, "grad_norm": 1.2638386137239284, "learning_rate": 9.992058115361578e-06, "loss": 0.7108, "step": 295 }, { "epoch": 0.05, "grad_norm": 1.2729022475181637, "learning_rate": 9.991910402390041e-06, "loss": 0.695, "step": 296 }, { "epoch": 0.05, "grad_norm": 1.2496925396617156, "learning_rate": 9.991761329484505e-06, "loss": 0.657, "step": 297 }, { "epoch": 0.05, "grad_norm": 1.2540515841751168, "learning_rate": 9.991610896685587e-06, "loss": 0.7011, "step": 298 }, { "epoch": 0.05, "grad_norm": 1.2247738160047426, "learning_rate": 9.991459104034262e-06, "loss": 0.5431, "step": 299 }, { "epoch": 0.05, "grad_norm": 1.324998754958549, "learning_rate": 9.991305951571891e-06, "loss": 0.6445, "step": 300 }, { "epoch": 0.05, "grad_norm": 1.3194273049410405, "learning_rate": 9.99115143934019e-06, "loss": 0.6805, "step": 301 }, { "epoch": 0.05, "grad_norm": 1.1463422929971323, "learning_rate": 9.990995567381255e-06, "loss": 0.6931, "step": 302 }, { "epoch": 0.05, "grad_norm": 1.186861454033246, "learning_rate": 9.990838335737551e-06, "loss": 0.692, "step": 303 }, { "epoch": 0.05, "grad_norm": 1.188753937804532, "learning_rate": 9.990679744451909e-06, "loss": 0.6522, "step": 304 }, { "epoch": 0.05, "grad_norm": 1.3695028124417525, "learning_rate": 9.990519793567539e-06, "loss": 0.696, "step": 305 }, { "epoch": 0.05, "grad_norm": 1.212641102232149, "learning_rate": 9.990358483128012e-06, "loss": 0.7645, "step": 306 }, { "epoch": 0.05, "grad_norm": 1.3418494191959847, "learning_rate": 9.990195813177272e-06, "loss": 0.6759, "step": 307 }, { "epoch": 0.05, "grad_norm": 1.1394146441246351, "learning_rate": 9.99003178375964e-06, "loss": 0.6743, "step": 308 }, { "epoch": 0.05, "grad_norm": 1.2084250380038932, "learning_rate": 9.989866394919795e-06, "loss": 0.649, "step": 309 }, { "epoch": 0.05, "grad_norm": 1.26929032529933, "learning_rate": 9.9896996467028e-06, "loss": 0.6499, "step": 310 }, { "epoch": 0.05, "grad_norm": 1.1390710108289281, "learning_rate": 9.98953153915408e-06, "loss": 0.6479, "step": 311 }, { "epoch": 0.05, "grad_norm": 1.1445823212644295, "learning_rate": 9.989362072319431e-06, "loss": 0.6189, "step": 312 }, { "epoch": 0.05, "grad_norm": 1.2464565876993572, "learning_rate": 9.98919124624502e-06, "loss": 0.769, "step": 313 }, { "epoch": 0.05, "grad_norm": 1.2782427600503015, "learning_rate": 9.989019060977388e-06, "loss": 0.7443, "step": 314 }, { "epoch": 0.05, "grad_norm": 1.2435394254511745, "learning_rate": 9.988845516563437e-06, "loss": 0.7197, "step": 315 }, { "epoch": 0.05, "grad_norm": 1.200401468957928, "learning_rate": 9.988670613050452e-06, "loss": 0.74, "step": 316 }, { "epoch": 0.05, "grad_norm": 1.1479829406518383, "learning_rate": 9.988494350486077e-06, "loss": 0.7472, "step": 317 }, { "epoch": 0.05, "grad_norm": 1.2188447733257157, "learning_rate": 9.988316728918332e-06, "loss": 0.6729, "step": 318 }, { "epoch": 0.05, "grad_norm": 1.2380019122646702, "learning_rate": 9.988137748395608e-06, "loss": 0.647, "step": 319 }, { "epoch": 0.05, "grad_norm": 1.2113814365732802, "learning_rate": 9.987957408966662e-06, "loss": 0.6883, "step": 320 }, { "epoch": 0.05, "grad_norm": 1.1836754608407634, "learning_rate": 9.987775710680622e-06, "loss": 0.6861, "step": 321 }, { "epoch": 0.05, "grad_norm": 1.2513729213480607, "learning_rate": 9.987592653586992e-06, "loss": 0.7145, "step": 322 }, { "epoch": 0.05, "grad_norm": 1.148049920875932, "learning_rate": 9.987408237735638e-06, "loss": 0.7364, "step": 323 }, { "epoch": 0.05, "grad_norm": 1.1887268144403802, "learning_rate": 9.987222463176803e-06, "loss": 0.6854, "step": 324 }, { "epoch": 0.05, "grad_norm": 1.110222455442517, "learning_rate": 9.987035329961092e-06, "loss": 0.7447, "step": 325 }, { "epoch": 0.05, "grad_norm": 0.7164103752519383, "learning_rate": 9.986846838139492e-06, "loss": 0.5111, "step": 326 }, { "epoch": 0.05, "grad_norm": 1.2586794855633905, "learning_rate": 9.986656987763348e-06, "loss": 0.7445, "step": 327 }, { "epoch": 0.05, "grad_norm": 1.2050419814495252, "learning_rate": 9.986465778884384e-06, "loss": 0.7948, "step": 328 }, { "epoch": 0.05, "grad_norm": 1.2529472693071761, "learning_rate": 9.986273211554689e-06, "loss": 0.6765, "step": 329 }, { "epoch": 0.05, "grad_norm": 1.254394734298952, "learning_rate": 9.986079285826721e-06, "loss": 0.7061, "step": 330 }, { "epoch": 0.05, "grad_norm": 1.2710035041481733, "learning_rate": 9.985884001753317e-06, "loss": 0.6215, "step": 331 }, { "epoch": 0.05, "grad_norm": 1.2344446175458699, "learning_rate": 9.985687359387673e-06, "loss": 0.6688, "step": 332 }, { "epoch": 0.05, "grad_norm": 1.1742468375895199, "learning_rate": 9.98548935878336e-06, "loss": 0.7463, "step": 333 }, { "epoch": 0.05, "grad_norm": 1.1548595916777202, "learning_rate": 9.985289999994322e-06, "loss": 0.6193, "step": 334 }, { "epoch": 0.05, "grad_norm": 1.319122302898996, "learning_rate": 9.985089283074867e-06, "loss": 0.724, "step": 335 }, { "epoch": 0.05, "grad_norm": 1.1855380992583031, "learning_rate": 9.984887208079675e-06, "loss": 0.6728, "step": 336 }, { "epoch": 0.05, "grad_norm": 1.2140268826406544, "learning_rate": 9.9846837750638e-06, "loss": 0.712, "step": 337 }, { "epoch": 0.05, "grad_norm": 1.1946992342967895, "learning_rate": 9.98447898408266e-06, "loss": 0.6431, "step": 338 }, { "epoch": 0.05, "grad_norm": 1.0848472049848554, "learning_rate": 9.984272835192047e-06, "loss": 0.6256, "step": 339 }, { "epoch": 0.05, "grad_norm": 1.264192562721289, "learning_rate": 9.984065328448122e-06, "loss": 0.7262, "step": 340 }, { "epoch": 0.05, "grad_norm": 1.2867666404364067, "learning_rate": 9.983856463907415e-06, "loss": 0.6994, "step": 341 }, { "epoch": 0.06, "grad_norm": 1.2417529865863632, "learning_rate": 9.983646241626825e-06, "loss": 0.6457, "step": 342 }, { "epoch": 0.06, "grad_norm": 1.1971415238568486, "learning_rate": 9.983434661663625e-06, "loss": 0.6605, "step": 343 }, { "epoch": 0.06, "grad_norm": 1.2041269107631913, "learning_rate": 9.983221724075453e-06, "loss": 0.7163, "step": 344 }, { "epoch": 0.06, "grad_norm": 1.1926985043339906, "learning_rate": 9.983007428920322e-06, "loss": 0.7127, "step": 345 }, { "epoch": 0.06, "grad_norm": 1.2582674549204695, "learning_rate": 9.982791776256608e-06, "loss": 0.6459, "step": 346 }, { "epoch": 0.06, "grad_norm": 1.1460708432876627, "learning_rate": 9.982574766143063e-06, "loss": 0.757, "step": 347 }, { "epoch": 0.06, "grad_norm": 1.123781321365631, "learning_rate": 9.982356398638807e-06, "loss": 0.7232, "step": 348 }, { "epoch": 0.06, "grad_norm": 1.2774526181564465, "learning_rate": 9.982136673803328e-06, "loss": 0.6972, "step": 349 }, { "epoch": 0.06, "grad_norm": 1.2228251539912751, "learning_rate": 9.981915591696484e-06, "loss": 0.7134, "step": 350 }, { "epoch": 0.06, "grad_norm": 1.191353005764966, "learning_rate": 9.981693152378509e-06, "loss": 0.6868, "step": 351 }, { "epoch": 0.06, "grad_norm": 1.3557416164999287, "learning_rate": 9.981469355909996e-06, "loss": 0.7573, "step": 352 }, { "epoch": 0.06, "grad_norm": 1.2469418817573794, "learning_rate": 9.981244202351916e-06, "loss": 0.7498, "step": 353 }, { "epoch": 0.06, "grad_norm": 1.1984908296886687, "learning_rate": 9.981017691765606e-06, "loss": 0.7511, "step": 354 }, { "epoch": 0.06, "grad_norm": 1.2004419272130282, "learning_rate": 9.980789824212776e-06, "loss": 0.6611, "step": 355 }, { "epoch": 0.06, "grad_norm": 1.3811575729777732, "learning_rate": 9.980560599755498e-06, "loss": 0.6956, "step": 356 }, { "epoch": 0.06, "grad_norm": 1.1814425693792978, "learning_rate": 9.980330018456227e-06, "loss": 0.7598, "step": 357 }, { "epoch": 0.06, "grad_norm": 1.198696177946858, "learning_rate": 9.980098080377771e-06, "loss": 0.653, "step": 358 }, { "epoch": 0.06, "grad_norm": 1.1270488798787484, "learning_rate": 9.979864785583325e-06, "loss": 0.611, "step": 359 }, { "epoch": 0.06, "grad_norm": 1.1764837161047377, "learning_rate": 9.979630134136438e-06, "loss": 0.663, "step": 360 }, { "epoch": 0.06, "grad_norm": 1.242448318393586, "learning_rate": 9.979394126101039e-06, "loss": 0.7504, "step": 361 }, { "epoch": 0.06, "grad_norm": 1.1962281862047308, "learning_rate": 9.979156761541421e-06, "loss": 0.7183, "step": 362 }, { "epoch": 0.06, "grad_norm": 1.146547701924311, "learning_rate": 9.978918040522249e-06, "loss": 0.6336, "step": 363 }, { "epoch": 0.06, "grad_norm": 1.1278651084233722, "learning_rate": 9.97867796310856e-06, "loss": 0.6716, "step": 364 }, { "epoch": 0.06, "grad_norm": 1.2563055824238085, "learning_rate": 9.978436529365757e-06, "loss": 0.7644, "step": 365 }, { "epoch": 0.06, "grad_norm": 1.171095445121014, "learning_rate": 9.978193739359611e-06, "loss": 0.6609, "step": 366 }, { "epoch": 0.06, "grad_norm": 1.2087903631779031, "learning_rate": 9.977949593156264e-06, "loss": 0.6213, "step": 367 }, { "epoch": 0.06, "grad_norm": 1.2787873813634485, "learning_rate": 9.977704090822232e-06, "loss": 0.7989, "step": 368 }, { "epoch": 0.06, "grad_norm": 1.21544841841805, "learning_rate": 9.977457232424394e-06, "loss": 0.6722, "step": 369 }, { "epoch": 0.06, "grad_norm": 1.2607813552972889, "learning_rate": 9.97720901803e-06, "loss": 0.6314, "step": 370 }, { "epoch": 0.06, "grad_norm": 1.28256845878936, "learning_rate": 9.976959447706673e-06, "loss": 0.6381, "step": 371 }, { "epoch": 0.06, "grad_norm": 1.1297128466140272, "learning_rate": 9.976708521522403e-06, "loss": 0.6831, "step": 372 }, { "epoch": 0.06, "grad_norm": 1.1535140281730443, "learning_rate": 9.976456239545547e-06, "loss": 0.6002, "step": 373 }, { "epoch": 0.06, "grad_norm": 1.2673336966957607, "learning_rate": 9.976202601844834e-06, "loss": 0.6539, "step": 374 }, { "epoch": 0.06, "grad_norm": 1.3158001121530785, "learning_rate": 9.975947608489363e-06, "loss": 0.6368, "step": 375 }, { "epoch": 0.06, "grad_norm": 1.1545330178386846, "learning_rate": 9.975691259548598e-06, "loss": 0.7716, "step": 376 }, { "epoch": 0.06, "grad_norm": 1.3194081951710745, "learning_rate": 9.975433555092383e-06, "loss": 0.7104, "step": 377 }, { "epoch": 0.06, "grad_norm": 1.1234745072307746, "learning_rate": 9.975174495190915e-06, "loss": 0.6063, "step": 378 }, { "epoch": 0.06, "grad_norm": 1.2047813612524296, "learning_rate": 9.974914079914775e-06, "loss": 0.6314, "step": 379 }, { "epoch": 0.06, "grad_norm": 1.138228514572019, "learning_rate": 9.974652309334904e-06, "loss": 0.7392, "step": 380 }, { "epoch": 0.06, "grad_norm": 1.07569534927839, "learning_rate": 9.974389183522618e-06, "loss": 0.6504, "step": 381 }, { "epoch": 0.06, "grad_norm": 1.2343228426442545, "learning_rate": 9.9741247025496e-06, "loss": 0.6937, "step": 382 }, { "epoch": 0.06, "grad_norm": 1.1867179341443375, "learning_rate": 9.973858866487898e-06, "loss": 0.6007, "step": 383 }, { "epoch": 0.06, "grad_norm": 1.2396305234459037, "learning_rate": 9.973591675409934e-06, "loss": 0.6993, "step": 384 }, { "epoch": 0.06, "grad_norm": 1.2694710080055773, "learning_rate": 9.973323129388504e-06, "loss": 0.7064, "step": 385 }, { "epoch": 0.06, "grad_norm": 1.2080871696969717, "learning_rate": 9.97305322849676e-06, "loss": 0.6582, "step": 386 }, { "epoch": 0.06, "grad_norm": 1.1073413750342909, "learning_rate": 9.972781972808234e-06, "loss": 0.6735, "step": 387 }, { "epoch": 0.06, "grad_norm": 1.2541194215675302, "learning_rate": 9.972509362396825e-06, "loss": 0.6176, "step": 388 }, { "epoch": 0.06, "grad_norm": 1.047829915296732, "learning_rate": 9.972235397336796e-06, "loss": 0.6028, "step": 389 }, { "epoch": 0.06, "grad_norm": 1.1984323437974636, "learning_rate": 9.971960077702785e-06, "loss": 0.745, "step": 390 }, { "epoch": 0.06, "grad_norm": 1.3560946586338647, "learning_rate": 9.971683403569795e-06, "loss": 0.7396, "step": 391 }, { "epoch": 0.06, "grad_norm": 1.1503300529728597, "learning_rate": 9.971405375013201e-06, "loss": 0.7073, "step": 392 }, { "epoch": 0.06, "grad_norm": 1.31364021338241, "learning_rate": 9.971125992108746e-06, "loss": 0.639, "step": 393 }, { "epoch": 0.06, "grad_norm": 1.2196416543938773, "learning_rate": 9.97084525493254e-06, "loss": 0.6013, "step": 394 }, { "epoch": 0.06, "grad_norm": 1.141087670297583, "learning_rate": 9.970563163561064e-06, "loss": 0.742, "step": 395 }, { "epoch": 0.06, "grad_norm": 1.2652218610208563, "learning_rate": 9.970279718071169e-06, "loss": 0.7436, "step": 396 }, { "epoch": 0.06, "grad_norm": 1.141684970935003, "learning_rate": 9.969994918540071e-06, "loss": 0.6565, "step": 397 }, { "epoch": 0.06, "grad_norm": 1.3109552880370567, "learning_rate": 9.969708765045361e-06, "loss": 0.6552, "step": 398 }, { "epoch": 0.06, "grad_norm": 1.2450928216019495, "learning_rate": 9.96942125766499e-06, "loss": 0.6907, "step": 399 }, { "epoch": 0.06, "grad_norm": 1.1927129228959974, "learning_rate": 9.969132396477286e-06, "loss": 0.7036, "step": 400 }, { "epoch": 0.06, "grad_norm": 1.1982424181724343, "learning_rate": 9.968842181560943e-06, "loss": 0.677, "step": 401 }, { "epoch": 0.06, "grad_norm": 1.2059255747498785, "learning_rate": 9.968550612995023e-06, "loss": 0.6207, "step": 402 }, { "epoch": 0.06, "grad_norm": 1.2126575816916907, "learning_rate": 9.968257690858955e-06, "loss": 0.6298, "step": 403 }, { "epoch": 0.07, "grad_norm": 1.2782251670578761, "learning_rate": 9.967963415232544e-06, "loss": 0.7582, "step": 404 }, { "epoch": 0.07, "grad_norm": 1.1486482386443593, "learning_rate": 9.967667786195955e-06, "loss": 0.5538, "step": 405 }, { "epoch": 0.07, "grad_norm": 1.2451201136556358, "learning_rate": 9.967370803829725e-06, "loss": 0.6924, "step": 406 }, { "epoch": 0.07, "grad_norm": 1.157220433884935, "learning_rate": 9.967072468214763e-06, "loss": 0.6856, "step": 407 }, { "epoch": 0.07, "grad_norm": 0.6575462354452141, "learning_rate": 9.966772779432343e-06, "loss": 0.5274, "step": 408 }, { "epoch": 0.07, "grad_norm": 1.2437295089429923, "learning_rate": 9.966471737564107e-06, "loss": 0.611, "step": 409 }, { "epoch": 0.07, "grad_norm": 1.2018431787770065, "learning_rate": 9.96616934269207e-06, "loss": 0.6813, "step": 410 }, { "epoch": 0.07, "grad_norm": 1.1984920313194274, "learning_rate": 9.965865594898608e-06, "loss": 0.6984, "step": 411 }, { "epoch": 0.07, "grad_norm": 1.243301829001436, "learning_rate": 9.965560494266475e-06, "loss": 0.757, "step": 412 }, { "epoch": 0.07, "grad_norm": 1.209554261657944, "learning_rate": 9.965254040878786e-06, "loss": 0.6858, "step": 413 }, { "epoch": 0.07, "grad_norm": 1.2837342854838043, "learning_rate": 9.964946234819028e-06, "loss": 0.7561, "step": 414 }, { "epoch": 0.07, "grad_norm": 1.309896257034652, "learning_rate": 9.964637076171056e-06, "loss": 0.7213, "step": 415 }, { "epoch": 0.07, "grad_norm": 1.2783130591264011, "learning_rate": 9.964326565019094e-06, "loss": 0.7492, "step": 416 }, { "epoch": 0.07, "grad_norm": 1.1383239570314165, "learning_rate": 9.964014701447733e-06, "loss": 0.6721, "step": 417 }, { "epoch": 0.07, "grad_norm": 0.6854907832660053, "learning_rate": 9.963701485541935e-06, "loss": 0.5149, "step": 418 }, { "epoch": 0.07, "grad_norm": 1.310379703447458, "learning_rate": 9.963386917387025e-06, "loss": 0.7655, "step": 419 }, { "epoch": 0.07, "grad_norm": 1.2287297265272992, "learning_rate": 9.963070997068702e-06, "loss": 0.7296, "step": 420 }, { "epoch": 0.07, "grad_norm": 1.0863892673254076, "learning_rate": 9.962753724673033e-06, "loss": 0.6211, "step": 421 }, { "epoch": 0.07, "grad_norm": 1.3027314630879676, "learning_rate": 9.96243510028645e-06, "loss": 0.7283, "step": 422 }, { "epoch": 0.07, "grad_norm": 1.2465231453810837, "learning_rate": 9.962115123995754e-06, "loss": 0.6911, "step": 423 }, { "epoch": 0.07, "grad_norm": 1.2753288352990937, "learning_rate": 9.961793795888118e-06, "loss": 0.6842, "step": 424 }, { "epoch": 0.07, "grad_norm": 1.2244577773415144, "learning_rate": 9.96147111605108e-06, "loss": 0.703, "step": 425 }, { "epoch": 0.07, "grad_norm": 1.0853815444012955, "learning_rate": 9.961147084572544e-06, "loss": 0.6685, "step": 426 }, { "epoch": 0.07, "grad_norm": 1.0980962163485015, "learning_rate": 9.96082170154079e-06, "loss": 0.7364, "step": 427 }, { "epoch": 0.07, "grad_norm": 1.1064609370198906, "learning_rate": 9.960494967044457e-06, "loss": 0.6961, "step": 428 }, { "epoch": 0.07, "grad_norm": 1.0993942272329889, "learning_rate": 9.960166881172558e-06, "loss": 0.6033, "step": 429 }, { "epoch": 0.07, "grad_norm": 1.1658126062639649, "learning_rate": 9.959837444014473e-06, "loss": 0.6522, "step": 430 }, { "epoch": 0.07, "grad_norm": 1.2436564638694596, "learning_rate": 9.95950665565995e-06, "loss": 0.6781, "step": 431 }, { "epoch": 0.07, "grad_norm": 1.267446097500326, "learning_rate": 9.959174516199105e-06, "loss": 0.6473, "step": 432 }, { "epoch": 0.07, "grad_norm": 1.3963474814024208, "learning_rate": 9.95884102572242e-06, "loss": 0.7078, "step": 433 }, { "epoch": 0.07, "grad_norm": 1.1411297131165479, "learning_rate": 9.958506184320749e-06, "loss": 0.6852, "step": 434 }, { "epoch": 0.07, "grad_norm": 1.1953110134876261, "learning_rate": 9.95816999208531e-06, "loss": 0.6917, "step": 435 }, { "epoch": 0.07, "grad_norm": 0.6740677493231655, "learning_rate": 9.957832449107694e-06, "loss": 0.5036, "step": 436 }, { "epoch": 0.07, "grad_norm": 1.0962127480634218, "learning_rate": 9.957493555479856e-06, "loss": 0.6529, "step": 437 }, { "epoch": 0.07, "grad_norm": 1.2142417208917269, "learning_rate": 9.957153311294119e-06, "loss": 0.7142, "step": 438 }, { "epoch": 0.07, "grad_norm": 1.1695545476592806, "learning_rate": 9.956811716643173e-06, "loss": 0.6728, "step": 439 }, { "epoch": 0.07, "grad_norm": 0.6503216423736332, "learning_rate": 9.956468771620082e-06, "loss": 0.4809, "step": 440 }, { "epoch": 0.07, "grad_norm": 1.364580037944365, "learning_rate": 9.956124476318271e-06, "loss": 0.6878, "step": 441 }, { "epoch": 0.07, "grad_norm": 1.1137815627733518, "learning_rate": 9.955778830831537e-06, "loss": 0.6228, "step": 442 }, { "epoch": 0.07, "grad_norm": 1.1886410527781188, "learning_rate": 9.955431835254044e-06, "loss": 0.6655, "step": 443 }, { "epoch": 0.07, "grad_norm": 1.1781882385729894, "learning_rate": 9.95508348968032e-06, "loss": 0.7544, "step": 444 }, { "epoch": 0.07, "grad_norm": 1.2434011269259, "learning_rate": 9.954733794205264e-06, "loss": 0.6882, "step": 445 }, { "epoch": 0.07, "grad_norm": 1.0866981248051866, "learning_rate": 9.954382748924148e-06, "loss": 0.617, "step": 446 }, { "epoch": 0.07, "grad_norm": 1.288228778203744, "learning_rate": 9.9540303539326e-06, "loss": 0.7296, "step": 447 }, { "epoch": 0.07, "grad_norm": 1.2637737687522859, "learning_rate": 9.953676609326627e-06, "loss": 0.6472, "step": 448 }, { "epoch": 0.07, "grad_norm": 1.1264429975974244, "learning_rate": 9.953321515202597e-06, "loss": 0.6694, "step": 449 }, { "epoch": 0.07, "grad_norm": 1.2260658329930987, "learning_rate": 9.952965071657244e-06, "loss": 0.7322, "step": 450 }, { "epoch": 0.07, "grad_norm": 1.1341017298554932, "learning_rate": 9.952607278787679e-06, "loss": 0.6341, "step": 451 }, { "epoch": 0.07, "grad_norm": 1.1631348298519713, "learning_rate": 9.95224813669137e-06, "loss": 0.6119, "step": 452 }, { "epoch": 0.07, "grad_norm": 1.2072017277737226, "learning_rate": 9.95188764546616e-06, "loss": 0.6689, "step": 453 }, { "epoch": 0.07, "grad_norm": 1.3220061396349503, "learning_rate": 9.951525805210256e-06, "loss": 0.7461, "step": 454 }, { "epoch": 0.07, "grad_norm": 1.2753798221016264, "learning_rate": 9.951162616022234e-06, "loss": 0.7558, "step": 455 }, { "epoch": 0.07, "grad_norm": 1.142408973872296, "learning_rate": 9.950798078001034e-06, "loss": 0.7406, "step": 456 }, { "epoch": 0.07, "grad_norm": 1.291617994771683, "learning_rate": 9.950432191245968e-06, "loss": 0.7248, "step": 457 }, { "epoch": 0.07, "grad_norm": 1.0961719187655317, "learning_rate": 9.950064955856716e-06, "loss": 0.691, "step": 458 }, { "epoch": 0.07, "grad_norm": 1.1987380921034372, "learning_rate": 9.949696371933319e-06, "loss": 0.6827, "step": 459 }, { "epoch": 0.07, "grad_norm": 1.1134318087147919, "learning_rate": 9.94932643957619e-06, "loss": 0.6073, "step": 460 }, { "epoch": 0.07, "grad_norm": 1.1559799439068397, "learning_rate": 9.948955158886113e-06, "loss": 0.6522, "step": 461 }, { "epoch": 0.07, "grad_norm": 1.1394586289731738, "learning_rate": 9.94858252996423e-06, "loss": 0.6269, "step": 462 }, { "epoch": 0.07, "grad_norm": 1.2002732674749657, "learning_rate": 9.948208552912057e-06, "loss": 0.5459, "step": 463 }, { "epoch": 0.07, "grad_norm": 1.1300957542651615, "learning_rate": 9.947833227831477e-06, "loss": 0.6821, "step": 464 }, { "epoch": 0.07, "grad_norm": 1.1983979266121416, "learning_rate": 9.947456554824736e-06, "loss": 0.6699, "step": 465 }, { "epoch": 0.08, "grad_norm": 1.100936896472829, "learning_rate": 9.947078533994454e-06, "loss": 0.6156, "step": 466 }, { "epoch": 0.08, "grad_norm": 1.2878974988182825, "learning_rate": 9.94669916544361e-06, "loss": 0.7358, "step": 467 }, { "epoch": 0.08, "grad_norm": 1.1367661059609175, "learning_rate": 9.94631844927556e-06, "loss": 0.6686, "step": 468 }, { "epoch": 0.08, "grad_norm": 1.1383793253055459, "learning_rate": 9.945936385594017e-06, "loss": 0.6506, "step": 469 }, { "epoch": 0.08, "grad_norm": 1.1854037914673214, "learning_rate": 9.945552974503065e-06, "loss": 0.7272, "step": 470 }, { "epoch": 0.08, "grad_norm": 1.2735080801907124, "learning_rate": 9.94516821610716e-06, "loss": 0.7674, "step": 471 }, { "epoch": 0.08, "grad_norm": 1.1734637162830088, "learning_rate": 9.944782110511119e-06, "loss": 0.6907, "step": 472 }, { "epoch": 0.08, "grad_norm": 1.3135960191278515, "learning_rate": 9.944394657820127e-06, "loss": 0.6557, "step": 473 }, { "epoch": 0.08, "grad_norm": 1.1441923990139509, "learning_rate": 9.944005858139737e-06, "loss": 0.6253, "step": 474 }, { "epoch": 0.08, "grad_norm": 1.1493882934214332, "learning_rate": 9.943615711575867e-06, "loss": 0.7193, "step": 475 }, { "epoch": 0.08, "grad_norm": 1.2282679978937754, "learning_rate": 9.943224218234809e-06, "loss": 0.6816, "step": 476 }, { "epoch": 0.08, "grad_norm": 0.7409637388717382, "learning_rate": 9.94283137822321e-06, "loss": 0.4966, "step": 477 }, { "epoch": 0.08, "grad_norm": 1.2195467870431518, "learning_rate": 9.942437191648095e-06, "loss": 0.7199, "step": 478 }, { "epoch": 0.08, "grad_norm": 1.1923691452498928, "learning_rate": 9.94204165861685e-06, "loss": 0.6553, "step": 479 }, { "epoch": 0.08, "grad_norm": 1.1411051632101619, "learning_rate": 9.94164477923723e-06, "loss": 0.6784, "step": 480 }, { "epoch": 0.08, "grad_norm": 1.1287377055137306, "learning_rate": 9.941246553617353e-06, "loss": 0.6421, "step": 481 }, { "epoch": 0.08, "grad_norm": 1.1636418813669103, "learning_rate": 9.940846981865709e-06, "loss": 0.6576, "step": 482 }, { "epoch": 0.08, "grad_norm": 1.1541497177807003, "learning_rate": 9.940446064091151e-06, "loss": 0.7463, "step": 483 }, { "epoch": 0.08, "grad_norm": 1.2150527017028636, "learning_rate": 9.940043800402903e-06, "loss": 0.7286, "step": 484 }, { "epoch": 0.08, "grad_norm": 1.1972714684104622, "learning_rate": 9.939640190910552e-06, "loss": 0.7284, "step": 485 }, { "epoch": 0.08, "grad_norm": 1.2537624272529804, "learning_rate": 9.939235235724047e-06, "loss": 0.7836, "step": 486 }, { "epoch": 0.08, "grad_norm": 1.1995277117548084, "learning_rate": 9.938828934953716e-06, "loss": 0.6392, "step": 487 }, { "epoch": 0.08, "grad_norm": 1.2421249029696795, "learning_rate": 9.938421288710242e-06, "loss": 0.7847, "step": 488 }, { "epoch": 0.08, "grad_norm": 1.1778886576856036, "learning_rate": 9.93801229710468e-06, "loss": 0.683, "step": 489 }, { "epoch": 0.08, "grad_norm": 0.6557273888295984, "learning_rate": 9.937601960248452e-06, "loss": 0.4832, "step": 490 }, { "epoch": 0.08, "grad_norm": 1.298117776637159, "learning_rate": 9.937190278253345e-06, "loss": 0.6869, "step": 491 }, { "epoch": 0.08, "grad_norm": 1.1295745225258655, "learning_rate": 9.93677725123151e-06, "loss": 0.6866, "step": 492 }, { "epoch": 0.08, "grad_norm": 1.0762405120703018, "learning_rate": 9.936362879295471e-06, "loss": 0.6419, "step": 493 }, { "epoch": 0.08, "grad_norm": 1.1662411308757306, "learning_rate": 9.93594716255811e-06, "loss": 0.7446, "step": 494 }, { "epoch": 0.08, "grad_norm": 1.2022500818632824, "learning_rate": 9.935530101132683e-06, "loss": 0.7342, "step": 495 }, { "epoch": 0.08, "grad_norm": 1.1660846634723887, "learning_rate": 9.935111695132807e-06, "loss": 0.6947, "step": 496 }, { "epoch": 0.08, "grad_norm": 1.1146215157244876, "learning_rate": 9.934691944672468e-06, "loss": 0.5636, "step": 497 }, { "epoch": 0.08, "grad_norm": 1.149770816378899, "learning_rate": 9.934270849866019e-06, "loss": 0.6526, "step": 498 }, { "epoch": 0.08, "grad_norm": 1.1777414548334122, "learning_rate": 9.933848410828175e-06, "loss": 0.6979, "step": 499 }, { "epoch": 0.08, "grad_norm": 1.2165433436804116, "learning_rate": 9.933424627674022e-06, "loss": 0.6992, "step": 500 }, { "epoch": 0.08, "grad_norm": 1.1072268965384175, "learning_rate": 9.932999500519011e-06, "loss": 0.6281, "step": 501 }, { "epoch": 0.08, "grad_norm": 1.1334663035461165, "learning_rate": 9.932573029478956e-06, "loss": 0.7025, "step": 502 }, { "epoch": 0.08, "grad_norm": 1.0774366164520455, "learning_rate": 9.932145214670042e-06, "loss": 0.6295, "step": 503 }, { "epoch": 0.08, "grad_norm": 1.110742520894135, "learning_rate": 9.931716056208814e-06, "loss": 0.7078, "step": 504 }, { "epoch": 0.08, "grad_norm": 1.2138506637232074, "learning_rate": 9.931285554212189e-06, "loss": 0.621, "step": 505 }, { "epoch": 0.08, "grad_norm": 1.1057974112865008, "learning_rate": 9.930853708797448e-06, "loss": 0.727, "step": 506 }, { "epoch": 0.08, "grad_norm": 1.2747805720596324, "learning_rate": 9.930420520082238e-06, "loss": 0.6075, "step": 507 }, { "epoch": 0.08, "grad_norm": 1.197063673279613, "learning_rate": 9.929985988184568e-06, "loss": 0.686, "step": 508 }, { "epoch": 0.08, "grad_norm": 1.3100121171197556, "learning_rate": 9.929550113222822e-06, "loss": 0.7487, "step": 509 }, { "epoch": 0.08, "grad_norm": 1.179954704029963, "learning_rate": 9.929112895315739e-06, "loss": 0.6439, "step": 510 }, { "epoch": 0.08, "grad_norm": 1.2251553338649839, "learning_rate": 9.928674334582431e-06, "loss": 0.7287, "step": 511 }, { "epoch": 0.08, "grad_norm": 1.232762139542434, "learning_rate": 9.928234431142376e-06, "loss": 0.6233, "step": 512 }, { "epoch": 0.08, "grad_norm": 1.2487549666358873, "learning_rate": 9.927793185115416e-06, "loss": 0.6658, "step": 513 }, { "epoch": 0.08, "grad_norm": 0.6951278285076156, "learning_rate": 9.927350596621753e-06, "loss": 0.532, "step": 514 }, { "epoch": 0.08, "grad_norm": 1.241359735962445, "learning_rate": 9.926906665781967e-06, "loss": 0.7226, "step": 515 }, { "epoch": 0.08, "grad_norm": 1.184216563449497, "learning_rate": 9.926461392716993e-06, "loss": 0.6841, "step": 516 }, { "epoch": 0.08, "grad_norm": 1.4427849618152664, "learning_rate": 9.926014777548139e-06, "loss": 0.7274, "step": 517 }, { "epoch": 0.08, "grad_norm": 1.2011617465505329, "learning_rate": 9.92556682039707e-06, "loss": 0.6985, "step": 518 }, { "epoch": 0.08, "grad_norm": 1.0721598703006259, "learning_rate": 9.925117521385827e-06, "loss": 0.6968, "step": 519 }, { "epoch": 0.08, "grad_norm": 1.1682133059523674, "learning_rate": 9.92466688063681e-06, "loss": 0.6654, "step": 520 }, { "epoch": 0.08, "grad_norm": 0.7037391719999192, "learning_rate": 9.924214898272783e-06, "loss": 0.4915, "step": 521 }, { "epoch": 0.08, "grad_norm": 1.2786868587542795, "learning_rate": 9.923761574416884e-06, "loss": 0.7331, "step": 522 }, { "epoch": 0.08, "grad_norm": 1.1865216965166312, "learning_rate": 9.923306909192608e-06, "loss": 0.6434, "step": 523 }, { "epoch": 0.08, "grad_norm": 0.6472122413131611, "learning_rate": 9.922850902723815e-06, "loss": 0.4969, "step": 524 }, { "epoch": 0.08, "grad_norm": 1.1423892338495318, "learning_rate": 9.922393555134739e-06, "loss": 0.6739, "step": 525 }, { "epoch": 0.08, "grad_norm": 1.1281429287655047, "learning_rate": 9.92193486654997e-06, "loss": 0.6577, "step": 526 }, { "epoch": 0.08, "grad_norm": 1.130769436545356, "learning_rate": 9.921474837094473e-06, "loss": 0.7031, "step": 527 }, { "epoch": 0.09, "grad_norm": 1.1053623018996277, "learning_rate": 9.921013466893567e-06, "loss": 0.6365, "step": 528 }, { "epoch": 0.09, "grad_norm": 1.121818177065228, "learning_rate": 9.920550756072945e-06, "loss": 0.6616, "step": 529 }, { "epoch": 0.09, "grad_norm": 1.1450067956094951, "learning_rate": 9.92008670475866e-06, "loss": 0.6778, "step": 530 }, { "epoch": 0.09, "grad_norm": 1.1033360143415212, "learning_rate": 9.919621313077135e-06, "loss": 0.701, "step": 531 }, { "epoch": 0.09, "grad_norm": 1.058289274485389, "learning_rate": 9.919154581155152e-06, "loss": 0.641, "step": 532 }, { "epoch": 0.09, "grad_norm": 1.1889313501506664, "learning_rate": 9.918686509119867e-06, "loss": 0.7124, "step": 533 }, { "epoch": 0.09, "grad_norm": 1.2630487075319436, "learning_rate": 9.91821709709879e-06, "loss": 0.7433, "step": 534 }, { "epoch": 0.09, "grad_norm": 1.219072434854954, "learning_rate": 9.917746345219805e-06, "loss": 0.7342, "step": 535 }, { "epoch": 0.09, "grad_norm": 1.1040858565196574, "learning_rate": 9.917274253611156e-06, "loss": 0.6516, "step": 536 }, { "epoch": 0.09, "grad_norm": 1.2105967303379057, "learning_rate": 9.916800822401457e-06, "loss": 0.6421, "step": 537 }, { "epoch": 0.09, "grad_norm": 1.1694288730726754, "learning_rate": 9.91632605171968e-06, "loss": 0.7354, "step": 538 }, { "epoch": 0.09, "grad_norm": 1.1333568245587693, "learning_rate": 9.915849941695167e-06, "loss": 0.6313, "step": 539 }, { "epoch": 0.09, "grad_norm": 1.184053852232108, "learning_rate": 9.915372492457624e-06, "loss": 0.7035, "step": 540 }, { "epoch": 0.09, "grad_norm": 1.084857145915072, "learning_rate": 9.914893704137123e-06, "loss": 0.565, "step": 541 }, { "epoch": 0.09, "grad_norm": 1.2339695853118111, "learning_rate": 9.914413576864094e-06, "loss": 0.6579, "step": 542 }, { "epoch": 0.09, "grad_norm": 1.1632436425485746, "learning_rate": 9.913932110769342e-06, "loss": 0.6434, "step": 543 }, { "epoch": 0.09, "grad_norm": 1.1630065352792258, "learning_rate": 9.91344930598403e-06, "loss": 0.6181, "step": 544 }, { "epoch": 0.09, "grad_norm": 1.3410053202359082, "learning_rate": 9.912965162639686e-06, "loss": 0.7359, "step": 545 }, { "epoch": 0.09, "grad_norm": 1.1338222943771186, "learning_rate": 9.912479680868205e-06, "loss": 0.7335, "step": 546 }, { "epoch": 0.09, "grad_norm": 1.1873556496658888, "learning_rate": 9.911992860801846e-06, "loss": 0.6602, "step": 547 }, { "epoch": 0.09, "grad_norm": 1.228740007792514, "learning_rate": 9.911504702573232e-06, "loss": 0.7021, "step": 548 }, { "epoch": 0.09, "grad_norm": 1.094285422410936, "learning_rate": 9.91101520631535e-06, "loss": 0.5907, "step": 549 }, { "epoch": 0.09, "grad_norm": 1.1994402125495436, "learning_rate": 9.910524372161554e-06, "loss": 0.6775, "step": 550 }, { "epoch": 0.09, "grad_norm": 1.1610362258753477, "learning_rate": 9.910032200245559e-06, "loss": 0.6217, "step": 551 }, { "epoch": 0.09, "grad_norm": 1.2273931029446528, "learning_rate": 9.909538690701448e-06, "loss": 0.7312, "step": 552 }, { "epoch": 0.09, "grad_norm": 1.2844182559917314, "learning_rate": 9.909043843663664e-06, "loss": 0.6205, "step": 553 }, { "epoch": 0.09, "grad_norm": 1.4034698382779167, "learning_rate": 9.908547659267019e-06, "loss": 0.7232, "step": 554 }, { "epoch": 0.09, "grad_norm": 1.1709127331685032, "learning_rate": 9.908050137646685e-06, "loss": 0.6718, "step": 555 }, { "epoch": 0.09, "grad_norm": 1.063061417451581, "learning_rate": 9.907551278938203e-06, "loss": 0.6176, "step": 556 }, { "epoch": 0.09, "grad_norm": 1.1747705335839875, "learning_rate": 9.907051083277477e-06, "loss": 0.7033, "step": 557 }, { "epoch": 0.09, "grad_norm": 1.175444179989701, "learning_rate": 9.90654955080077e-06, "loss": 0.6516, "step": 558 }, { "epoch": 0.09, "grad_norm": 1.2316346472906745, "learning_rate": 9.906046681644717e-06, "loss": 0.7115, "step": 559 }, { "epoch": 0.09, "grad_norm": 1.1633707899949712, "learning_rate": 9.905542475946312e-06, "loss": 0.6469, "step": 560 }, { "epoch": 0.09, "grad_norm": 1.146568495624561, "learning_rate": 9.905036933842914e-06, "loss": 0.592, "step": 561 }, { "epoch": 0.09, "grad_norm": 1.3077577238127382, "learning_rate": 9.904530055472246e-06, "loss": 0.687, "step": 562 }, { "epoch": 0.09, "grad_norm": 1.2358972052507384, "learning_rate": 9.904021840972397e-06, "loss": 0.7368, "step": 563 }, { "epoch": 0.09, "grad_norm": 1.3106980093867937, "learning_rate": 9.90351229048182e-06, "loss": 0.7422, "step": 564 }, { "epoch": 0.09, "grad_norm": 1.1566044692963313, "learning_rate": 9.903001404139329e-06, "loss": 0.6838, "step": 565 }, { "epoch": 0.09, "grad_norm": 1.1644943085229642, "learning_rate": 9.902489182084101e-06, "loss": 0.7138, "step": 566 }, { "epoch": 0.09, "grad_norm": 1.1741870624884594, "learning_rate": 9.901975624455685e-06, "loss": 0.7217, "step": 567 }, { "epoch": 0.09, "grad_norm": 1.0519172418917817, "learning_rate": 9.901460731393984e-06, "loss": 0.5827, "step": 568 }, { "epoch": 0.09, "grad_norm": 1.1218434577276994, "learning_rate": 9.90094450303927e-06, "loss": 0.6013, "step": 569 }, { "epoch": 0.09, "grad_norm": 1.177373214804677, "learning_rate": 9.900426939532178e-06, "loss": 0.6276, "step": 570 }, { "epoch": 0.09, "grad_norm": 1.2286618811684675, "learning_rate": 9.899908041013706e-06, "loss": 0.7373, "step": 571 }, { "epoch": 0.09, "grad_norm": 1.2366287760338548, "learning_rate": 9.899387807625217e-06, "loss": 0.6714, "step": 572 }, { "epoch": 0.09, "grad_norm": 1.3026809975438984, "learning_rate": 9.898866239508437e-06, "loss": 0.7736, "step": 573 }, { "epoch": 0.09, "grad_norm": 1.1520816039763897, "learning_rate": 9.898343336805456e-06, "loss": 0.716, "step": 574 }, { "epoch": 0.09, "grad_norm": 1.0900733072829238, "learning_rate": 9.897819099658726e-06, "loss": 0.6415, "step": 575 }, { "epoch": 0.09, "grad_norm": 1.218270924805952, "learning_rate": 9.897293528211062e-06, "loss": 0.6468, "step": 576 }, { "epoch": 0.09, "grad_norm": 1.0200023789201833, "learning_rate": 9.896766622605649e-06, "loss": 0.6872, "step": 577 }, { "epoch": 0.09, "grad_norm": 1.1136163288424725, "learning_rate": 9.896238382986028e-06, "loss": 0.7393, "step": 578 }, { "epoch": 0.09, "grad_norm": 1.236151668794328, "learning_rate": 9.895708809496106e-06, "loss": 0.6547, "step": 579 }, { "epoch": 0.09, "grad_norm": 0.8549241049806982, "learning_rate": 9.895177902280151e-06, "loss": 0.5048, "step": 580 }, { "epoch": 0.09, "grad_norm": 1.1784929771722537, "learning_rate": 9.8946456614828e-06, "loss": 0.691, "step": 581 }, { "epoch": 0.09, "grad_norm": 1.1972402589076063, "learning_rate": 9.894112087249049e-06, "loss": 0.7096, "step": 582 }, { "epoch": 0.09, "grad_norm": 1.233755968771153, "learning_rate": 9.893577179724259e-06, "loss": 0.6047, "step": 583 }, { "epoch": 0.09, "grad_norm": 1.2058099789911745, "learning_rate": 9.893040939054152e-06, "loss": 0.6348, "step": 584 }, { "epoch": 0.09, "grad_norm": 1.1894890387326191, "learning_rate": 9.892503365384818e-06, "loss": 0.6789, "step": 585 }, { "epoch": 0.09, "grad_norm": 1.273559098118028, "learning_rate": 9.891964458862702e-06, "loss": 0.7233, "step": 586 }, { "epoch": 0.09, "grad_norm": 1.1425957486655585, "learning_rate": 9.891424219634622e-06, "loss": 0.6412, "step": 587 }, { "epoch": 0.09, "grad_norm": 1.0745400895394557, "learning_rate": 9.89088264784775e-06, "loss": 0.5305, "step": 588 }, { "epoch": 0.09, "grad_norm": 0.8742768135738936, "learning_rate": 9.890339743649628e-06, "loss": 0.5204, "step": 589 }, { "epoch": 0.1, "grad_norm": 1.1540515846346784, "learning_rate": 9.889795507188156e-06, "loss": 0.6956, "step": 590 }, { "epoch": 0.1, "grad_norm": 1.0939616122277867, "learning_rate": 9.889249938611599e-06, "loss": 0.688, "step": 591 }, { "epoch": 0.1, "grad_norm": 0.6874176432520733, "learning_rate": 9.888703038068587e-06, "loss": 0.4784, "step": 592 }, { "epoch": 0.1, "grad_norm": 1.0524315957917807, "learning_rate": 9.88815480570811e-06, "loss": 0.6232, "step": 593 }, { "epoch": 0.1, "grad_norm": 1.164761362071935, "learning_rate": 9.887605241679518e-06, "loss": 0.7593, "step": 594 }, { "epoch": 0.1, "grad_norm": 1.2079202270070422, "learning_rate": 9.887054346132534e-06, "loss": 0.7912, "step": 595 }, { "epoch": 0.1, "grad_norm": 1.0698576196714158, "learning_rate": 9.886502119217232e-06, "loss": 0.6369, "step": 596 }, { "epoch": 0.1, "grad_norm": 1.2028278733223143, "learning_rate": 9.885948561084056e-06, "loss": 0.7513, "step": 597 }, { "epoch": 0.1, "grad_norm": 1.1577396220768945, "learning_rate": 9.885393671883811e-06, "loss": 0.5627, "step": 598 }, { "epoch": 0.1, "grad_norm": 1.0432835497165727, "learning_rate": 9.884837451767662e-06, "loss": 0.5861, "step": 599 }, { "epoch": 0.1, "grad_norm": 1.1810660416020078, "learning_rate": 9.88427990088714e-06, "loss": 0.7421, "step": 600 }, { "epoch": 0.1, "grad_norm": 0.9088559897317271, "learning_rate": 9.883721019394136e-06, "loss": 0.5154, "step": 601 }, { "epoch": 0.1, "grad_norm": 1.051312694435416, "learning_rate": 9.883160807440907e-06, "loss": 0.6475, "step": 602 }, { "epoch": 0.1, "grad_norm": 1.134419693715016, "learning_rate": 9.88259926518007e-06, "loss": 0.5974, "step": 603 }, { "epoch": 0.1, "grad_norm": 1.1596943851834276, "learning_rate": 9.882036392764602e-06, "loss": 0.627, "step": 604 }, { "epoch": 0.1, "grad_norm": 1.1108763034258309, "learning_rate": 9.881472190347846e-06, "loss": 0.6636, "step": 605 }, { "epoch": 0.1, "grad_norm": 1.1798135078930199, "learning_rate": 9.880906658083507e-06, "loss": 0.6754, "step": 606 }, { "epoch": 0.1, "grad_norm": 1.1627964836285178, "learning_rate": 9.880339796125653e-06, "loss": 0.6587, "step": 607 }, { "epoch": 0.1, "grad_norm": 1.2117043357056776, "learning_rate": 9.87977160462871e-06, "loss": 0.5128, "step": 608 }, { "epoch": 0.1, "grad_norm": 1.1341039479983468, "learning_rate": 9.87920208374747e-06, "loss": 0.6298, "step": 609 }, { "epoch": 0.1, "grad_norm": 1.1498543020546936, "learning_rate": 9.878631233637086e-06, "loss": 0.6556, "step": 610 }, { "epoch": 0.1, "grad_norm": 1.04641102779228, "learning_rate": 9.878059054453076e-06, "loss": 0.6047, "step": 611 }, { "epoch": 0.1, "grad_norm": 1.071126347340601, "learning_rate": 9.877485546351312e-06, "loss": 0.6383, "step": 612 }, { "epoch": 0.1, "grad_norm": 1.1753241808683828, "learning_rate": 9.876910709488038e-06, "loss": 0.6768, "step": 613 }, { "epoch": 0.1, "grad_norm": 1.2292926282467858, "learning_rate": 9.876334544019855e-06, "loss": 0.6672, "step": 614 }, { "epoch": 0.1, "grad_norm": 1.2153076807372112, "learning_rate": 9.875757050103722e-06, "loss": 0.6448, "step": 615 }, { "epoch": 0.1, "grad_norm": 1.067205415477389, "learning_rate": 9.87517822789697e-06, "loss": 0.5893, "step": 616 }, { "epoch": 0.1, "grad_norm": 1.0994707634423433, "learning_rate": 9.874598077557284e-06, "loss": 0.5902, "step": 617 }, { "epoch": 0.1, "grad_norm": 1.1401011224368014, "learning_rate": 9.874016599242711e-06, "loss": 0.603, "step": 618 }, { "epoch": 0.1, "grad_norm": 1.1003835302482134, "learning_rate": 9.873433793111665e-06, "loss": 0.5596, "step": 619 }, { "epoch": 0.1, "grad_norm": 1.2091003500915674, "learning_rate": 9.872849659322916e-06, "loss": 0.6302, "step": 620 }, { "epoch": 0.1, "grad_norm": 1.194189383129039, "learning_rate": 9.872264198035599e-06, "loss": 0.7359, "step": 621 }, { "epoch": 0.1, "grad_norm": 1.1882669257254042, "learning_rate": 9.871677409409209e-06, "loss": 0.7466, "step": 622 }, { "epoch": 0.1, "grad_norm": 1.048545848172781, "learning_rate": 9.871089293603604e-06, "loss": 0.675, "step": 623 }, { "epoch": 0.1, "grad_norm": 1.1772444243003835, "learning_rate": 9.870499850779003e-06, "loss": 0.6316, "step": 624 }, { "epoch": 0.1, "grad_norm": 1.0917820312479634, "learning_rate": 9.869909081095988e-06, "loss": 0.6267, "step": 625 }, { "epoch": 0.1, "grad_norm": 1.1056905288428078, "learning_rate": 9.8693169847155e-06, "loss": 0.6103, "step": 626 }, { "epoch": 0.1, "grad_norm": 1.1239072812755515, "learning_rate": 9.86872356179884e-06, "loss": 0.6771, "step": 627 }, { "epoch": 0.1, "grad_norm": 1.1603227876273414, "learning_rate": 9.868128812507675e-06, "loss": 0.5894, "step": 628 }, { "epoch": 0.1, "grad_norm": 0.9866016136427981, "learning_rate": 9.86753273700403e-06, "loss": 0.5816, "step": 629 }, { "epoch": 0.1, "grad_norm": 1.1409097107488921, "learning_rate": 9.866935335450293e-06, "loss": 0.7351, "step": 630 }, { "epoch": 0.1, "grad_norm": 1.0877914302948914, "learning_rate": 9.866336608009215e-06, "loss": 0.6099, "step": 631 }, { "epoch": 0.1, "grad_norm": 1.1477016855108522, "learning_rate": 9.865736554843904e-06, "loss": 0.7153, "step": 632 }, { "epoch": 0.1, "grad_norm": 1.2249509435579633, "learning_rate": 9.865135176117829e-06, "loss": 0.7152, "step": 633 }, { "epoch": 0.1, "grad_norm": 1.0584944801375353, "learning_rate": 9.864532471994823e-06, "loss": 0.5937, "step": 634 }, { "epoch": 0.1, "grad_norm": 1.0579573080148321, "learning_rate": 9.86392844263908e-06, "loss": 0.6522, "step": 635 }, { "epoch": 0.1, "grad_norm": 1.130868722794895, "learning_rate": 9.863323088215155e-06, "loss": 0.673, "step": 636 }, { "epoch": 0.1, "grad_norm": 1.1358829015765082, "learning_rate": 9.862716408887962e-06, "loss": 0.6714, "step": 637 }, { "epoch": 0.1, "grad_norm": 1.019101686143704, "learning_rate": 9.862108404822779e-06, "loss": 0.6388, "step": 638 }, { "epoch": 0.1, "grad_norm": 1.128230021030712, "learning_rate": 9.861499076185241e-06, "loss": 0.6507, "step": 639 }, { "epoch": 0.1, "grad_norm": 1.0917670954400573, "learning_rate": 9.860888423141346e-06, "loss": 0.6943, "step": 640 }, { "epoch": 0.1, "grad_norm": 1.1808613522959637, "learning_rate": 9.860276445857456e-06, "loss": 0.7114, "step": 641 }, { "epoch": 0.1, "grad_norm": 1.194345844631833, "learning_rate": 9.859663144500284e-06, "loss": 0.7589, "step": 642 }, { "epoch": 0.1, "grad_norm": 1.0992402143283686, "learning_rate": 9.859048519236915e-06, "loss": 0.5756, "step": 643 }, { "epoch": 0.1, "grad_norm": 1.295352848148064, "learning_rate": 9.85843257023479e-06, "loss": 0.6805, "step": 644 }, { "epoch": 0.1, "grad_norm": 1.1402459000316831, "learning_rate": 9.85781529766171e-06, "loss": 0.6345, "step": 645 }, { "epoch": 0.1, "grad_norm": 1.1519704840342817, "learning_rate": 9.857196701685836e-06, "loss": 0.7206, "step": 646 }, { "epoch": 0.1, "grad_norm": 1.0864437251552281, "learning_rate": 9.85657678247569e-06, "loss": 0.6296, "step": 647 }, { "epoch": 0.1, "grad_norm": 1.2173719600753716, "learning_rate": 9.855955540200157e-06, "loss": 0.7159, "step": 648 }, { "epoch": 0.1, "grad_norm": 1.1503016699662922, "learning_rate": 9.85533297502848e-06, "loss": 0.7001, "step": 649 }, { "epoch": 0.1, "grad_norm": 1.182056435634815, "learning_rate": 9.854709087130261e-06, "loss": 0.7288, "step": 650 }, { "epoch": 0.1, "grad_norm": 1.0603320690552864, "learning_rate": 9.854083876675466e-06, "loss": 0.6133, "step": 651 }, { "epoch": 0.11, "grad_norm": 1.2333423736364608, "learning_rate": 9.853457343834421e-06, "loss": 0.759, "step": 652 }, { "epoch": 0.11, "grad_norm": 0.7852678318821367, "learning_rate": 9.852829488777808e-06, "loss": 0.4909, "step": 653 }, { "epoch": 0.11, "grad_norm": 1.1790140422269264, "learning_rate": 9.852200311676675e-06, "loss": 0.6759, "step": 654 }, { "epoch": 0.11, "grad_norm": 1.2372068215621552, "learning_rate": 9.851569812702423e-06, "loss": 0.684, "step": 655 }, { "epoch": 0.11, "grad_norm": 1.0780591363053666, "learning_rate": 9.850937992026821e-06, "loss": 0.6337, "step": 656 }, { "epoch": 0.11, "grad_norm": 1.24874562719759, "learning_rate": 9.850304849821992e-06, "loss": 0.6613, "step": 657 }, { "epoch": 0.11, "grad_norm": 1.239132838668827, "learning_rate": 9.849670386260424e-06, "loss": 0.7061, "step": 658 }, { "epoch": 0.11, "grad_norm": 1.1465329065335992, "learning_rate": 9.84903460151496e-06, "loss": 0.6595, "step": 659 }, { "epoch": 0.11, "grad_norm": 1.1502214269235445, "learning_rate": 9.848397495758806e-06, "loss": 0.6677, "step": 660 }, { "epoch": 0.11, "grad_norm": 1.0893541842015229, "learning_rate": 9.847759069165528e-06, "loss": 0.6288, "step": 661 }, { "epoch": 0.11, "grad_norm": 1.0703048100044776, "learning_rate": 9.847119321909049e-06, "loss": 0.6834, "step": 662 }, { "epoch": 0.11, "grad_norm": 1.0830875806924167, "learning_rate": 9.846478254163657e-06, "loss": 0.6919, "step": 663 }, { "epoch": 0.11, "grad_norm": 1.1460320709487686, "learning_rate": 9.845835866103993e-06, "loss": 0.7148, "step": 664 }, { "epoch": 0.11, "grad_norm": 1.1177630643332157, "learning_rate": 9.845192157905063e-06, "loss": 0.6703, "step": 665 }, { "epoch": 0.11, "grad_norm": 1.1247522391186011, "learning_rate": 9.844547129742231e-06, "loss": 0.7139, "step": 666 }, { "epoch": 0.11, "grad_norm": 1.1841373416483318, "learning_rate": 9.84390078179122e-06, "loss": 0.6941, "step": 667 }, { "epoch": 0.11, "grad_norm": 1.1704535720211449, "learning_rate": 9.843253114228113e-06, "loss": 0.6625, "step": 668 }, { "epoch": 0.11, "grad_norm": 1.1612216617566975, "learning_rate": 9.842604127229353e-06, "loss": 0.6624, "step": 669 }, { "epoch": 0.11, "grad_norm": 1.1067298535057377, "learning_rate": 9.841953820971742e-06, "loss": 0.6945, "step": 670 }, { "epoch": 0.11, "grad_norm": 1.09345165677508, "learning_rate": 9.841302195632441e-06, "loss": 0.6591, "step": 671 }, { "epoch": 0.11, "grad_norm": 1.1259759464358314, "learning_rate": 9.84064925138897e-06, "loss": 0.6408, "step": 672 }, { "epoch": 0.11, "grad_norm": 1.0662586892915495, "learning_rate": 9.839994988419208e-06, "loss": 0.6321, "step": 673 }, { "epoch": 0.11, "grad_norm": 1.182905221340787, "learning_rate": 9.839339406901398e-06, "loss": 0.7055, "step": 674 }, { "epoch": 0.11, "grad_norm": 1.2768753281187153, "learning_rate": 9.838682507014138e-06, "loss": 0.7878, "step": 675 }, { "epoch": 0.11, "grad_norm": 0.7870188064085515, "learning_rate": 9.838024288936381e-06, "loss": 0.4757, "step": 676 }, { "epoch": 0.11, "grad_norm": 1.0550074565577388, "learning_rate": 9.837364752847447e-06, "loss": 0.6249, "step": 677 }, { "epoch": 0.11, "grad_norm": 1.15199744650406, "learning_rate": 9.836703898927012e-06, "loss": 0.7149, "step": 678 }, { "epoch": 0.11, "grad_norm": 1.2233988556140745, "learning_rate": 9.83604172735511e-06, "loss": 0.6194, "step": 679 }, { "epoch": 0.11, "grad_norm": 1.0621113126186494, "learning_rate": 9.835378238312136e-06, "loss": 0.6313, "step": 680 }, { "epoch": 0.11, "grad_norm": 1.091762318648744, "learning_rate": 9.834713431978842e-06, "loss": 0.5985, "step": 681 }, { "epoch": 0.11, "grad_norm": 0.7012729213887197, "learning_rate": 9.834047308536342e-06, "loss": 0.4795, "step": 682 }, { "epoch": 0.11, "grad_norm": 1.1695086984838274, "learning_rate": 9.8333798681661e-06, "loss": 0.666, "step": 683 }, { "epoch": 0.11, "grad_norm": 1.1379929483367186, "learning_rate": 9.83271111104995e-06, "loss": 0.6903, "step": 684 }, { "epoch": 0.11, "grad_norm": 1.1673677106963345, "learning_rate": 9.83204103737008e-06, "loss": 0.6772, "step": 685 }, { "epoch": 0.11, "grad_norm": 1.0782050856966119, "learning_rate": 9.831369647309033e-06, "loss": 0.7151, "step": 686 }, { "epoch": 0.11, "grad_norm": 1.0722181439698553, "learning_rate": 9.830696941049718e-06, "loss": 0.6869, "step": 687 }, { "epoch": 0.11, "grad_norm": 1.0771743670512253, "learning_rate": 9.830022918775399e-06, "loss": 0.6889, "step": 688 }, { "epoch": 0.11, "grad_norm": 1.104686317721014, "learning_rate": 9.829347580669694e-06, "loss": 0.6168, "step": 689 }, { "epoch": 0.11, "grad_norm": 1.0959546815793324, "learning_rate": 9.828670926916588e-06, "loss": 0.6831, "step": 690 }, { "epoch": 0.11, "grad_norm": 1.1304581160333063, "learning_rate": 9.827992957700417e-06, "loss": 0.6638, "step": 691 }, { "epoch": 0.11, "grad_norm": 1.163236390540185, "learning_rate": 9.82731367320588e-06, "loss": 0.6033, "step": 692 }, { "epoch": 0.11, "grad_norm": 1.1756678543760257, "learning_rate": 9.826633073618034e-06, "loss": 0.6585, "step": 693 }, { "epoch": 0.11, "grad_norm": 1.096836231345986, "learning_rate": 9.825951159122291e-06, "loss": 0.6613, "step": 694 }, { "epoch": 0.11, "grad_norm": 1.1354930581695504, "learning_rate": 9.825267929904423e-06, "loss": 0.6716, "step": 695 }, { "epoch": 0.11, "grad_norm": 1.092673470636174, "learning_rate": 9.824583386150563e-06, "loss": 0.6532, "step": 696 }, { "epoch": 0.11, "grad_norm": 1.1552783404769593, "learning_rate": 9.8238975280472e-06, "loss": 0.6029, "step": 697 }, { "epoch": 0.11, "grad_norm": 1.018465462176151, "learning_rate": 9.823210355781177e-06, "loss": 0.6109, "step": 698 }, { "epoch": 0.11, "grad_norm": 1.196860147792639, "learning_rate": 9.822521869539699e-06, "loss": 0.7328, "step": 699 }, { "epoch": 0.11, "grad_norm": 1.1127392945890429, "learning_rate": 9.821832069510333e-06, "loss": 0.7219, "step": 700 }, { "epoch": 0.11, "grad_norm": 1.1318025689002094, "learning_rate": 9.821140955880995e-06, "loss": 0.6095, "step": 701 }, { "epoch": 0.11, "grad_norm": 1.0724054236572238, "learning_rate": 9.820448528839965e-06, "loss": 0.6659, "step": 702 }, { "epoch": 0.11, "grad_norm": 1.1351209035612464, "learning_rate": 9.81975478857588e-06, "loss": 0.6478, "step": 703 }, { "epoch": 0.11, "grad_norm": 1.0787567327934542, "learning_rate": 9.819059735277731e-06, "loss": 0.6246, "step": 704 }, { "epoch": 0.11, "grad_norm": 1.0605840424997237, "learning_rate": 9.818363369134876e-06, "loss": 0.5663, "step": 705 }, { "epoch": 0.11, "grad_norm": 1.1255932353729787, "learning_rate": 9.817665690337017e-06, "loss": 0.6862, "step": 706 }, { "epoch": 0.11, "grad_norm": 1.1249921647757453, "learning_rate": 9.816966699074226e-06, "loss": 0.7629, "step": 707 }, { "epoch": 0.11, "grad_norm": 1.1550934119151437, "learning_rate": 9.816266395536924e-06, "loss": 0.6357, "step": 708 }, { "epoch": 0.11, "grad_norm": 1.1906576137877185, "learning_rate": 9.815564779915896e-06, "loss": 0.6904, "step": 709 }, { "epoch": 0.11, "grad_norm": 1.1579017686030542, "learning_rate": 9.81486185240228e-06, "loss": 0.6312, "step": 710 }, { "epoch": 0.11, "grad_norm": 1.1575139068449294, "learning_rate": 9.814157613187573e-06, "loss": 0.6666, "step": 711 }, { "epoch": 0.11, "grad_norm": 1.1139146494658285, "learning_rate": 9.81345206246363e-06, "loss": 0.7292, "step": 712 }, { "epoch": 0.11, "grad_norm": 1.2033822814357062, "learning_rate": 9.812745200422659e-06, "loss": 0.6855, "step": 713 }, { "epoch": 0.12, "grad_norm": 1.2527931288867737, "learning_rate": 9.812037027257234e-06, "loss": 0.65, "step": 714 }, { "epoch": 0.12, "grad_norm": 1.1196317449848967, "learning_rate": 9.811327543160276e-06, "loss": 0.6849, "step": 715 }, { "epoch": 0.12, "grad_norm": 1.0904044480536927, "learning_rate": 9.810616748325072e-06, "loss": 0.6313, "step": 716 }, { "epoch": 0.12, "grad_norm": 1.0372026342087108, "learning_rate": 9.809904642945261e-06, "loss": 0.537, "step": 717 }, { "epoch": 0.12, "grad_norm": 1.1888525468182856, "learning_rate": 9.809191227214838e-06, "loss": 0.6864, "step": 718 }, { "epoch": 0.12, "grad_norm": 1.0619869673768934, "learning_rate": 9.808476501328158e-06, "loss": 0.7105, "step": 719 }, { "epoch": 0.12, "grad_norm": 0.7241118451270379, "learning_rate": 9.807760465479934e-06, "loss": 0.4456, "step": 720 }, { "epoch": 0.12, "grad_norm": 1.2627579704941168, "learning_rate": 9.807043119865232e-06, "loss": 0.6565, "step": 721 }, { "epoch": 0.12, "grad_norm": 1.1371006135755681, "learning_rate": 9.806324464679477e-06, "loss": 0.665, "step": 722 }, { "epoch": 0.12, "grad_norm": 1.2864860733532468, "learning_rate": 9.80560450011845e-06, "loss": 0.7814, "step": 723 }, { "epoch": 0.12, "grad_norm": 1.0446563209593065, "learning_rate": 9.80488322637829e-06, "loss": 0.5876, "step": 724 }, { "epoch": 0.12, "grad_norm": 1.1552104668247112, "learning_rate": 9.804160643655494e-06, "loss": 0.6429, "step": 725 }, { "epoch": 0.12, "grad_norm": 1.2119777153951785, "learning_rate": 9.803436752146909e-06, "loss": 0.6365, "step": 726 }, { "epoch": 0.12, "grad_norm": 1.1257518650891913, "learning_rate": 9.802711552049746e-06, "loss": 0.6703, "step": 727 }, { "epoch": 0.12, "grad_norm": 1.101282230316314, "learning_rate": 9.801985043561569e-06, "loss": 0.6417, "step": 728 }, { "epoch": 0.12, "grad_norm": 1.0110835759810834, "learning_rate": 9.801257226880297e-06, "loss": 0.6924, "step": 729 }, { "epoch": 0.12, "grad_norm": 1.124466843713137, "learning_rate": 9.80052810220421e-06, "loss": 0.6736, "step": 730 }, { "epoch": 0.12, "grad_norm": 1.101597531285294, "learning_rate": 9.79979766973194e-06, "loss": 0.701, "step": 731 }, { "epoch": 0.12, "grad_norm": 1.077469693166165, "learning_rate": 9.799065929662478e-06, "loss": 0.6527, "step": 732 }, { "epoch": 0.12, "grad_norm": 1.1581554184086444, "learning_rate": 9.798332882195172e-06, "loss": 0.646, "step": 733 }, { "epoch": 0.12, "grad_norm": 1.1227707961734683, "learning_rate": 9.79759852752972e-06, "loss": 0.6983, "step": 734 }, { "epoch": 0.12, "grad_norm": 1.164062495658345, "learning_rate": 9.796862865866183e-06, "loss": 0.7205, "step": 735 }, { "epoch": 0.12, "grad_norm": 1.2178982921311714, "learning_rate": 9.796125897404973e-06, "loss": 0.589, "step": 736 }, { "epoch": 0.12, "grad_norm": 1.0160301244125352, "learning_rate": 9.795387622346866e-06, "loss": 0.6328, "step": 737 }, { "epoch": 0.12, "grad_norm": 1.0672582895160998, "learning_rate": 9.794648040892983e-06, "loss": 0.668, "step": 738 }, { "epoch": 0.12, "grad_norm": 1.2228926893775223, "learning_rate": 9.793907153244808e-06, "loss": 0.7207, "step": 739 }, { "epoch": 0.12, "grad_norm": 1.0820212768172814, "learning_rate": 9.79316495960418e-06, "loss": 0.5672, "step": 740 }, { "epoch": 0.12, "grad_norm": 1.1554934015585892, "learning_rate": 9.792421460173294e-06, "loss": 0.7083, "step": 741 }, { "epoch": 0.12, "grad_norm": 1.0952142194969918, "learning_rate": 9.791676655154696e-06, "loss": 0.6947, "step": 742 }, { "epoch": 0.12, "grad_norm": 0.7018228248119613, "learning_rate": 9.790930544751297e-06, "loss": 0.487, "step": 743 }, { "epoch": 0.12, "grad_norm": 1.0923356573550314, "learning_rate": 9.790183129166351e-06, "loss": 0.6142, "step": 744 }, { "epoch": 0.12, "grad_norm": 1.1200044821085462, "learning_rate": 9.78943440860348e-06, "loss": 0.702, "step": 745 }, { "epoch": 0.12, "grad_norm": 1.1654238353641524, "learning_rate": 9.788684383266655e-06, "loss": 0.6778, "step": 746 }, { "epoch": 0.12, "grad_norm": 1.1419804082456895, "learning_rate": 9.787933053360203e-06, "loss": 0.6974, "step": 747 }, { "epoch": 0.12, "grad_norm": 1.1063097025805682, "learning_rate": 9.78718041908881e-06, "loss": 0.624, "step": 748 }, { "epoch": 0.12, "grad_norm": 1.049643560220273, "learning_rate": 9.786426480657507e-06, "loss": 0.6779, "step": 749 }, { "epoch": 0.12, "grad_norm": 1.1054719518643539, "learning_rate": 9.785671238271694e-06, "loss": 0.5698, "step": 750 }, { "epoch": 0.12, "grad_norm": 1.1107476670906633, "learning_rate": 9.784914692137118e-06, "loss": 0.6779, "step": 751 }, { "epoch": 0.12, "grad_norm": 1.0825626936632151, "learning_rate": 9.784156842459882e-06, "loss": 0.6742, "step": 752 }, { "epoch": 0.12, "grad_norm": 1.1876552992394858, "learning_rate": 9.783397689446447e-06, "loss": 0.6925, "step": 753 }, { "epoch": 0.12, "grad_norm": 1.0935688099305556, "learning_rate": 9.782637233303624e-06, "loss": 0.6637, "step": 754 }, { "epoch": 0.12, "grad_norm": 1.1056973865756239, "learning_rate": 9.781875474238587e-06, "loss": 0.6508, "step": 755 }, { "epoch": 0.12, "grad_norm": 1.0251144668629986, "learning_rate": 9.781112412458858e-06, "loss": 0.681, "step": 756 }, { "epoch": 0.12, "grad_norm": 1.118401847631918, "learning_rate": 9.780348048172315e-06, "loss": 0.6291, "step": 757 }, { "epoch": 0.12, "grad_norm": 1.0856948322979647, "learning_rate": 9.779582381587192e-06, "loss": 0.6764, "step": 758 }, { "epoch": 0.12, "grad_norm": 1.1167775815106917, "learning_rate": 9.778815412912078e-06, "loss": 0.69, "step": 759 }, { "epoch": 0.12, "grad_norm": 1.1156245136316403, "learning_rate": 9.778047142355917e-06, "loss": 0.6555, "step": 760 }, { "epoch": 0.12, "grad_norm": 1.117224523287798, "learning_rate": 9.777277570128008e-06, "loss": 0.6188, "step": 761 }, { "epoch": 0.12, "grad_norm": 1.1400145150224619, "learning_rate": 9.776506696438002e-06, "loss": 0.6966, "step": 762 }, { "epoch": 0.12, "grad_norm": 1.1098635952580846, "learning_rate": 9.775734521495905e-06, "loss": 0.7047, "step": 763 }, { "epoch": 0.12, "grad_norm": 1.176647613060308, "learning_rate": 9.774961045512082e-06, "loss": 0.7205, "step": 764 }, { "epoch": 0.12, "grad_norm": 1.060312563904902, "learning_rate": 9.774186268697247e-06, "loss": 0.6392, "step": 765 }, { "epoch": 0.12, "grad_norm": 1.1332929406673735, "learning_rate": 9.773410191262471e-06, "loss": 0.704, "step": 766 }, { "epoch": 0.12, "grad_norm": 1.0878436519922638, "learning_rate": 9.772632813419181e-06, "loss": 0.6296, "step": 767 }, { "epoch": 0.12, "grad_norm": 1.1094079349670647, "learning_rate": 9.771854135379153e-06, "loss": 0.7008, "step": 768 }, { "epoch": 0.12, "grad_norm": 1.0867050871032815, "learning_rate": 9.771074157354521e-06, "loss": 0.6736, "step": 769 }, { "epoch": 0.12, "grad_norm": 1.108129505299658, "learning_rate": 9.770292879557774e-06, "loss": 0.6275, "step": 770 }, { "epoch": 0.12, "grad_norm": 1.1733838763765414, "learning_rate": 9.769510302201751e-06, "loss": 0.6377, "step": 771 }, { "epoch": 0.12, "grad_norm": 1.2078112756030042, "learning_rate": 9.76872642549965e-06, "loss": 0.7529, "step": 772 }, { "epoch": 0.12, "grad_norm": 1.188180707633336, "learning_rate": 9.767941249665022e-06, "loss": 0.7171, "step": 773 }, { "epoch": 0.12, "grad_norm": 1.015452664559184, "learning_rate": 9.767154774911767e-06, "loss": 0.5639, "step": 774 }, { "epoch": 0.12, "grad_norm": 0.9867899983879593, "learning_rate": 9.766367001454144e-06, "loss": 0.5951, "step": 775 }, { "epoch": 0.13, "grad_norm": 1.1090135612693237, "learning_rate": 9.765577929506764e-06, "loss": 0.7219, "step": 776 }, { "epoch": 0.13, "grad_norm": 1.0934918207561355, "learning_rate": 9.764787559284592e-06, "loss": 0.6733, "step": 777 }, { "epoch": 0.13, "grad_norm": 1.0662334974604029, "learning_rate": 9.763995891002946e-06, "loss": 0.6458, "step": 778 }, { "epoch": 0.13, "grad_norm": 1.0481399885582665, "learning_rate": 9.763202924877502e-06, "loss": 0.6507, "step": 779 }, { "epoch": 0.13, "grad_norm": 1.1235560146026446, "learning_rate": 9.762408661124279e-06, "loss": 0.6719, "step": 780 }, { "epoch": 0.13, "grad_norm": 1.1621910675287144, "learning_rate": 9.761613099959663e-06, "loss": 0.7111, "step": 781 }, { "epoch": 0.13, "grad_norm": 1.0343618062802882, "learning_rate": 9.760816241600383e-06, "loss": 0.57, "step": 782 }, { "epoch": 0.13, "grad_norm": 1.1602436667974612, "learning_rate": 9.760018086263525e-06, "loss": 0.6901, "step": 783 }, { "epoch": 0.13, "grad_norm": 1.0690032226897987, "learning_rate": 9.759218634166531e-06, "loss": 0.6748, "step": 784 }, { "epoch": 0.13, "grad_norm": 1.109745664602789, "learning_rate": 9.75841788552719e-06, "loss": 0.6241, "step": 785 }, { "epoch": 0.13, "grad_norm": 1.0897497460054035, "learning_rate": 9.757615840563654e-06, "loss": 0.682, "step": 786 }, { "epoch": 0.13, "grad_norm": 1.035094430965384, "learning_rate": 9.756812499494417e-06, "loss": 0.6437, "step": 787 }, { "epoch": 0.13, "grad_norm": 1.046623122788427, "learning_rate": 9.756007862538333e-06, "loss": 0.6591, "step": 788 }, { "epoch": 0.13, "grad_norm": 1.1518106250189126, "learning_rate": 9.755201929914607e-06, "loss": 0.7309, "step": 789 }, { "epoch": 0.13, "grad_norm": 1.1135561339059143, "learning_rate": 9.754394701842797e-06, "loss": 0.7424, "step": 790 }, { "epoch": 0.13, "grad_norm": 0.990885023960312, "learning_rate": 9.753586178542815e-06, "loss": 0.617, "step": 791 }, { "epoch": 0.13, "grad_norm": 1.2891519186594527, "learning_rate": 9.752776360234925e-06, "loss": 0.793, "step": 792 }, { "epoch": 0.13, "grad_norm": 1.0976370897847914, "learning_rate": 9.751965247139743e-06, "loss": 0.6584, "step": 793 }, { "epoch": 0.13, "grad_norm": 1.0980152434001413, "learning_rate": 9.751152839478238e-06, "loss": 0.6525, "step": 794 }, { "epoch": 0.13, "grad_norm": 1.1686544447687142, "learning_rate": 9.750339137471733e-06, "loss": 0.6586, "step": 795 }, { "epoch": 0.13, "grad_norm": 1.0804798707012149, "learning_rate": 9.749524141341905e-06, "loss": 0.673, "step": 796 }, { "epoch": 0.13, "grad_norm": 1.2306899490406018, "learning_rate": 9.74870785131078e-06, "loss": 0.68, "step": 797 }, { "epoch": 0.13, "grad_norm": 1.0870759004278514, "learning_rate": 9.747890267600735e-06, "loss": 0.7081, "step": 798 }, { "epoch": 0.13, "grad_norm": 1.027898436759447, "learning_rate": 9.747071390434507e-06, "loss": 0.6773, "step": 799 }, { "epoch": 0.13, "grad_norm": 1.1362753191845703, "learning_rate": 9.746251220035176e-06, "loss": 0.7214, "step": 800 }, { "epoch": 0.13, "grad_norm": 1.142417329753885, "learning_rate": 9.745429756626184e-06, "loss": 0.6716, "step": 801 }, { "epoch": 0.13, "grad_norm": 1.0901056393581727, "learning_rate": 9.744607000431317e-06, "loss": 0.6324, "step": 802 }, { "epoch": 0.13, "grad_norm": 1.1392228290670723, "learning_rate": 9.743782951674715e-06, "loss": 0.6286, "step": 803 }, { "epoch": 0.13, "grad_norm": 1.055072373051012, "learning_rate": 9.742957610580876e-06, "loss": 0.6666, "step": 804 }, { "epoch": 0.13, "grad_norm": 1.047155710841041, "learning_rate": 9.742130977374643e-06, "loss": 0.6228, "step": 805 }, { "epoch": 0.13, "grad_norm": 1.049592201301659, "learning_rate": 9.741303052281214e-06, "loss": 0.5766, "step": 806 }, { "epoch": 0.13, "grad_norm": 1.1677753500888257, "learning_rate": 9.740473835526139e-06, "loss": 0.7503, "step": 807 }, { "epoch": 0.13, "grad_norm": 1.055867029592767, "learning_rate": 9.73964332733532e-06, "loss": 0.6741, "step": 808 }, { "epoch": 0.13, "grad_norm": 1.1829879144892457, "learning_rate": 9.738811527935008e-06, "loss": 0.593, "step": 809 }, { "epoch": 0.13, "grad_norm": 1.1435573110805033, "learning_rate": 9.737978437551812e-06, "loss": 0.6499, "step": 810 }, { "epoch": 0.13, "grad_norm": 1.0980371455634672, "learning_rate": 9.737144056412685e-06, "loss": 0.5825, "step": 811 }, { "epoch": 0.13, "grad_norm": 1.060457506292837, "learning_rate": 9.736308384744935e-06, "loss": 0.6587, "step": 812 }, { "epoch": 0.13, "grad_norm": 1.0180825102620248, "learning_rate": 9.735471422776225e-06, "loss": 0.5964, "step": 813 }, { "epoch": 0.13, "grad_norm": 1.1984903285617077, "learning_rate": 9.734633170734568e-06, "loss": 0.7007, "step": 814 }, { "epoch": 0.13, "grad_norm": 1.0941518362757818, "learning_rate": 9.733793628848323e-06, "loss": 0.643, "step": 815 }, { "epoch": 0.13, "grad_norm": 1.1305095837908723, "learning_rate": 9.732952797346205e-06, "loss": 0.5793, "step": 816 }, { "epoch": 0.13, "grad_norm": 1.1076645657388142, "learning_rate": 9.73211067645728e-06, "loss": 0.6271, "step": 817 }, { "epoch": 0.13, "grad_norm": 1.226772057164279, "learning_rate": 9.731267266410967e-06, "loss": 0.6866, "step": 818 }, { "epoch": 0.13, "grad_norm": 1.034266121827543, "learning_rate": 9.73042256743703e-06, "loss": 0.6298, "step": 819 }, { "epoch": 0.13, "grad_norm": 1.0318928534724523, "learning_rate": 9.729576579765594e-06, "loss": 0.6544, "step": 820 }, { "epoch": 0.13, "grad_norm": 1.074490253360686, "learning_rate": 9.728729303627124e-06, "loss": 0.6491, "step": 821 }, { "epoch": 0.13, "grad_norm": 1.0867944566176206, "learning_rate": 9.727880739252444e-06, "loss": 0.6981, "step": 822 }, { "epoch": 0.13, "grad_norm": 1.109875300934573, "learning_rate": 9.727030886872724e-06, "loss": 0.6538, "step": 823 }, { "epoch": 0.13, "grad_norm": 1.1758615686974467, "learning_rate": 9.726179746719492e-06, "loss": 0.6619, "step": 824 }, { "epoch": 0.13, "grad_norm": 1.145086451533505, "learning_rate": 9.725327319024618e-06, "loss": 0.6703, "step": 825 }, { "epoch": 0.13, "grad_norm": 1.1070722263596582, "learning_rate": 9.724473604020327e-06, "loss": 0.6395, "step": 826 }, { "epoch": 0.13, "grad_norm": 1.1352621847649027, "learning_rate": 9.723618601939197e-06, "loss": 0.611, "step": 827 }, { "epoch": 0.13, "grad_norm": 1.0469384773305612, "learning_rate": 9.722762313014152e-06, "loss": 0.6155, "step": 828 }, { "epoch": 0.13, "grad_norm": 1.1717381655097072, "learning_rate": 9.721904737478468e-06, "loss": 0.6281, "step": 829 }, { "epoch": 0.13, "grad_norm": 1.1094618445886826, "learning_rate": 9.721045875565774e-06, "loss": 0.744, "step": 830 }, { "epoch": 0.13, "grad_norm": 1.0072172503069594, "learning_rate": 9.720185727510047e-06, "loss": 0.6605, "step": 831 }, { "epoch": 0.13, "grad_norm": 1.1824748151331537, "learning_rate": 9.719324293545615e-06, "loss": 0.6994, "step": 832 }, { "epoch": 0.13, "grad_norm": 1.0453950970818084, "learning_rate": 9.718461573907158e-06, "loss": 0.6481, "step": 833 }, { "epoch": 0.13, "grad_norm": 1.6403298070341439, "learning_rate": 9.717597568829702e-06, "loss": 0.6364, "step": 834 }, { "epoch": 0.13, "grad_norm": 1.1493262552461538, "learning_rate": 9.716732278548628e-06, "loss": 0.6513, "step": 835 }, { "epoch": 0.13, "grad_norm": 1.1579344302878785, "learning_rate": 9.715865703299663e-06, "loss": 0.6794, "step": 836 }, { "epoch": 0.13, "grad_norm": 1.2249903414880976, "learning_rate": 9.714997843318887e-06, "loss": 0.7409, "step": 837 }, { "epoch": 0.14, "grad_norm": 1.0881109905088606, "learning_rate": 9.714128698842728e-06, "loss": 0.5861, "step": 838 }, { "epoch": 0.14, "grad_norm": 1.2070736888264695, "learning_rate": 9.713258270107966e-06, "loss": 0.7161, "step": 839 }, { "epoch": 0.14, "grad_norm": 1.0847757512329015, "learning_rate": 9.71238655735173e-06, "loss": 0.5815, "step": 840 }, { "epoch": 0.14, "grad_norm": 1.1386957887593023, "learning_rate": 9.711513560811498e-06, "loss": 0.7045, "step": 841 }, { "epoch": 0.14, "grad_norm": 1.1059070091102732, "learning_rate": 9.710639280725097e-06, "loss": 0.6515, "step": 842 }, { "epoch": 0.14, "grad_norm": 0.9714775195343888, "learning_rate": 9.709763717330709e-06, "loss": 0.63, "step": 843 }, { "epoch": 0.14, "grad_norm": 1.113181883490431, "learning_rate": 9.708886870866856e-06, "loss": 0.6224, "step": 844 }, { "epoch": 0.14, "grad_norm": 1.035446031914055, "learning_rate": 9.708008741572422e-06, "loss": 0.6571, "step": 845 }, { "epoch": 0.14, "grad_norm": 1.1563565652996073, "learning_rate": 9.707129329686626e-06, "loss": 0.6679, "step": 846 }, { "epoch": 0.14, "grad_norm": 1.0976526167786826, "learning_rate": 9.706248635449048e-06, "loss": 0.6834, "step": 847 }, { "epoch": 0.14, "grad_norm": 1.1267220131330384, "learning_rate": 9.705366659099617e-06, "loss": 0.623, "step": 848 }, { "epoch": 0.14, "grad_norm": 1.171574879508651, "learning_rate": 9.704483400878602e-06, "loss": 0.6779, "step": 849 }, { "epoch": 0.14, "grad_norm": 1.0831137161215711, "learning_rate": 9.703598861026627e-06, "loss": 0.716, "step": 850 }, { "epoch": 0.14, "grad_norm": 1.1103037656427084, "learning_rate": 9.702713039784668e-06, "loss": 0.7062, "step": 851 }, { "epoch": 0.14, "grad_norm": 1.1627111593577728, "learning_rate": 9.701825937394045e-06, "loss": 0.7174, "step": 852 }, { "epoch": 0.14, "grad_norm": 0.827784484577726, "learning_rate": 9.700937554096432e-06, "loss": 0.4967, "step": 853 }, { "epoch": 0.14, "grad_norm": 0.7681586313814103, "learning_rate": 9.700047890133845e-06, "loss": 0.4805, "step": 854 }, { "epoch": 0.14, "grad_norm": 1.1642378919689045, "learning_rate": 9.699156945748657e-06, "loss": 0.7068, "step": 855 }, { "epoch": 0.14, "grad_norm": 1.0724223629447225, "learning_rate": 9.698264721183584e-06, "loss": 0.6721, "step": 856 }, { "epoch": 0.14, "grad_norm": 0.6884331745793695, "learning_rate": 9.69737121668169e-06, "loss": 0.4977, "step": 857 }, { "epoch": 0.14, "grad_norm": 1.056495437876798, "learning_rate": 9.696476432486395e-06, "loss": 0.644, "step": 858 }, { "epoch": 0.14, "grad_norm": 1.0891122881543713, "learning_rate": 9.695580368841462e-06, "loss": 0.6839, "step": 859 }, { "epoch": 0.14, "grad_norm": 1.0386168600187955, "learning_rate": 9.694683025991e-06, "loss": 0.5733, "step": 860 }, { "epoch": 0.14, "grad_norm": 1.1844881432130085, "learning_rate": 9.693784404179472e-06, "loss": 0.7426, "step": 861 }, { "epoch": 0.14, "grad_norm": 1.0289717955913023, "learning_rate": 9.692884503651687e-06, "loss": 0.6666, "step": 862 }, { "epoch": 0.14, "grad_norm": 1.0737368679428585, "learning_rate": 9.691983324652804e-06, "loss": 0.4978, "step": 863 }, { "epoch": 0.14, "grad_norm": 1.028322598339251, "learning_rate": 9.691080867428328e-06, "loss": 0.6368, "step": 864 }, { "epoch": 0.14, "grad_norm": 1.171478679657464, "learning_rate": 9.690177132224113e-06, "loss": 0.784, "step": 865 }, { "epoch": 0.14, "grad_norm": 1.1377306989337588, "learning_rate": 9.689272119286361e-06, "loss": 0.6456, "step": 866 }, { "epoch": 0.14, "grad_norm": 1.1727858762219754, "learning_rate": 9.688365828861625e-06, "loss": 0.7021, "step": 867 }, { "epoch": 0.14, "grad_norm": 1.1349317783807622, "learning_rate": 9.6874582611968e-06, "loss": 0.6557, "step": 868 }, { "epoch": 0.14, "grad_norm": 1.1713535969491309, "learning_rate": 9.686549416539135e-06, "loss": 0.7064, "step": 869 }, { "epoch": 0.14, "grad_norm": 0.8532548834613224, "learning_rate": 9.685639295136224e-06, "loss": 0.5009, "step": 870 }, { "epoch": 0.14, "grad_norm": 1.0712037023872192, "learning_rate": 9.684727897236008e-06, "loss": 0.6097, "step": 871 }, { "epoch": 0.14, "grad_norm": 1.1624935024369119, "learning_rate": 9.683815223086777e-06, "loss": 0.7184, "step": 872 }, { "epoch": 0.14, "grad_norm": 0.7720612323267094, "learning_rate": 9.68290127293717e-06, "loss": 0.5101, "step": 873 }, { "epoch": 0.14, "grad_norm": 1.1434917699888576, "learning_rate": 9.68198604703617e-06, "loss": 0.6994, "step": 874 }, { "epoch": 0.14, "grad_norm": 0.6899637140459434, "learning_rate": 9.681069545633113e-06, "loss": 0.4665, "step": 875 }, { "epoch": 0.14, "grad_norm": 1.1056463048579763, "learning_rate": 9.680151768977676e-06, "loss": 0.6286, "step": 876 }, { "epoch": 0.14, "grad_norm": 1.1516937702379184, "learning_rate": 9.679232717319887e-06, "loss": 0.6927, "step": 877 }, { "epoch": 0.14, "grad_norm": 1.0994712415279446, "learning_rate": 9.678312390910123e-06, "loss": 0.6683, "step": 878 }, { "epoch": 0.14, "grad_norm": 1.1830052145770937, "learning_rate": 9.677390789999106e-06, "loss": 0.7788, "step": 879 }, { "epoch": 0.14, "grad_norm": 1.057782602742653, "learning_rate": 9.676467914837904e-06, "loss": 0.5236, "step": 880 }, { "epoch": 0.14, "grad_norm": 1.5359709510592465, "learning_rate": 9.675543765677935e-06, "loss": 0.5224, "step": 881 }, { "epoch": 0.14, "grad_norm": 1.100154684112589, "learning_rate": 9.674618342770962e-06, "loss": 0.6958, "step": 882 }, { "epoch": 0.14, "grad_norm": 1.136639220551466, "learning_rate": 9.673691646369094e-06, "loss": 0.6564, "step": 883 }, { "epoch": 0.14, "grad_norm": 1.2037814544313608, "learning_rate": 9.672763676724792e-06, "loss": 0.6566, "step": 884 }, { "epoch": 0.14, "grad_norm": 1.1032279079872478, "learning_rate": 9.671834434090861e-06, "loss": 0.6862, "step": 885 }, { "epoch": 0.14, "grad_norm": 1.036831533197075, "learning_rate": 9.670903918720446e-06, "loss": 0.6109, "step": 886 }, { "epoch": 0.14, "grad_norm": 1.2209863702651758, "learning_rate": 9.669972130867053e-06, "loss": 0.675, "step": 887 }, { "epoch": 0.14, "grad_norm": 1.0165531027139059, "learning_rate": 9.66903907078452e-06, "loss": 0.5698, "step": 888 }, { "epoch": 0.14, "grad_norm": 1.123683699176069, "learning_rate": 9.668104738727045e-06, "loss": 0.7357, "step": 889 }, { "epoch": 0.14, "grad_norm": 1.0449187369928445, "learning_rate": 9.667169134949158e-06, "loss": 0.6679, "step": 890 }, { "epoch": 0.14, "grad_norm": 1.117650197986259, "learning_rate": 9.666232259705751e-06, "loss": 0.603, "step": 891 }, { "epoch": 0.14, "grad_norm": 1.0962511217947746, "learning_rate": 9.66529411325205e-06, "loss": 0.7291, "step": 892 }, { "epoch": 0.14, "grad_norm": 1.0411376733134625, "learning_rate": 9.664354695843632e-06, "loss": 0.6411, "step": 893 }, { "epoch": 0.14, "grad_norm": 1.1233724061884105, "learning_rate": 9.66341400773642e-06, "loss": 0.5903, "step": 894 }, { "epoch": 0.14, "grad_norm": 1.1429336410299331, "learning_rate": 9.662472049186688e-06, "loss": 0.7038, "step": 895 }, { "epoch": 0.14, "grad_norm": 1.1443299551680814, "learning_rate": 9.661528820451045e-06, "loss": 0.7106, "step": 896 }, { "epoch": 0.14, "grad_norm": 1.1385015234568867, "learning_rate": 9.660584321786456e-06, "loss": 0.5751, "step": 897 }, { "epoch": 0.14, "grad_norm": 1.0473321858348568, "learning_rate": 9.65963855345023e-06, "loss": 0.6439, "step": 898 }, { "epoch": 0.14, "grad_norm": 1.0886459659523429, "learning_rate": 9.658691515700016e-06, "loss": 0.693, "step": 899 }, { "epoch": 0.15, "grad_norm": 1.0793842832486205, "learning_rate": 9.657743208793818e-06, "loss": 0.584, "step": 900 }, { "epoch": 0.15, "grad_norm": 1.1213955090964036, "learning_rate": 9.656793632989976e-06, "loss": 0.7693, "step": 901 }, { "epoch": 0.15, "grad_norm": 1.0862173410923455, "learning_rate": 9.655842788547184e-06, "loss": 0.5919, "step": 902 }, { "epoch": 0.15, "grad_norm": 1.112356874383914, "learning_rate": 9.654890675724478e-06, "loss": 0.6794, "step": 903 }, { "epoch": 0.15, "grad_norm": 1.2235856467663266, "learning_rate": 9.653937294781237e-06, "loss": 0.6724, "step": 904 }, { "epoch": 0.15, "grad_norm": 1.1662553926184664, "learning_rate": 9.652982645977193e-06, "loss": 0.6753, "step": 905 }, { "epoch": 0.15, "grad_norm": 1.1928430117441486, "learning_rate": 9.652026729572415e-06, "loss": 0.6826, "step": 906 }, { "epoch": 0.15, "grad_norm": 1.1172491333052608, "learning_rate": 9.651069545827321e-06, "loss": 0.6576, "step": 907 }, { "epoch": 0.15, "grad_norm": 1.1087359463284592, "learning_rate": 9.650111095002676e-06, "loss": 0.6077, "step": 908 }, { "epoch": 0.15, "grad_norm": 1.0960101508285884, "learning_rate": 9.649151377359588e-06, "loss": 0.7588, "step": 909 }, { "epoch": 0.15, "grad_norm": 1.113640118578029, "learning_rate": 9.64819039315951e-06, "loss": 0.6631, "step": 910 }, { "epoch": 0.15, "grad_norm": 1.0757990384428555, "learning_rate": 9.647228142664241e-06, "loss": 0.6957, "step": 911 }, { "epoch": 0.15, "grad_norm": 1.0964473543020707, "learning_rate": 9.646264626135924e-06, "loss": 0.6412, "step": 912 }, { "epoch": 0.15, "grad_norm": 1.1738124788974702, "learning_rate": 9.645299843837047e-06, "loss": 0.723, "step": 913 }, { "epoch": 0.15, "grad_norm": 1.1149588254920535, "learning_rate": 9.644333796030444e-06, "loss": 0.6204, "step": 914 }, { "epoch": 0.15, "grad_norm": 1.0234351273863371, "learning_rate": 9.643366482979296e-06, "loss": 0.7208, "step": 915 }, { "epoch": 0.15, "grad_norm": 1.0846232740957282, "learning_rate": 9.64239790494712e-06, "loss": 0.5845, "step": 916 }, { "epoch": 0.15, "grad_norm": 1.1518984132232157, "learning_rate": 9.641428062197789e-06, "loss": 0.6871, "step": 917 }, { "epoch": 0.15, "grad_norm": 1.0898179301392574, "learning_rate": 9.640456954995509e-06, "loss": 0.6522, "step": 918 }, { "epoch": 0.15, "grad_norm": 1.1011862063784692, "learning_rate": 9.639484583604841e-06, "loss": 0.6956, "step": 919 }, { "epoch": 0.15, "grad_norm": 1.2751146477467272, "learning_rate": 9.638510948290684e-06, "loss": 0.6727, "step": 920 }, { "epoch": 0.15, "grad_norm": 1.0840926302595486, "learning_rate": 9.637536049318284e-06, "loss": 0.7329, "step": 921 }, { "epoch": 0.15, "grad_norm": 1.1685891887418123, "learning_rate": 9.63655988695323e-06, "loss": 0.7877, "step": 922 }, { "epoch": 0.15, "grad_norm": 1.117801561495144, "learning_rate": 9.635582461461455e-06, "loss": 0.6997, "step": 923 }, { "epoch": 0.15, "grad_norm": 1.130812199330516, "learning_rate": 9.634603773109235e-06, "loss": 0.6491, "step": 924 }, { "epoch": 0.15, "grad_norm": 1.0439990237422345, "learning_rate": 9.633623822163196e-06, "loss": 0.693, "step": 925 }, { "epoch": 0.15, "grad_norm": 1.0273413604250146, "learning_rate": 9.6326426088903e-06, "loss": 0.6889, "step": 926 }, { "epoch": 0.15, "grad_norm": 1.0719983580740586, "learning_rate": 9.631660133557858e-06, "loss": 0.6696, "step": 927 }, { "epoch": 0.15, "grad_norm": 1.1047675843873939, "learning_rate": 9.630676396433524e-06, "loss": 0.6232, "step": 928 }, { "epoch": 0.15, "grad_norm": 1.0785846392506815, "learning_rate": 9.629691397785294e-06, "loss": 0.6707, "step": 929 }, { "epoch": 0.15, "grad_norm": 1.0958993582790775, "learning_rate": 9.628705137881509e-06, "loss": 0.6236, "step": 930 }, { "epoch": 0.15, "grad_norm": 1.152198655506274, "learning_rate": 9.627717616990853e-06, "loss": 0.6595, "step": 931 }, { "epoch": 0.15, "grad_norm": 1.0585660818456581, "learning_rate": 9.626728835382353e-06, "loss": 0.7122, "step": 932 }, { "epoch": 0.15, "grad_norm": 1.070465798404608, "learning_rate": 9.625738793325384e-06, "loss": 0.6245, "step": 933 }, { "epoch": 0.15, "grad_norm": 1.153897335597271, "learning_rate": 9.624747491089657e-06, "loss": 0.7077, "step": 934 }, { "epoch": 0.15, "grad_norm": 1.0864882772702205, "learning_rate": 9.623754928945233e-06, "loss": 0.6052, "step": 935 }, { "epoch": 0.15, "grad_norm": 0.994726691565809, "learning_rate": 9.622761107162511e-06, "loss": 0.5298, "step": 936 }, { "epoch": 0.15, "grad_norm": 1.0469794163856567, "learning_rate": 9.621766026012236e-06, "loss": 0.5545, "step": 937 }, { "epoch": 0.15, "grad_norm": 1.169758419362588, "learning_rate": 9.620769685765497e-06, "loss": 0.7218, "step": 938 }, { "epoch": 0.15, "grad_norm": 1.1602537149422971, "learning_rate": 9.619772086693721e-06, "loss": 0.6838, "step": 939 }, { "epoch": 0.15, "grad_norm": 1.070276426000004, "learning_rate": 9.618773229068685e-06, "loss": 0.6879, "step": 940 }, { "epoch": 0.15, "grad_norm": 1.025536597725996, "learning_rate": 9.617773113162505e-06, "loss": 0.6068, "step": 941 }, { "epoch": 0.15, "grad_norm": 0.7313660770269282, "learning_rate": 9.616771739247639e-06, "loss": 0.5093, "step": 942 }, { "epoch": 0.15, "grad_norm": 1.0953577298773127, "learning_rate": 9.615769107596888e-06, "loss": 0.6864, "step": 943 }, { "epoch": 0.15, "grad_norm": 1.0747299133367834, "learning_rate": 9.614765218483398e-06, "loss": 0.6503, "step": 944 }, { "epoch": 0.15, "grad_norm": 1.066941078305158, "learning_rate": 9.613760072180656e-06, "loss": 0.6523, "step": 945 }, { "epoch": 0.15, "grad_norm": 1.157118947739973, "learning_rate": 9.612753668962492e-06, "loss": 0.6954, "step": 946 }, { "epoch": 0.15, "grad_norm": 1.0060634229123426, "learning_rate": 9.611746009103077e-06, "loss": 0.6212, "step": 947 }, { "epoch": 0.15, "grad_norm": 1.1238563226085063, "learning_rate": 9.610737092876924e-06, "loss": 0.7266, "step": 948 }, { "epoch": 0.15, "grad_norm": 1.0458060523573245, "learning_rate": 9.609726920558893e-06, "loss": 0.639, "step": 949 }, { "epoch": 0.15, "grad_norm": 0.9720594327926683, "learning_rate": 9.60871549242418e-06, "loss": 0.6299, "step": 950 }, { "epoch": 0.15, "grad_norm": 1.0991738860570166, "learning_rate": 9.607702808748329e-06, "loss": 0.6112, "step": 951 }, { "epoch": 0.15, "grad_norm": 1.0095207334456053, "learning_rate": 9.60668886980722e-06, "loss": 0.5569, "step": 952 }, { "epoch": 0.15, "grad_norm": 1.1126848004583536, "learning_rate": 9.605673675877077e-06, "loss": 0.6149, "step": 953 }, { "epoch": 0.15, "grad_norm": 1.1911304343291462, "learning_rate": 9.604657227234468e-06, "loss": 0.7099, "step": 954 }, { "epoch": 0.15, "grad_norm": 1.0534403542883153, "learning_rate": 9.603639524156306e-06, "loss": 0.6823, "step": 955 }, { "epoch": 0.15, "grad_norm": 1.0516101331808927, "learning_rate": 9.602620566919834e-06, "loss": 0.6399, "step": 956 }, { "epoch": 0.15, "grad_norm": 1.0499763084025036, "learning_rate": 9.601600355802649e-06, "loss": 0.6496, "step": 957 }, { "epoch": 0.15, "grad_norm": 1.1293754980044328, "learning_rate": 9.600578891082683e-06, "loss": 0.6631, "step": 958 }, { "epoch": 0.15, "grad_norm": 1.0502904566139173, "learning_rate": 9.599556173038211e-06, "loss": 0.5549, "step": 959 }, { "epoch": 0.15, "grad_norm": 1.0759478002381275, "learning_rate": 9.59853220194785e-06, "loss": 0.5845, "step": 960 }, { "epoch": 0.15, "grad_norm": 1.308552938369847, "learning_rate": 9.597506978090558e-06, "loss": 0.5742, "step": 961 }, { "epoch": 0.15, "grad_norm": 1.1206630000574285, "learning_rate": 9.596480501745635e-06, "loss": 0.701, "step": 962 }, { "epoch": 0.16, "grad_norm": 1.0318104664560217, "learning_rate": 9.59545277319272e-06, "loss": 0.5293, "step": 963 }, { "epoch": 0.16, "grad_norm": 1.1529080040459074, "learning_rate": 9.594423792711796e-06, "loss": 0.7055, "step": 964 }, { "epoch": 0.16, "grad_norm": 1.1809894168146542, "learning_rate": 9.593393560583184e-06, "loss": 0.7125, "step": 965 }, { "epoch": 0.16, "grad_norm": 1.180935189197949, "learning_rate": 9.592362077087548e-06, "loss": 0.7648, "step": 966 }, { "epoch": 0.16, "grad_norm": 0.9612061868710139, "learning_rate": 9.591329342505894e-06, "loss": 0.6317, "step": 967 }, { "epoch": 0.16, "grad_norm": 1.0727807330395043, "learning_rate": 9.590295357119565e-06, "loss": 0.6076, "step": 968 }, { "epoch": 0.16, "grad_norm": 1.0055986147244516, "learning_rate": 9.589260121210249e-06, "loss": 0.7117, "step": 969 }, { "epoch": 0.16, "grad_norm": 1.0033832134441991, "learning_rate": 9.588223635059973e-06, "loss": 0.5656, "step": 970 }, { "epoch": 0.16, "grad_norm": 1.0814408716538169, "learning_rate": 9.587185898951104e-06, "loss": 0.6167, "step": 971 }, { "epoch": 0.16, "grad_norm": 1.074292401768141, "learning_rate": 9.58614691316635e-06, "loss": 0.6202, "step": 972 }, { "epoch": 0.16, "grad_norm": 0.9129956297990014, "learning_rate": 9.585106677988756e-06, "loss": 0.5198, "step": 973 }, { "epoch": 0.16, "grad_norm": 1.0685695120202705, "learning_rate": 9.584065193701717e-06, "loss": 0.5342, "step": 974 }, { "epoch": 0.16, "grad_norm": 1.053562906867665, "learning_rate": 9.583022460588956e-06, "loss": 0.6798, "step": 975 }, { "epoch": 0.16, "grad_norm": 0.9782225534476784, "learning_rate": 9.581978478934547e-06, "loss": 0.5582, "step": 976 }, { "epoch": 0.16, "grad_norm": 1.1962585343056356, "learning_rate": 9.580933249022896e-06, "loss": 0.6083, "step": 977 }, { "epoch": 0.16, "grad_norm": 1.05228409684923, "learning_rate": 9.579886771138754e-06, "loss": 0.6743, "step": 978 }, { "epoch": 0.16, "grad_norm": 1.1170726552830834, "learning_rate": 9.578839045567212e-06, "loss": 0.7001, "step": 979 }, { "epoch": 0.16, "grad_norm": 0.9112665215625259, "learning_rate": 9.577790072593696e-06, "loss": 0.5801, "step": 980 }, { "epoch": 0.16, "grad_norm": 1.0661693287778133, "learning_rate": 9.576739852503976e-06, "loss": 0.6034, "step": 981 }, { "epoch": 0.16, "grad_norm": 1.3235536582507976, "learning_rate": 9.575688385584162e-06, "loss": 0.6702, "step": 982 }, { "epoch": 0.16, "grad_norm": 1.0138726072680782, "learning_rate": 9.574635672120705e-06, "loss": 0.6531, "step": 983 }, { "epoch": 0.16, "grad_norm": 0.9893404239322373, "learning_rate": 9.573581712400386e-06, "loss": 0.5796, "step": 984 }, { "epoch": 0.16, "grad_norm": 1.1221063417929105, "learning_rate": 9.57252650671034e-06, "loss": 0.6461, "step": 985 }, { "epoch": 0.16, "grad_norm": 1.1184053021868707, "learning_rate": 9.57147005533803e-06, "loss": 0.6353, "step": 986 }, { "epoch": 0.16, "grad_norm": 1.1394894590771216, "learning_rate": 9.570412358571264e-06, "loss": 0.6956, "step": 987 }, { "epoch": 0.16, "grad_norm": 1.196597060480913, "learning_rate": 9.569353416698187e-06, "loss": 0.7122, "step": 988 }, { "epoch": 0.16, "grad_norm": 1.0919614188821092, "learning_rate": 9.568293230007284e-06, "loss": 0.6799, "step": 989 }, { "epoch": 0.16, "grad_norm": 1.0789266655103467, "learning_rate": 9.56723179878738e-06, "loss": 0.6733, "step": 990 }, { "epoch": 0.16, "grad_norm": 1.054446385079856, "learning_rate": 9.566169123327638e-06, "loss": 0.6982, "step": 991 }, { "epoch": 0.16, "grad_norm": 1.0575308473552738, "learning_rate": 9.565105203917559e-06, "loss": 0.6354, "step": 992 }, { "epoch": 0.16, "grad_norm": 1.0279614479300914, "learning_rate": 9.564040040846984e-06, "loss": 0.6113, "step": 993 }, { "epoch": 0.16, "grad_norm": 1.0083167847563335, "learning_rate": 9.562973634406096e-06, "loss": 0.5922, "step": 994 }, { "epoch": 0.16, "grad_norm": 1.1168000958897386, "learning_rate": 9.561905984885407e-06, "loss": 0.6101, "step": 995 }, { "epoch": 0.16, "grad_norm": 1.0410854175900213, "learning_rate": 9.560837092575781e-06, "loss": 0.5763, "step": 996 }, { "epoch": 0.16, "grad_norm": 0.704859720733422, "learning_rate": 9.55976695776841e-06, "loss": 0.4741, "step": 997 }, { "epoch": 0.16, "grad_norm": 1.1653599862752817, "learning_rate": 9.558695580754828e-06, "loss": 0.7356, "step": 998 }, { "epoch": 0.16, "grad_norm": 1.1021709436890195, "learning_rate": 9.55762296182691e-06, "loss": 0.6582, "step": 999 }, { "epoch": 0.16, "grad_norm": 1.0842460978639399, "learning_rate": 9.556549101276865e-06, "loss": 0.6645, "step": 1000 }, { "epoch": 0.16, "grad_norm": 1.0987064323932816, "learning_rate": 9.555473999397242e-06, "loss": 0.6218, "step": 1001 }, { "epoch": 0.16, "grad_norm": 1.139750477899937, "learning_rate": 9.55439765648093e-06, "loss": 0.7272, "step": 1002 }, { "epoch": 0.16, "grad_norm": 1.0699884904351982, "learning_rate": 9.553320072821154e-06, "loss": 0.7348, "step": 1003 }, { "epoch": 0.16, "grad_norm": 0.9952557290081818, "learning_rate": 9.552241248711478e-06, "loss": 0.5712, "step": 1004 }, { "epoch": 0.16, "grad_norm": 1.0647379138297899, "learning_rate": 9.551161184445801e-06, "loss": 0.6133, "step": 1005 }, { "epoch": 0.16, "grad_norm": 1.017581941066395, "learning_rate": 9.550079880318363e-06, "loss": 0.6248, "step": 1006 }, { "epoch": 0.16, "grad_norm": 0.728936553347363, "learning_rate": 9.548997336623744e-06, "loss": 0.4895, "step": 1007 }, { "epoch": 0.16, "grad_norm": 0.9948388468302773, "learning_rate": 9.547913553656856e-06, "loss": 0.6214, "step": 1008 }, { "epoch": 0.16, "grad_norm": 1.1437496215668703, "learning_rate": 9.54682853171295e-06, "loss": 0.6694, "step": 1009 }, { "epoch": 0.16, "grad_norm": 1.1952011541009888, "learning_rate": 9.545742271087622e-06, "loss": 0.728, "step": 1010 }, { "epoch": 0.16, "grad_norm": 1.081529900865028, "learning_rate": 9.54465477207679e-06, "loss": 0.6773, "step": 1011 }, { "epoch": 0.16, "grad_norm": 1.050731103556642, "learning_rate": 9.543566034976728e-06, "loss": 0.5817, "step": 1012 }, { "epoch": 0.16, "grad_norm": 1.0321591924352482, "learning_rate": 9.542476060084035e-06, "loss": 0.6412, "step": 1013 }, { "epoch": 0.16, "grad_norm": 1.0273157733773564, "learning_rate": 9.541384847695645e-06, "loss": 0.5903, "step": 1014 }, { "epoch": 0.16, "grad_norm": 1.0304904001691007, "learning_rate": 9.540292398108842e-06, "loss": 0.6062, "step": 1015 }, { "epoch": 0.16, "grad_norm": 1.081837612606692, "learning_rate": 9.539198711621234e-06, "loss": 0.6252, "step": 1016 }, { "epoch": 0.16, "grad_norm": 1.1295474336579325, "learning_rate": 9.538103788530773e-06, "loss": 0.6425, "step": 1017 }, { "epoch": 0.16, "grad_norm": 1.0194068114574404, "learning_rate": 9.537007629135745e-06, "loss": 0.5517, "step": 1018 }, { "epoch": 0.16, "grad_norm": 1.0325828575722746, "learning_rate": 9.53591023373478e-06, "loss": 0.5903, "step": 1019 }, { "epoch": 0.16, "grad_norm": 1.0260765734962127, "learning_rate": 9.53481160262683e-06, "loss": 0.5698, "step": 1020 }, { "epoch": 0.16, "grad_norm": 1.0951006092215407, "learning_rate": 9.5337117361112e-06, "loss": 0.6442, "step": 1021 }, { "epoch": 0.16, "grad_norm": 1.1621859396641459, "learning_rate": 9.532610634487519e-06, "loss": 0.6824, "step": 1022 }, { "epoch": 0.16, "grad_norm": 1.2389190815244484, "learning_rate": 9.531508298055758e-06, "loss": 0.793, "step": 1023 }, { "epoch": 0.16, "grad_norm": 1.087497083191494, "learning_rate": 9.530404727116225e-06, "loss": 0.6303, "step": 1024 }, { "epoch": 0.17, "grad_norm": 0.6777898595721662, "learning_rate": 9.529299921969563e-06, "loss": 0.5097, "step": 1025 }, { "epoch": 0.17, "grad_norm": 1.0915995902746736, "learning_rate": 9.528193882916753e-06, "loss": 0.6837, "step": 1026 }, { "epoch": 0.17, "grad_norm": 1.1085655973232171, "learning_rate": 9.52708661025911e-06, "loss": 0.6219, "step": 1027 }, { "epoch": 0.17, "grad_norm": 1.0452463858932728, "learning_rate": 9.525978104298282e-06, "loss": 0.6222, "step": 1028 }, { "epoch": 0.17, "grad_norm": 1.052396718240974, "learning_rate": 9.524868365336259e-06, "loss": 0.7177, "step": 1029 }, { "epoch": 0.17, "grad_norm": 1.0489006976855053, "learning_rate": 9.523757393675365e-06, "loss": 0.5991, "step": 1030 }, { "epoch": 0.17, "grad_norm": 0.994170797456951, "learning_rate": 9.522645189618258e-06, "loss": 0.5957, "step": 1031 }, { "epoch": 0.17, "grad_norm": 1.0648631326669196, "learning_rate": 9.521531753467935e-06, "loss": 0.5667, "step": 1032 }, { "epoch": 0.17, "grad_norm": 1.024249069294412, "learning_rate": 9.520417085527725e-06, "loss": 0.6865, "step": 1033 }, { "epoch": 0.17, "grad_norm": 1.0323248495750048, "learning_rate": 9.519301186101297e-06, "loss": 0.5894, "step": 1034 }, { "epoch": 0.17, "grad_norm": 1.1872508562299484, "learning_rate": 9.518184055492648e-06, "loss": 0.7089, "step": 1035 }, { "epoch": 0.17, "grad_norm": 1.0752819051439941, "learning_rate": 9.517065694006118e-06, "loss": 0.666, "step": 1036 }, { "epoch": 0.17, "grad_norm": 1.1105561466253857, "learning_rate": 9.51594610194638e-06, "loss": 0.6737, "step": 1037 }, { "epoch": 0.17, "grad_norm": 0.8154925649063165, "learning_rate": 9.514825279618443e-06, "loss": 0.5242, "step": 1038 }, { "epoch": 0.17, "grad_norm": 1.1074637562604694, "learning_rate": 9.513703227327646e-06, "loss": 0.6536, "step": 1039 }, { "epoch": 0.17, "grad_norm": 1.0737546944299396, "learning_rate": 9.512579945379668e-06, "loss": 0.6429, "step": 1040 }, { "epoch": 0.17, "grad_norm": 1.1286206779146506, "learning_rate": 9.511455434080525e-06, "loss": 0.5987, "step": 1041 }, { "epoch": 0.17, "grad_norm": 1.1319256860563078, "learning_rate": 9.51032969373656e-06, "loss": 0.6941, "step": 1042 }, { "epoch": 0.17, "grad_norm": 0.9797067007811695, "learning_rate": 9.50920272465446e-06, "loss": 0.644, "step": 1043 }, { "epoch": 0.17, "grad_norm": 1.1730949121799528, "learning_rate": 9.508074527141243e-06, "loss": 0.6962, "step": 1044 }, { "epoch": 0.17, "grad_norm": 1.0604995218014863, "learning_rate": 9.506945101504256e-06, "loss": 0.6512, "step": 1045 }, { "epoch": 0.17, "grad_norm": 1.1151795995324953, "learning_rate": 9.50581444805119e-06, "loss": 0.7242, "step": 1046 }, { "epoch": 0.17, "grad_norm": 0.7311448785681954, "learning_rate": 9.504682567090064e-06, "loss": 0.5115, "step": 1047 }, { "epoch": 0.17, "grad_norm": 1.0365243238319133, "learning_rate": 9.503549458929236e-06, "loss": 0.6794, "step": 1048 }, { "epoch": 0.17, "grad_norm": 1.0044721735542828, "learning_rate": 9.502415123877393e-06, "loss": 0.5497, "step": 1049 }, { "epoch": 0.17, "grad_norm": 1.1242288466236354, "learning_rate": 9.501279562243561e-06, "loss": 0.6752, "step": 1050 }, { "epoch": 0.17, "grad_norm": 1.1331670185085372, "learning_rate": 9.500142774337097e-06, "loss": 0.7197, "step": 1051 }, { "epoch": 0.17, "grad_norm": 1.0103550192543755, "learning_rate": 9.499004760467694e-06, "loss": 0.603, "step": 1052 }, { "epoch": 0.17, "grad_norm": 1.0533246463330384, "learning_rate": 9.49786552094538e-06, "loss": 0.5605, "step": 1053 }, { "epoch": 0.17, "grad_norm": 1.0451492548509471, "learning_rate": 9.496725056080512e-06, "loss": 0.7136, "step": 1054 }, { "epoch": 0.17, "grad_norm": 0.9921862442775142, "learning_rate": 9.495583366183788e-06, "loss": 0.6157, "step": 1055 }, { "epoch": 0.17, "grad_norm": 1.1750595671905533, "learning_rate": 9.494440451566232e-06, "loss": 0.6459, "step": 1056 }, { "epoch": 0.17, "grad_norm": 0.9992534350081556, "learning_rate": 9.493296312539206e-06, "loss": 0.6489, "step": 1057 }, { "epoch": 0.17, "grad_norm": 1.0321907251505391, "learning_rate": 9.492150949414408e-06, "loss": 0.6748, "step": 1058 }, { "epoch": 0.17, "grad_norm": 1.045928752077126, "learning_rate": 9.491004362503862e-06, "loss": 0.6338, "step": 1059 }, { "epoch": 0.17, "grad_norm": 1.1068064325353986, "learning_rate": 9.489856552119934e-06, "loss": 0.6598, "step": 1060 }, { "epoch": 0.17, "grad_norm": 1.0620415062035822, "learning_rate": 9.488707518575319e-06, "loss": 0.5837, "step": 1061 }, { "epoch": 0.17, "grad_norm": 1.0663443799558874, "learning_rate": 9.487557262183043e-06, "loss": 0.6156, "step": 1062 }, { "epoch": 0.17, "grad_norm": 1.0861266500955866, "learning_rate": 9.486405783256467e-06, "loss": 0.6903, "step": 1063 }, { "epoch": 0.17, "grad_norm": 1.0823357865648187, "learning_rate": 9.485253082109289e-06, "loss": 0.6214, "step": 1064 }, { "epoch": 0.17, "grad_norm": 1.0753546883658682, "learning_rate": 9.484099159055534e-06, "loss": 0.6628, "step": 1065 }, { "epoch": 0.17, "grad_norm": 1.0724229101600617, "learning_rate": 9.482944014409563e-06, "loss": 0.6598, "step": 1066 }, { "epoch": 0.17, "grad_norm": 1.0135985503263316, "learning_rate": 9.481787648486069e-06, "loss": 0.6221, "step": 1067 }, { "epoch": 0.17, "grad_norm": 1.086275601865181, "learning_rate": 9.480630061600079e-06, "loss": 0.7074, "step": 1068 }, { "epoch": 0.17, "grad_norm": 1.0964530637608785, "learning_rate": 9.47947125406695e-06, "loss": 0.6518, "step": 1069 }, { "epoch": 0.17, "grad_norm": 1.0723795638785913, "learning_rate": 9.478311226202375e-06, "loss": 0.6972, "step": 1070 }, { "epoch": 0.17, "grad_norm": 1.0992302190855479, "learning_rate": 9.477149978322378e-06, "loss": 0.6634, "step": 1071 }, { "epoch": 0.17, "grad_norm": 1.1613175510587674, "learning_rate": 9.475987510743311e-06, "loss": 0.6912, "step": 1072 }, { "epoch": 0.17, "grad_norm": 1.027713959737752, "learning_rate": 9.474823823781866e-06, "loss": 0.6543, "step": 1073 }, { "epoch": 0.17, "grad_norm": 1.128922780945922, "learning_rate": 9.473658917755063e-06, "loss": 0.6255, "step": 1074 }, { "epoch": 0.17, "grad_norm": 1.0984362213611925, "learning_rate": 9.472492792980252e-06, "loss": 0.6751, "step": 1075 }, { "epoch": 0.17, "grad_norm": 1.0502364975895737, "learning_rate": 9.471325449775119e-06, "loss": 0.6352, "step": 1076 }, { "epoch": 0.17, "grad_norm": 1.0327652001917453, "learning_rate": 9.470156888457682e-06, "loss": 0.5715, "step": 1077 }, { "epoch": 0.17, "grad_norm": 1.0220506068336401, "learning_rate": 9.468987109346288e-06, "loss": 0.636, "step": 1078 }, { "epoch": 0.17, "grad_norm": 1.1005100810369575, "learning_rate": 9.467816112759616e-06, "loss": 0.6752, "step": 1079 }, { "epoch": 0.17, "grad_norm": 1.0560512286705024, "learning_rate": 9.46664389901668e-06, "loss": 0.565, "step": 1080 }, { "epoch": 0.17, "grad_norm": 1.1426094207887838, "learning_rate": 9.465470468436822e-06, "loss": 0.649, "step": 1081 }, { "epoch": 0.17, "grad_norm": 1.025384478031677, "learning_rate": 9.464295821339715e-06, "loss": 0.6952, "step": 1082 }, { "epoch": 0.17, "grad_norm": 0.9452834167643365, "learning_rate": 9.46311995804537e-06, "loss": 0.5813, "step": 1083 }, { "epoch": 0.17, "grad_norm": 1.0134587988017134, "learning_rate": 9.46194287887412e-06, "loss": 0.6274, "step": 1084 }, { "epoch": 0.17, "grad_norm": 1.0463946367818822, "learning_rate": 9.460764584146635e-06, "loss": 0.644, "step": 1085 }, { "epoch": 0.17, "grad_norm": 1.0740947551913984, "learning_rate": 9.459585074183919e-06, "loss": 0.6753, "step": 1086 }, { "epoch": 0.18, "grad_norm": 1.1403711895037398, "learning_rate": 9.458404349307295e-06, "loss": 0.6609, "step": 1087 }, { "epoch": 0.18, "grad_norm": 0.7380717786674862, "learning_rate": 9.457222409838433e-06, "loss": 0.4881, "step": 1088 }, { "epoch": 0.18, "grad_norm": 1.1102071189666116, "learning_rate": 9.456039256099321e-06, "loss": 0.5692, "step": 1089 }, { "epoch": 0.18, "grad_norm": 1.139399781337805, "learning_rate": 9.454854888412285e-06, "loss": 0.7134, "step": 1090 }, { "epoch": 0.18, "grad_norm": 1.0099436525935124, "learning_rate": 9.453669307099978e-06, "loss": 0.6956, "step": 1091 }, { "epoch": 0.18, "grad_norm": 1.0961834439674991, "learning_rate": 9.452482512485386e-06, "loss": 0.6881, "step": 1092 }, { "epoch": 0.18, "grad_norm": 1.1129848859481177, "learning_rate": 9.451294504891824e-06, "loss": 0.6388, "step": 1093 }, { "epoch": 0.18, "grad_norm": 1.0409418072682515, "learning_rate": 9.450105284642938e-06, "loss": 0.6821, "step": 1094 }, { "epoch": 0.18, "grad_norm": 0.6940216433613278, "learning_rate": 9.448914852062705e-06, "loss": 0.4935, "step": 1095 }, { "epoch": 0.18, "grad_norm": 1.041109449996568, "learning_rate": 9.447723207475432e-06, "loss": 0.599, "step": 1096 }, { "epoch": 0.18, "grad_norm": 1.071810706869906, "learning_rate": 9.446530351205754e-06, "loss": 0.6563, "step": 1097 }, { "epoch": 0.18, "grad_norm": 1.0791535216175594, "learning_rate": 9.445336283578639e-06, "loss": 0.6994, "step": 1098 }, { "epoch": 0.18, "grad_norm": 1.131459703710808, "learning_rate": 9.444141004919385e-06, "loss": 0.6824, "step": 1099 }, { "epoch": 0.18, "grad_norm": 1.1109924808320397, "learning_rate": 9.442944515553616e-06, "loss": 0.6892, "step": 1100 }, { "epoch": 0.18, "grad_norm": 1.0613221388403484, "learning_rate": 9.441746815807292e-06, "loss": 0.6862, "step": 1101 }, { "epoch": 0.18, "grad_norm": 1.1481310697294973, "learning_rate": 9.440547906006697e-06, "loss": 0.7421, "step": 1102 }, { "epoch": 0.18, "grad_norm": 1.1421279577171106, "learning_rate": 9.439347786478448e-06, "loss": 0.6875, "step": 1103 }, { "epoch": 0.18, "grad_norm": 1.182388471131474, "learning_rate": 9.438146457549491e-06, "loss": 0.7018, "step": 1104 }, { "epoch": 0.18, "grad_norm": 1.1853389093598405, "learning_rate": 9.436943919547101e-06, "loss": 0.6955, "step": 1105 }, { "epoch": 0.18, "grad_norm": 1.14598600020169, "learning_rate": 9.435740172798881e-06, "loss": 0.6452, "step": 1106 }, { "epoch": 0.18, "grad_norm": 1.0069612755304904, "learning_rate": 9.434535217632768e-06, "loss": 0.6274, "step": 1107 }, { "epoch": 0.18, "grad_norm": 1.142387996447214, "learning_rate": 9.433329054377023e-06, "loss": 0.6869, "step": 1108 }, { "epoch": 0.18, "grad_norm": 1.117654650314097, "learning_rate": 9.432121683360235e-06, "loss": 0.736, "step": 1109 }, { "epoch": 0.18, "grad_norm": 1.1278281277060207, "learning_rate": 9.430913104911331e-06, "loss": 0.6747, "step": 1110 }, { "epoch": 0.18, "grad_norm": 1.1349248587271872, "learning_rate": 9.429703319359558e-06, "loss": 0.672, "step": 1111 }, { "epoch": 0.18, "grad_norm": 1.024185517192485, "learning_rate": 9.428492327034496e-06, "loss": 0.6033, "step": 1112 }, { "epoch": 0.18, "grad_norm": 0.9511302016841211, "learning_rate": 9.427280128266049e-06, "loss": 0.6185, "step": 1113 }, { "epoch": 0.18, "grad_norm": 1.1228438446502222, "learning_rate": 9.42606672338446e-06, "loss": 0.7208, "step": 1114 }, { "epoch": 0.18, "grad_norm": 1.036050911711722, "learning_rate": 9.424852112720289e-06, "loss": 0.5679, "step": 1115 }, { "epoch": 0.18, "grad_norm": 1.129863645277397, "learning_rate": 9.42363629660443e-06, "loss": 0.6281, "step": 1116 }, { "epoch": 0.18, "grad_norm": 1.0226645253793512, "learning_rate": 9.422419275368107e-06, "loss": 0.5991, "step": 1117 }, { "epoch": 0.18, "grad_norm": 1.069717462366646, "learning_rate": 9.421201049342867e-06, "loss": 0.6227, "step": 1118 }, { "epoch": 0.18, "grad_norm": 1.0827797842288633, "learning_rate": 9.41998161886059e-06, "loss": 0.6439, "step": 1119 }, { "epoch": 0.18, "grad_norm": 1.1153927857178165, "learning_rate": 9.418760984253485e-06, "loss": 0.6253, "step": 1120 }, { "epoch": 0.18, "grad_norm": 1.0532959580783336, "learning_rate": 9.417539145854082e-06, "loss": 0.6771, "step": 1121 }, { "epoch": 0.18, "grad_norm": 1.0562968714954466, "learning_rate": 9.416316103995246e-06, "loss": 0.6069, "step": 1122 }, { "epoch": 0.18, "grad_norm": 1.0457506568836676, "learning_rate": 9.415091859010165e-06, "loss": 0.5926, "step": 1123 }, { "epoch": 0.18, "grad_norm": 1.0366500652286177, "learning_rate": 9.413866411232361e-06, "loss": 0.6296, "step": 1124 }, { "epoch": 0.18, "grad_norm": 1.0985700431079428, "learning_rate": 9.41263976099568e-06, "loss": 0.6496, "step": 1125 }, { "epoch": 0.18, "grad_norm": 1.0169290242622118, "learning_rate": 9.411411908634289e-06, "loss": 0.6111, "step": 1126 }, { "epoch": 0.18, "grad_norm": 1.0410409589621543, "learning_rate": 9.410182854482693e-06, "loss": 0.6308, "step": 1127 }, { "epoch": 0.18, "grad_norm": 0.8426116974971252, "learning_rate": 9.40895259887572e-06, "loss": 0.5101, "step": 1128 }, { "epoch": 0.18, "grad_norm": 1.1097992827551972, "learning_rate": 9.407721142148528e-06, "loss": 0.6285, "step": 1129 }, { "epoch": 0.18, "grad_norm": 0.9762140060945351, "learning_rate": 9.406488484636595e-06, "loss": 0.6862, "step": 1130 }, { "epoch": 0.18, "grad_norm": 1.2300256645964416, "learning_rate": 9.405254626675736e-06, "loss": 0.7907, "step": 1131 }, { "epoch": 0.18, "grad_norm": 1.0385376128817734, "learning_rate": 9.404019568602081e-06, "loss": 0.6059, "step": 1132 }, { "epoch": 0.18, "grad_norm": 1.0396253594334501, "learning_rate": 9.402783310752102e-06, "loss": 0.6212, "step": 1133 }, { "epoch": 0.18, "grad_norm": 1.1077407311219116, "learning_rate": 9.401545853462584e-06, "loss": 0.6282, "step": 1134 }, { "epoch": 0.18, "grad_norm": 1.0331647075588999, "learning_rate": 9.400307197070646e-06, "loss": 0.6491, "step": 1135 }, { "epoch": 0.18, "grad_norm": 1.0598044177470136, "learning_rate": 9.399067341913732e-06, "loss": 0.5906, "step": 1136 }, { "epoch": 0.18, "grad_norm": 1.080395347863945, "learning_rate": 9.397826288329615e-06, "loss": 0.6683, "step": 1137 }, { "epoch": 0.18, "grad_norm": 0.7950180638145538, "learning_rate": 9.396584036656388e-06, "loss": 0.4966, "step": 1138 }, { "epoch": 0.18, "grad_norm": 1.2139499886683336, "learning_rate": 9.395340587232478e-06, "loss": 0.7255, "step": 1139 }, { "epoch": 0.18, "grad_norm": 1.0494093727175315, "learning_rate": 9.394095940396632e-06, "loss": 0.6709, "step": 1140 }, { "epoch": 0.18, "grad_norm": 1.042252114979571, "learning_rate": 9.39285009648793e-06, "loss": 0.5401, "step": 1141 }, { "epoch": 0.18, "grad_norm": 1.2010949383594216, "learning_rate": 9.391603055845771e-06, "loss": 0.7117, "step": 1142 }, { "epoch": 0.18, "grad_norm": 0.9972246359789446, "learning_rate": 9.390354818809886e-06, "loss": 0.7257, "step": 1143 }, { "epoch": 0.18, "grad_norm": 0.9553562407962161, "learning_rate": 9.389105385720325e-06, "loss": 0.5333, "step": 1144 }, { "epoch": 0.18, "grad_norm": 1.0298252606824072, "learning_rate": 9.387854756917472e-06, "loss": 0.5917, "step": 1145 }, { "epoch": 0.18, "grad_norm": 1.0586860634228825, "learning_rate": 9.386602932742032e-06, "loss": 0.6591, "step": 1146 }, { "epoch": 0.18, "grad_norm": 1.119388948541729, "learning_rate": 9.385349913535034e-06, "loss": 0.6465, "step": 1147 }, { "epoch": 0.18, "grad_norm": 1.0953107410902572, "learning_rate": 9.384095699637839e-06, "loss": 0.6314, "step": 1148 }, { "epoch": 0.19, "grad_norm": 0.6623052598265775, "learning_rate": 9.382840291392124e-06, "loss": 0.4491, "step": 1149 }, { "epoch": 0.19, "grad_norm": 1.0634631297463455, "learning_rate": 9.3815836891399e-06, "loss": 0.6581, "step": 1150 }, { "epoch": 0.19, "grad_norm": 1.0812505534269203, "learning_rate": 9.380325893223503e-06, "loss": 0.7094, "step": 1151 }, { "epoch": 0.19, "grad_norm": 0.6011310255268045, "learning_rate": 9.379066903985588e-06, "loss": 0.4815, "step": 1152 }, { "epoch": 0.19, "grad_norm": 1.0593325736721697, "learning_rate": 9.377806721769138e-06, "loss": 0.616, "step": 1153 }, { "epoch": 0.19, "grad_norm": 1.0850398478736807, "learning_rate": 9.37654534691746e-06, "loss": 0.6381, "step": 1154 }, { "epoch": 0.19, "grad_norm": 1.0907408361667887, "learning_rate": 9.37528277977419e-06, "loss": 0.5573, "step": 1155 }, { "epoch": 0.19, "grad_norm": 1.0922646132955156, "learning_rate": 9.374019020683287e-06, "loss": 0.6532, "step": 1156 }, { "epoch": 0.19, "grad_norm": 1.1344615053723808, "learning_rate": 9.372754069989032e-06, "loss": 0.6822, "step": 1157 }, { "epoch": 0.19, "grad_norm": 1.0555579156587251, "learning_rate": 9.371487928036032e-06, "loss": 0.6476, "step": 1158 }, { "epoch": 0.19, "grad_norm": 1.0259261792104715, "learning_rate": 9.37022059516922e-06, "loss": 0.6442, "step": 1159 }, { "epoch": 0.19, "grad_norm": 0.8984732101302717, "learning_rate": 9.368952071733851e-06, "loss": 0.604, "step": 1160 }, { "epoch": 0.19, "grad_norm": 1.1010232792375327, "learning_rate": 9.367682358075509e-06, "loss": 0.7074, "step": 1161 }, { "epoch": 0.19, "grad_norm": 1.0814698431766612, "learning_rate": 9.366411454540095e-06, "loss": 0.6852, "step": 1162 }, { "epoch": 0.19, "grad_norm": 0.9417378999784696, "learning_rate": 9.36513936147384e-06, "loss": 0.6336, "step": 1163 }, { "epoch": 0.19, "grad_norm": 0.9926096428894494, "learning_rate": 9.363866079223299e-06, "loss": 0.6062, "step": 1164 }, { "epoch": 0.19, "grad_norm": 1.2087772481159154, "learning_rate": 9.362591608135346e-06, "loss": 0.7518, "step": 1165 }, { "epoch": 0.19, "grad_norm": 1.4209728972648705, "learning_rate": 9.361315948557182e-06, "loss": 0.6574, "step": 1166 }, { "epoch": 0.19, "grad_norm": 1.0622213307593036, "learning_rate": 9.360039100836335e-06, "loss": 0.6411, "step": 1167 }, { "epoch": 0.19, "grad_norm": 1.1067636537158854, "learning_rate": 9.358761065320653e-06, "loss": 0.6431, "step": 1168 }, { "epoch": 0.19, "grad_norm": 1.1398463939948387, "learning_rate": 9.357481842358302e-06, "loss": 0.7377, "step": 1169 }, { "epoch": 0.19, "grad_norm": 0.9815774673164988, "learning_rate": 9.356201432297788e-06, "loss": 0.6614, "step": 1170 }, { "epoch": 0.19, "grad_norm": 0.8941479688054781, "learning_rate": 9.354919835487922e-06, "loss": 0.4741, "step": 1171 }, { "epoch": 0.19, "grad_norm": 1.044632672835645, "learning_rate": 9.353637052277848e-06, "loss": 0.656, "step": 1172 }, { "epoch": 0.19, "grad_norm": 1.052307133992648, "learning_rate": 9.352353083017034e-06, "loss": 0.6429, "step": 1173 }, { "epoch": 0.19, "grad_norm": 1.2004441664637213, "learning_rate": 9.351067928055265e-06, "loss": 0.7377, "step": 1174 }, { "epoch": 0.19, "grad_norm": 1.127592989401024, "learning_rate": 9.349781587742655e-06, "loss": 0.6029, "step": 1175 }, { "epoch": 0.19, "grad_norm": 1.1248373783626897, "learning_rate": 9.348494062429639e-06, "loss": 0.6627, "step": 1176 }, { "epoch": 0.19, "grad_norm": 1.0168885892101591, "learning_rate": 9.347205352466972e-06, "loss": 0.5711, "step": 1177 }, { "epoch": 0.19, "grad_norm": 1.1507393033298814, "learning_rate": 9.345915458205736e-06, "loss": 0.6199, "step": 1178 }, { "epoch": 0.19, "grad_norm": 1.0560258294320832, "learning_rate": 9.344624379997335e-06, "loss": 0.6065, "step": 1179 }, { "epoch": 0.19, "grad_norm": 1.1027335552100108, "learning_rate": 9.343332118193492e-06, "loss": 0.66, "step": 1180 }, { "epoch": 0.19, "grad_norm": 1.0216012701447972, "learning_rate": 9.342038673146253e-06, "loss": 0.6253, "step": 1181 }, { "epoch": 0.19, "grad_norm": 1.0188424914554506, "learning_rate": 9.340744045207992e-06, "loss": 0.6847, "step": 1182 }, { "epoch": 0.19, "grad_norm": 1.044803461282638, "learning_rate": 9.3394482347314e-06, "loss": 0.5991, "step": 1183 }, { "epoch": 0.19, "grad_norm": 1.1602146469722492, "learning_rate": 9.338151242069491e-06, "loss": 0.6096, "step": 1184 }, { "epoch": 0.19, "grad_norm": 1.0128035216136144, "learning_rate": 9.336853067575603e-06, "loss": 0.6396, "step": 1185 }, { "epoch": 0.19, "grad_norm": 1.1933892623226698, "learning_rate": 9.335553711603393e-06, "loss": 0.6524, "step": 1186 }, { "epoch": 0.19, "grad_norm": 1.0013575123851317, "learning_rate": 9.33425317450684e-06, "loss": 0.573, "step": 1187 }, { "epoch": 0.19, "grad_norm": 1.1586743928279306, "learning_rate": 9.332951456640249e-06, "loss": 0.6981, "step": 1188 }, { "epoch": 0.19, "grad_norm": 0.9976559520022303, "learning_rate": 9.331648558358244e-06, "loss": 0.6135, "step": 1189 }, { "epoch": 0.19, "grad_norm": 1.0464451762886755, "learning_rate": 9.330344480015767e-06, "loss": 0.6205, "step": 1190 }, { "epoch": 0.19, "grad_norm": 1.1256798629405222, "learning_rate": 9.329039221968087e-06, "loss": 0.6796, "step": 1191 }, { "epoch": 0.19, "grad_norm": 0.7693710804160085, "learning_rate": 9.327732784570794e-06, "loss": 0.5184, "step": 1192 }, { "epoch": 0.19, "grad_norm": 0.9727490541022548, "learning_rate": 9.326425168179795e-06, "loss": 0.6014, "step": 1193 }, { "epoch": 0.19, "grad_norm": 1.0697413606763762, "learning_rate": 9.325116373151322e-06, "loss": 0.673, "step": 1194 }, { "epoch": 0.19, "grad_norm": 1.0501332540088344, "learning_rate": 9.323806399841927e-06, "loss": 0.6269, "step": 1195 }, { "epoch": 0.19, "grad_norm": 1.0820759551201784, "learning_rate": 9.32249524860848e-06, "loss": 0.6542, "step": 1196 }, { "epoch": 0.19, "grad_norm": 1.0323939610653803, "learning_rate": 9.321182919808179e-06, "loss": 0.6186, "step": 1197 }, { "epoch": 0.19, "grad_norm": 1.156914332748338, "learning_rate": 9.319869413798535e-06, "loss": 0.6454, "step": 1198 }, { "epoch": 0.19, "grad_norm": 1.16910904059714, "learning_rate": 9.318554730937385e-06, "loss": 0.6349, "step": 1199 }, { "epoch": 0.19, "grad_norm": 0.6277148539258679, "learning_rate": 9.317238871582886e-06, "loss": 0.4597, "step": 1200 }, { "epoch": 0.19, "grad_norm": 0.9519685426947867, "learning_rate": 9.31592183609351e-06, "loss": 0.566, "step": 1201 }, { "epoch": 0.19, "grad_norm": 1.1131236362461228, "learning_rate": 9.31460362482806e-06, "loss": 0.5806, "step": 1202 }, { "epoch": 0.19, "grad_norm": 0.9914960013657294, "learning_rate": 9.313284238145648e-06, "loss": 0.5832, "step": 1203 }, { "epoch": 0.19, "grad_norm": 1.1394984204933547, "learning_rate": 9.311963676405716e-06, "loss": 0.6665, "step": 1204 }, { "epoch": 0.19, "grad_norm": 1.141113550590736, "learning_rate": 9.310641939968016e-06, "loss": 0.6722, "step": 1205 }, { "epoch": 0.19, "grad_norm": 1.0899706323857257, "learning_rate": 9.309319029192627e-06, "loss": 0.6116, "step": 1206 }, { "epoch": 0.19, "grad_norm": 1.1064720127728438, "learning_rate": 9.307994944439949e-06, "loss": 0.6702, "step": 1207 }, { "epoch": 0.19, "grad_norm": 1.1719854279700321, "learning_rate": 9.306669686070697e-06, "loss": 0.6232, "step": 1208 }, { "epoch": 0.19, "grad_norm": 1.0968729728740734, "learning_rate": 9.30534325444591e-06, "loss": 0.7028, "step": 1209 }, { "epoch": 0.19, "grad_norm": 0.6894523783298816, "learning_rate": 9.304015649926941e-06, "loss": 0.4883, "step": 1210 }, { "epoch": 0.2, "grad_norm": 1.086859867690113, "learning_rate": 9.30268687287547e-06, "loss": 0.683, "step": 1211 }, { "epoch": 0.2, "grad_norm": 1.0680938255003465, "learning_rate": 9.30135692365349e-06, "loss": 0.6597, "step": 1212 }, { "epoch": 0.2, "grad_norm": 1.012338176949859, "learning_rate": 9.300025802623316e-06, "loss": 0.6656, "step": 1213 }, { "epoch": 0.2, "grad_norm": 1.0306906641337574, "learning_rate": 9.29869351014758e-06, "loss": 0.5925, "step": 1214 }, { "epoch": 0.2, "grad_norm": 0.719161842505433, "learning_rate": 9.29736004658924e-06, "loss": 0.4964, "step": 1215 }, { "epoch": 0.2, "grad_norm": 1.1849584994273956, "learning_rate": 9.296025412311567e-06, "loss": 0.7035, "step": 1216 }, { "epoch": 0.2, "grad_norm": 1.0587016842136787, "learning_rate": 9.294689607678151e-06, "loss": 0.656, "step": 1217 }, { "epoch": 0.2, "grad_norm": 1.1043385773877985, "learning_rate": 9.293352633052901e-06, "loss": 0.5673, "step": 1218 }, { "epoch": 0.2, "grad_norm": 1.0161825194031135, "learning_rate": 9.292014488800046e-06, "loss": 0.6026, "step": 1219 }, { "epoch": 0.2, "grad_norm": 0.6190578711695649, "learning_rate": 9.290675175284135e-06, "loss": 0.4907, "step": 1220 }, { "epoch": 0.2, "grad_norm": 1.125714700914956, "learning_rate": 9.289334692870033e-06, "loss": 0.7165, "step": 1221 }, { "epoch": 0.2, "grad_norm": 1.2875941445325274, "learning_rate": 9.287993041922924e-06, "loss": 0.7112, "step": 1222 }, { "epoch": 0.2, "grad_norm": 1.1078222309915893, "learning_rate": 9.286650222808314e-06, "loss": 0.7601, "step": 1223 }, { "epoch": 0.2, "grad_norm": 0.999944304866867, "learning_rate": 9.285306235892017e-06, "loss": 0.5661, "step": 1224 }, { "epoch": 0.2, "grad_norm": 0.9952463958134803, "learning_rate": 9.283961081540178e-06, "loss": 0.6069, "step": 1225 }, { "epoch": 0.2, "grad_norm": 1.1478291170578638, "learning_rate": 9.282614760119252e-06, "loss": 0.6923, "step": 1226 }, { "epoch": 0.2, "grad_norm": 1.024788043357834, "learning_rate": 9.281267271996015e-06, "loss": 0.7213, "step": 1227 }, { "epoch": 0.2, "grad_norm": 1.048442578485777, "learning_rate": 9.27991861753756e-06, "loss": 0.6394, "step": 1228 }, { "epoch": 0.2, "grad_norm": 1.022485456447724, "learning_rate": 9.278568797111294e-06, "loss": 0.6112, "step": 1229 }, { "epoch": 0.2, "grad_norm": 1.0888123572603883, "learning_rate": 9.277217811084952e-06, "loss": 0.5784, "step": 1230 }, { "epoch": 0.2, "grad_norm": 1.0939112564306201, "learning_rate": 9.275865659826573e-06, "loss": 0.6281, "step": 1231 }, { "epoch": 0.2, "grad_norm": 0.915737101049708, "learning_rate": 9.274512343704525e-06, "loss": 0.598, "step": 1232 }, { "epoch": 0.2, "grad_norm": 0.9767689527332065, "learning_rate": 9.273157863087486e-06, "loss": 0.5339, "step": 1233 }, { "epoch": 0.2, "grad_norm": 1.0785580622773865, "learning_rate": 9.271802218344455e-06, "loss": 0.6258, "step": 1234 }, { "epoch": 0.2, "grad_norm": 0.9939454227184851, "learning_rate": 9.270445409844749e-06, "loss": 0.6299, "step": 1235 }, { "epoch": 0.2, "grad_norm": 1.0079941254433253, "learning_rate": 9.269087437957996e-06, "loss": 0.6046, "step": 1236 }, { "epoch": 0.2, "grad_norm": 1.1072551085362412, "learning_rate": 9.267728303054146e-06, "loss": 0.6721, "step": 1237 }, { "epoch": 0.2, "grad_norm": 1.1053569057984955, "learning_rate": 9.266368005503465e-06, "loss": 0.6123, "step": 1238 }, { "epoch": 0.2, "grad_norm": 1.0139210005755706, "learning_rate": 9.265006545676538e-06, "loss": 0.6156, "step": 1239 }, { "epoch": 0.2, "grad_norm": 1.0743280202582823, "learning_rate": 9.263643923944262e-06, "loss": 0.6568, "step": 1240 }, { "epoch": 0.2, "grad_norm": 1.198856314599014, "learning_rate": 9.262280140677852e-06, "loss": 0.7356, "step": 1241 }, { "epoch": 0.2, "grad_norm": 0.9865904398756403, "learning_rate": 9.260915196248842e-06, "loss": 0.5548, "step": 1242 }, { "epoch": 0.2, "grad_norm": 0.6984235997615423, "learning_rate": 9.259549091029082e-06, "loss": 0.4859, "step": 1243 }, { "epoch": 0.2, "grad_norm": 1.0443175137011091, "learning_rate": 9.258181825390732e-06, "loss": 0.6019, "step": 1244 }, { "epoch": 0.2, "grad_norm": 1.1430195282177522, "learning_rate": 9.256813399706276e-06, "loss": 0.6485, "step": 1245 }, { "epoch": 0.2, "grad_norm": 1.0702155304377488, "learning_rate": 9.25544381434851e-06, "loss": 0.6268, "step": 1246 }, { "epoch": 0.2, "grad_norm": 1.1258650641586976, "learning_rate": 9.254073069690545e-06, "loss": 0.6028, "step": 1247 }, { "epoch": 0.2, "grad_norm": 0.9979885635295688, "learning_rate": 9.252701166105815e-06, "loss": 0.5231, "step": 1248 }, { "epoch": 0.2, "grad_norm": 1.1225800216660617, "learning_rate": 9.251328103968059e-06, "loss": 0.695, "step": 1249 }, { "epoch": 0.2, "grad_norm": 1.0150053148432505, "learning_rate": 9.24995388365134e-06, "loss": 0.5764, "step": 1250 }, { "epoch": 0.2, "grad_norm": 1.0567664311953584, "learning_rate": 9.24857850553003e-06, "loss": 0.6376, "step": 1251 }, { "epoch": 0.2, "grad_norm": 1.0209686185559421, "learning_rate": 9.247201969978825e-06, "loss": 0.6542, "step": 1252 }, { "epoch": 0.2, "grad_norm": 1.0707225531437685, "learning_rate": 9.245824277372729e-06, "loss": 0.6318, "step": 1253 }, { "epoch": 0.2, "grad_norm": 1.220432558688987, "learning_rate": 9.24444542808706e-06, "loss": 0.6879, "step": 1254 }, { "epoch": 0.2, "grad_norm": 1.1569060907214943, "learning_rate": 9.24306542249746e-06, "loss": 0.674, "step": 1255 }, { "epoch": 0.2, "grad_norm": 1.069302019008103, "learning_rate": 9.241684260979877e-06, "loss": 0.6861, "step": 1256 }, { "epoch": 0.2, "grad_norm": 1.0313837833709343, "learning_rate": 9.240301943910578e-06, "loss": 0.6203, "step": 1257 }, { "epoch": 0.2, "grad_norm": 1.1356470117218578, "learning_rate": 9.238918471666148e-06, "loss": 0.6329, "step": 1258 }, { "epoch": 0.2, "grad_norm": 1.0759068997696632, "learning_rate": 9.237533844623477e-06, "loss": 0.583, "step": 1259 }, { "epoch": 0.2, "grad_norm": 1.072656661028882, "learning_rate": 9.236148063159778e-06, "loss": 0.6853, "step": 1260 }, { "epoch": 0.2, "grad_norm": 1.0307163773942474, "learning_rate": 9.234761127652578e-06, "loss": 0.5126, "step": 1261 }, { "epoch": 0.2, "grad_norm": 1.0948171100839685, "learning_rate": 9.233373038479716e-06, "loss": 0.7146, "step": 1262 }, { "epoch": 0.2, "grad_norm": 1.1143406928943824, "learning_rate": 9.231983796019342e-06, "loss": 0.6514, "step": 1263 }, { "epoch": 0.2, "grad_norm": 1.0767395438149383, "learning_rate": 9.230593400649928e-06, "loss": 0.6284, "step": 1264 }, { "epoch": 0.2, "grad_norm": 1.0141466953354, "learning_rate": 9.229201852750254e-06, "loss": 0.6817, "step": 1265 }, { "epoch": 0.2, "grad_norm": 1.0049429474092488, "learning_rate": 9.227809152699418e-06, "loss": 0.6424, "step": 1266 }, { "epoch": 0.2, "grad_norm": 1.101760671320198, "learning_rate": 9.226415300876828e-06, "loss": 0.6909, "step": 1267 }, { "epoch": 0.2, "grad_norm": 1.0700946452574387, "learning_rate": 9.225020297662208e-06, "loss": 0.6795, "step": 1268 }, { "epoch": 0.2, "grad_norm": 0.7404293522852861, "learning_rate": 9.223624143435595e-06, "loss": 0.4872, "step": 1269 }, { "epoch": 0.2, "grad_norm": 1.0312883836740334, "learning_rate": 9.222226838577342e-06, "loss": 0.5905, "step": 1270 }, { "epoch": 0.2, "grad_norm": 1.2077457820550166, "learning_rate": 9.22082838346811e-06, "loss": 0.6995, "step": 1271 }, { "epoch": 0.2, "grad_norm": 1.0529698040361815, "learning_rate": 9.21942877848888e-06, "loss": 0.5983, "step": 1272 }, { "epoch": 0.21, "grad_norm": 1.0022401754811046, "learning_rate": 9.21802802402094e-06, "loss": 0.6284, "step": 1273 }, { "epoch": 0.21, "grad_norm": 0.9833586086698595, "learning_rate": 9.216626120445897e-06, "loss": 0.5923, "step": 1274 }, { "epoch": 0.21, "grad_norm": 1.0530649725869778, "learning_rate": 9.215223068145666e-06, "loss": 0.677, "step": 1275 }, { "epoch": 0.21, "grad_norm": 1.0464753437976047, "learning_rate": 9.213818867502478e-06, "loss": 0.6045, "step": 1276 }, { "epoch": 0.21, "grad_norm": 0.9926183539978418, "learning_rate": 9.212413518898878e-06, "loss": 0.5699, "step": 1277 }, { "epoch": 0.21, "grad_norm": 1.0116267351415305, "learning_rate": 9.211007022717716e-06, "loss": 0.5614, "step": 1278 }, { "epoch": 0.21, "grad_norm": 1.076793931414792, "learning_rate": 9.209599379342165e-06, "loss": 0.6384, "step": 1279 }, { "epoch": 0.21, "grad_norm": 1.1052948235191529, "learning_rate": 9.208190589155705e-06, "loss": 0.6909, "step": 1280 }, { "epoch": 0.21, "grad_norm": 1.0363158883956463, "learning_rate": 9.206780652542131e-06, "loss": 0.6198, "step": 1281 }, { "epoch": 0.21, "grad_norm": 1.0293878264882128, "learning_rate": 9.205369569885544e-06, "loss": 0.6484, "step": 1282 }, { "epoch": 0.21, "grad_norm": 1.016716085100523, "learning_rate": 9.203957341570367e-06, "loss": 0.5898, "step": 1283 }, { "epoch": 0.21, "grad_norm": 0.6948168492948111, "learning_rate": 9.202543967981325e-06, "loss": 0.499, "step": 1284 }, { "epoch": 0.21, "grad_norm": 1.2041013176322988, "learning_rate": 9.201129449503463e-06, "loss": 0.6232, "step": 1285 }, { "epoch": 0.21, "grad_norm": 1.052663557529718, "learning_rate": 9.199713786522135e-06, "loss": 0.6386, "step": 1286 }, { "epoch": 0.21, "grad_norm": 1.1784149455736117, "learning_rate": 9.198296979423006e-06, "loss": 0.6909, "step": 1287 }, { "epoch": 0.21, "grad_norm": 0.9868320250156666, "learning_rate": 9.196879028592052e-06, "loss": 0.5193, "step": 1288 }, { "epoch": 0.21, "grad_norm": 1.0784236032227814, "learning_rate": 9.195459934415563e-06, "loss": 0.6336, "step": 1289 }, { "epoch": 0.21, "grad_norm": 1.0607256825807188, "learning_rate": 9.19403969728014e-06, "loss": 0.5959, "step": 1290 }, { "epoch": 0.21, "grad_norm": 1.0201409310772067, "learning_rate": 9.192618317572693e-06, "loss": 0.5715, "step": 1291 }, { "epoch": 0.21, "grad_norm": 1.0967435752614172, "learning_rate": 9.191195795680447e-06, "loss": 0.5446, "step": 1292 }, { "epoch": 0.21, "grad_norm": 1.0954440188196075, "learning_rate": 9.189772131990935e-06, "loss": 0.5848, "step": 1293 }, { "epoch": 0.21, "grad_norm": 1.0609636422321207, "learning_rate": 9.188347326892002e-06, "loss": 0.635, "step": 1294 }, { "epoch": 0.21, "grad_norm": 1.0161170852666188, "learning_rate": 9.186921380771806e-06, "loss": 0.6672, "step": 1295 }, { "epoch": 0.21, "grad_norm": 1.000184592347299, "learning_rate": 9.185494294018811e-06, "loss": 0.5943, "step": 1296 }, { "epoch": 0.21, "grad_norm": 0.9876484186694694, "learning_rate": 9.184066067021798e-06, "loss": 0.6028, "step": 1297 }, { "epoch": 0.21, "grad_norm": 1.0411622139889567, "learning_rate": 9.182636700169854e-06, "loss": 0.6229, "step": 1298 }, { "epoch": 0.21, "grad_norm": 0.6684721939962599, "learning_rate": 9.181206193852376e-06, "loss": 0.448, "step": 1299 }, { "epoch": 0.21, "grad_norm": 1.0460170258968344, "learning_rate": 9.179774548459077e-06, "loss": 0.6166, "step": 1300 }, { "epoch": 0.21, "grad_norm": 1.0606393057387997, "learning_rate": 9.178341764379974e-06, "loss": 0.6426, "step": 1301 }, { "epoch": 0.21, "grad_norm": 1.156393292749792, "learning_rate": 9.176907842005398e-06, "loss": 0.6889, "step": 1302 }, { "epoch": 0.21, "grad_norm": 1.027990199636308, "learning_rate": 9.17547278172599e-06, "loss": 0.5626, "step": 1303 }, { "epoch": 0.21, "grad_norm": 1.0138277480040518, "learning_rate": 9.174036583932695e-06, "loss": 0.6696, "step": 1304 }, { "epoch": 0.21, "grad_norm": 0.6525229324631442, "learning_rate": 9.17259924901678e-06, "loss": 0.5144, "step": 1305 }, { "epoch": 0.21, "grad_norm": 1.1085156960711897, "learning_rate": 9.17116077736981e-06, "loss": 0.7345, "step": 1306 }, { "epoch": 0.21, "grad_norm": 1.0816571505859058, "learning_rate": 9.169721169383666e-06, "loss": 0.6738, "step": 1307 }, { "epoch": 0.21, "grad_norm": 0.9889485788779966, "learning_rate": 9.168280425450535e-06, "loss": 0.596, "step": 1308 }, { "epoch": 0.21, "grad_norm": 1.0800951920606323, "learning_rate": 9.166838545962917e-06, "loss": 0.6372, "step": 1309 }, { "epoch": 0.21, "grad_norm": 1.0834988246180475, "learning_rate": 9.165395531313622e-06, "loss": 0.6318, "step": 1310 }, { "epoch": 0.21, "grad_norm": 1.03066141556626, "learning_rate": 9.163951381895761e-06, "loss": 0.6419, "step": 1311 }, { "epoch": 0.21, "grad_norm": 1.0876845816630245, "learning_rate": 9.162506098102765e-06, "loss": 0.6852, "step": 1312 }, { "epoch": 0.21, "grad_norm": 1.058458063999532, "learning_rate": 9.161059680328368e-06, "loss": 0.5862, "step": 1313 }, { "epoch": 0.21, "grad_norm": 1.0019865134231243, "learning_rate": 9.159612128966615e-06, "loss": 0.5792, "step": 1314 }, { "epoch": 0.21, "grad_norm": 1.0547634676928261, "learning_rate": 9.158163444411857e-06, "loss": 0.6837, "step": 1315 }, { "epoch": 0.21, "grad_norm": 1.0883117640684918, "learning_rate": 9.156713627058754e-06, "loss": 0.605, "step": 1316 }, { "epoch": 0.21, "grad_norm": 1.0185022650609008, "learning_rate": 9.155262677302281e-06, "loss": 0.6413, "step": 1317 }, { "epoch": 0.21, "grad_norm": 1.0170124100150486, "learning_rate": 9.153810595537713e-06, "loss": 0.5307, "step": 1318 }, { "epoch": 0.21, "grad_norm": 1.1311136669530881, "learning_rate": 9.152357382160639e-06, "loss": 0.6308, "step": 1319 }, { "epoch": 0.21, "grad_norm": 0.6541292516969023, "learning_rate": 9.150903037566954e-06, "loss": 0.4329, "step": 1320 }, { "epoch": 0.21, "grad_norm": 1.1487889041829114, "learning_rate": 9.14944756215286e-06, "loss": 0.666, "step": 1321 }, { "epoch": 0.21, "grad_norm": 1.024128203359301, "learning_rate": 9.14799095631487e-06, "loss": 0.6238, "step": 1322 }, { "epoch": 0.21, "grad_norm": 0.9882189189352942, "learning_rate": 9.146533220449804e-06, "loss": 0.5338, "step": 1323 }, { "epoch": 0.21, "grad_norm": 1.0898690130374142, "learning_rate": 9.14507435495479e-06, "loss": 0.6479, "step": 1324 }, { "epoch": 0.21, "grad_norm": 1.0326716813996526, "learning_rate": 9.143614360227261e-06, "loss": 0.6237, "step": 1325 }, { "epoch": 0.21, "grad_norm": 1.0577939091282096, "learning_rate": 9.142153236664961e-06, "loss": 0.6411, "step": 1326 }, { "epoch": 0.21, "grad_norm": 0.9604742133786004, "learning_rate": 9.14069098466594e-06, "loss": 0.5531, "step": 1327 }, { "epoch": 0.21, "grad_norm": 0.7125360178080434, "learning_rate": 9.139227604628556e-06, "loss": 0.5467, "step": 1328 }, { "epoch": 0.21, "grad_norm": 1.0423687649540945, "learning_rate": 9.137763096951474e-06, "loss": 0.6056, "step": 1329 }, { "epoch": 0.21, "grad_norm": 1.0834471726699733, "learning_rate": 9.136297462033667e-06, "loss": 0.7023, "step": 1330 }, { "epoch": 0.21, "grad_norm": 1.1300533974132025, "learning_rate": 9.134830700274413e-06, "loss": 0.6385, "step": 1331 }, { "epoch": 0.21, "grad_norm": 1.0765766405327657, "learning_rate": 9.133362812073298e-06, "loss": 0.596, "step": 1332 }, { "epoch": 0.21, "grad_norm": 1.0986521761479784, "learning_rate": 9.131893797830219e-06, "loss": 0.6769, "step": 1333 }, { "epoch": 0.21, "grad_norm": 1.1294104115009782, "learning_rate": 9.130423657945369e-06, "loss": 0.5503, "step": 1334 }, { "epoch": 0.22, "grad_norm": 1.0150987693400615, "learning_rate": 9.128952392819262e-06, "loss": 0.6175, "step": 1335 }, { "epoch": 0.22, "grad_norm": 1.1455229913482834, "learning_rate": 9.127480002852706e-06, "loss": 0.6709, "step": 1336 }, { "epoch": 0.22, "grad_norm": 1.113249858812364, "learning_rate": 9.126006488446824e-06, "loss": 0.673, "step": 1337 }, { "epoch": 0.22, "grad_norm": 1.081055884443081, "learning_rate": 9.12453185000304e-06, "loss": 0.6924, "step": 1338 }, { "epoch": 0.22, "grad_norm": 1.1666976481591584, "learning_rate": 9.123056087923088e-06, "loss": 0.7032, "step": 1339 }, { "epoch": 0.22, "grad_norm": 1.0228319307836449, "learning_rate": 9.121579202609004e-06, "loss": 0.6525, "step": 1340 }, { "epoch": 0.22, "grad_norm": 0.6469079481282709, "learning_rate": 9.12010119446313e-06, "loss": 0.485, "step": 1341 }, { "epoch": 0.22, "grad_norm": 0.7042101347952994, "learning_rate": 9.118622063888124e-06, "loss": 0.5174, "step": 1342 }, { "epoch": 0.22, "grad_norm": 1.0981131784492781, "learning_rate": 9.117141811286935e-06, "loss": 0.6659, "step": 1343 }, { "epoch": 0.22, "grad_norm": 1.1283918435241405, "learning_rate": 9.115660437062828e-06, "loss": 0.6193, "step": 1344 }, { "epoch": 0.22, "grad_norm": 1.0325628029589136, "learning_rate": 9.114177941619369e-06, "loss": 0.6506, "step": 1345 }, { "epoch": 0.22, "grad_norm": 1.1704250599488917, "learning_rate": 9.11269432536043e-06, "loss": 0.635, "step": 1346 }, { "epoch": 0.22, "grad_norm": 1.0487258624444653, "learning_rate": 9.11120958869019e-06, "loss": 0.6473, "step": 1347 }, { "epoch": 0.22, "grad_norm": 0.6630797570640432, "learning_rate": 9.109723732013132e-06, "loss": 0.492, "step": 1348 }, { "epoch": 0.22, "grad_norm": 1.0631836038739766, "learning_rate": 9.108236755734045e-06, "loss": 0.6399, "step": 1349 }, { "epoch": 0.22, "grad_norm": 1.0416000412977222, "learning_rate": 9.10674866025802e-06, "loss": 0.6401, "step": 1350 }, { "epoch": 0.22, "grad_norm": 1.0476568157916513, "learning_rate": 9.105259445990457e-06, "loss": 0.7022, "step": 1351 }, { "epoch": 0.22, "grad_norm": 0.993864636778809, "learning_rate": 9.10376911333706e-06, "loss": 0.5898, "step": 1352 }, { "epoch": 0.22, "grad_norm": 1.0413112967865412, "learning_rate": 9.102277662703834e-06, "loss": 0.6153, "step": 1353 }, { "epoch": 0.22, "grad_norm": 1.010006342010514, "learning_rate": 9.100785094497093e-06, "loss": 0.6296, "step": 1354 }, { "epoch": 0.22, "grad_norm": 1.0096545116284337, "learning_rate": 9.099291409123454e-06, "loss": 0.7163, "step": 1355 }, { "epoch": 0.22, "grad_norm": 1.0439025396194834, "learning_rate": 9.097796606989838e-06, "loss": 0.6266, "step": 1356 }, { "epoch": 0.22, "grad_norm": 1.0448381189778546, "learning_rate": 9.09630068850347e-06, "loss": 0.6362, "step": 1357 }, { "epoch": 0.22, "grad_norm": 1.08568752755913, "learning_rate": 9.094803654071877e-06, "loss": 0.6969, "step": 1358 }, { "epoch": 0.22, "grad_norm": 1.049579526620219, "learning_rate": 9.093305504102897e-06, "loss": 0.5182, "step": 1359 }, { "epoch": 0.22, "grad_norm": 1.0359177107256712, "learning_rate": 9.091806239004664e-06, "loss": 0.684, "step": 1360 }, { "epoch": 0.22, "grad_norm": 0.6931210113715948, "learning_rate": 9.090305859185619e-06, "loss": 0.4926, "step": 1361 }, { "epoch": 0.22, "grad_norm": 0.9921018624879908, "learning_rate": 9.088804365054511e-06, "loss": 0.6219, "step": 1362 }, { "epoch": 0.22, "grad_norm": 1.6239639129793633, "learning_rate": 9.087301757020384e-06, "loss": 0.6003, "step": 1363 }, { "epoch": 0.22, "grad_norm": 0.9825841550040483, "learning_rate": 9.08579803549259e-06, "loss": 0.5938, "step": 1364 }, { "epoch": 0.22, "grad_norm": 1.0680435943703834, "learning_rate": 9.084293200880787e-06, "loss": 0.6651, "step": 1365 }, { "epoch": 0.22, "grad_norm": 0.9932673877387073, "learning_rate": 9.082787253594932e-06, "loss": 0.6031, "step": 1366 }, { "epoch": 0.22, "grad_norm": 0.9420629925339018, "learning_rate": 9.081280194045286e-06, "loss": 0.5354, "step": 1367 }, { "epoch": 0.22, "grad_norm": 1.0573858054704095, "learning_rate": 9.079772022642413e-06, "loss": 0.6701, "step": 1368 }, { "epoch": 0.22, "grad_norm": 1.0444577130293116, "learning_rate": 9.078262739797184e-06, "loss": 0.6534, "step": 1369 }, { "epoch": 0.22, "grad_norm": 1.0065191470541899, "learning_rate": 9.076752345920764e-06, "loss": 0.6137, "step": 1370 }, { "epoch": 0.22, "grad_norm": 1.0129837858779838, "learning_rate": 9.075240841424629e-06, "loss": 0.5639, "step": 1371 }, { "epoch": 0.22, "grad_norm": 1.1543883971204312, "learning_rate": 9.073728226720555e-06, "loss": 0.6625, "step": 1372 }, { "epoch": 0.22, "grad_norm": 1.0972377559857236, "learning_rate": 9.07221450222062e-06, "loss": 0.7125, "step": 1373 }, { "epoch": 0.22, "grad_norm": 0.956946091256148, "learning_rate": 9.070699668337202e-06, "loss": 0.6118, "step": 1374 }, { "epoch": 0.22, "grad_norm": 1.0322466730180362, "learning_rate": 9.069183725482984e-06, "loss": 0.5909, "step": 1375 }, { "epoch": 0.22, "grad_norm": 1.0817020230398517, "learning_rate": 9.067666674070952e-06, "loss": 0.6058, "step": 1376 }, { "epoch": 0.22, "grad_norm": 0.9129587604116987, "learning_rate": 9.066148514514395e-06, "loss": 0.603, "step": 1377 }, { "epoch": 0.22, "grad_norm": 1.0166212285215896, "learning_rate": 9.064629247226895e-06, "loss": 0.6555, "step": 1378 }, { "epoch": 0.22, "grad_norm": 1.054780511818078, "learning_rate": 9.063108872622348e-06, "loss": 0.644, "step": 1379 }, { "epoch": 0.22, "grad_norm": 1.1700083035659647, "learning_rate": 9.061587391114942e-06, "loss": 0.7466, "step": 1380 }, { "epoch": 0.22, "grad_norm": 1.0505106069264116, "learning_rate": 9.060064803119173e-06, "loss": 0.72, "step": 1381 }, { "epoch": 0.22, "grad_norm": 1.0551900581640228, "learning_rate": 9.058541109049836e-06, "loss": 0.6341, "step": 1382 }, { "epoch": 0.22, "grad_norm": 1.1194935385821996, "learning_rate": 9.057016309322026e-06, "loss": 0.817, "step": 1383 }, { "epoch": 0.22, "grad_norm": 1.0668787387305103, "learning_rate": 9.055490404351141e-06, "loss": 0.6579, "step": 1384 }, { "epoch": 0.22, "grad_norm": 1.0637976882370652, "learning_rate": 9.05396339455288e-06, "loss": 0.6477, "step": 1385 }, { "epoch": 0.22, "grad_norm": 1.161854519320283, "learning_rate": 9.05243528034324e-06, "loss": 0.6956, "step": 1386 }, { "epoch": 0.22, "grad_norm": 0.9453457969355008, "learning_rate": 9.050906062138527e-06, "loss": 0.693, "step": 1387 }, { "epoch": 0.22, "grad_norm": 1.0505183870051222, "learning_rate": 9.049375740355338e-06, "loss": 0.7126, "step": 1388 }, { "epoch": 0.22, "grad_norm": 1.0856608096892482, "learning_rate": 9.047844315410574e-06, "loss": 0.6582, "step": 1389 }, { "epoch": 0.22, "grad_norm": 1.093157385772696, "learning_rate": 9.04631178772144e-06, "loss": 0.6326, "step": 1390 }, { "epoch": 0.22, "grad_norm": 1.0677806164608241, "learning_rate": 9.044778157705437e-06, "loss": 0.6593, "step": 1391 }, { "epoch": 0.22, "grad_norm": 1.0416462731597982, "learning_rate": 9.04324342578037e-06, "loss": 0.6476, "step": 1392 }, { "epoch": 0.22, "grad_norm": 1.0405003588410895, "learning_rate": 9.04170759236434e-06, "loss": 0.636, "step": 1393 }, { "epoch": 0.22, "grad_norm": 1.109274215901161, "learning_rate": 9.040170657875753e-06, "loss": 0.5808, "step": 1394 }, { "epoch": 0.22, "grad_norm": 1.1082056816133068, "learning_rate": 9.038632622733311e-06, "loss": 0.63, "step": 1395 }, { "epoch": 0.22, "grad_norm": 1.1026516262977042, "learning_rate": 9.037093487356016e-06, "loss": 0.7177, "step": 1396 }, { "epoch": 0.23, "grad_norm": 1.0644638696196977, "learning_rate": 9.035553252163172e-06, "loss": 0.6869, "step": 1397 }, { "epoch": 0.23, "grad_norm": 0.9806611002038922, "learning_rate": 9.034011917574382e-06, "loss": 0.6167, "step": 1398 }, { "epoch": 0.23, "grad_norm": 1.0878974003679027, "learning_rate": 9.032469484009548e-06, "loss": 0.6647, "step": 1399 }, { "epoch": 0.23, "grad_norm": 1.0801494168326196, "learning_rate": 9.030925951888869e-06, "loss": 0.6665, "step": 1400 }, { "epoch": 0.23, "grad_norm": 1.0497997377947759, "learning_rate": 9.029381321632849e-06, "loss": 0.6555, "step": 1401 }, { "epoch": 0.23, "grad_norm": 1.0588087676173972, "learning_rate": 9.027835593662285e-06, "loss": 0.6029, "step": 1402 }, { "epoch": 0.23, "grad_norm": 1.0365680610901784, "learning_rate": 9.026288768398278e-06, "loss": 0.6319, "step": 1403 }, { "epoch": 0.23, "grad_norm": 0.8906181517900568, "learning_rate": 9.024740846262225e-06, "loss": 0.5554, "step": 1404 }, { "epoch": 0.23, "grad_norm": 1.0639166313317698, "learning_rate": 9.023191827675824e-06, "loss": 0.6312, "step": 1405 }, { "epoch": 0.23, "grad_norm": 1.0778823460216507, "learning_rate": 9.021641713061069e-06, "loss": 0.6586, "step": 1406 }, { "epoch": 0.23, "grad_norm": 1.0422156539703293, "learning_rate": 9.020090502840253e-06, "loss": 0.6578, "step": 1407 }, { "epoch": 0.23, "grad_norm": 1.1229123110341979, "learning_rate": 9.01853819743597e-06, "loss": 0.6788, "step": 1408 }, { "epoch": 0.23, "grad_norm": 1.2101381260602155, "learning_rate": 9.016984797271112e-06, "loss": 0.6664, "step": 1409 }, { "epoch": 0.23, "grad_norm": 1.0566900395722054, "learning_rate": 9.015430302768865e-06, "loss": 0.5809, "step": 1410 }, { "epoch": 0.23, "grad_norm": 1.1469800123289702, "learning_rate": 9.013874714352716e-06, "loss": 0.7116, "step": 1411 }, { "epoch": 0.23, "grad_norm": 0.9971819448767034, "learning_rate": 9.012318032446454e-06, "loss": 0.6547, "step": 1412 }, { "epoch": 0.23, "grad_norm": 1.0073186291041025, "learning_rate": 9.010760257474158e-06, "loss": 0.6181, "step": 1413 }, { "epoch": 0.23, "grad_norm": 1.0571180880945343, "learning_rate": 9.009201389860212e-06, "loss": 0.6798, "step": 1414 }, { "epoch": 0.23, "grad_norm": 1.088820057183002, "learning_rate": 9.007641430029292e-06, "loss": 0.6245, "step": 1415 }, { "epoch": 0.23, "grad_norm": 1.0307297331627165, "learning_rate": 9.006080378406377e-06, "loss": 0.6237, "step": 1416 }, { "epoch": 0.23, "grad_norm": 1.106008756404052, "learning_rate": 9.004518235416737e-06, "loss": 0.7473, "step": 1417 }, { "epoch": 0.23, "grad_norm": 1.0761915208260295, "learning_rate": 9.002955001485945e-06, "loss": 0.6418, "step": 1418 }, { "epoch": 0.23, "grad_norm": 1.0519003102911193, "learning_rate": 9.001390677039868e-06, "loss": 0.7114, "step": 1419 }, { "epoch": 0.23, "grad_norm": 1.06933104192568, "learning_rate": 8.999825262504672e-06, "loss": 0.6671, "step": 1420 }, { "epoch": 0.23, "grad_norm": 1.0024346237329713, "learning_rate": 8.998258758306819e-06, "loss": 0.5722, "step": 1421 }, { "epoch": 0.23, "grad_norm": 1.0884012946637913, "learning_rate": 8.996691164873068e-06, "loss": 0.6639, "step": 1422 }, { "epoch": 0.23, "grad_norm": 1.0792796001461855, "learning_rate": 8.995122482630473e-06, "loss": 0.5921, "step": 1423 }, { "epoch": 0.23, "grad_norm": 1.0253928774530683, "learning_rate": 8.993552712006388e-06, "loss": 0.6353, "step": 1424 }, { "epoch": 0.23, "grad_norm": 1.0611814316767676, "learning_rate": 8.99198185342846e-06, "loss": 0.6448, "step": 1425 }, { "epoch": 0.23, "grad_norm": 0.9729411275824148, "learning_rate": 8.990409907324638e-06, "loss": 0.6037, "step": 1426 }, { "epoch": 0.23, "grad_norm": 0.9881382168648065, "learning_rate": 8.98883687412316e-06, "loss": 0.6376, "step": 1427 }, { "epoch": 0.23, "grad_norm": 1.0155742607429574, "learning_rate": 8.987262754252565e-06, "loss": 0.6051, "step": 1428 }, { "epoch": 0.23, "grad_norm": 1.0399315985565132, "learning_rate": 8.985687548141685e-06, "loss": 0.6584, "step": 1429 }, { "epoch": 0.23, "grad_norm": 1.0026553178023434, "learning_rate": 8.984111256219651e-06, "loss": 0.6098, "step": 1430 }, { "epoch": 0.23, "grad_norm": 0.9832513074201062, "learning_rate": 8.982533878915889e-06, "loss": 0.6352, "step": 1431 }, { "epoch": 0.23, "grad_norm": 1.0579708074182446, "learning_rate": 8.98095541666012e-06, "loss": 0.6537, "step": 1432 }, { "epoch": 0.23, "grad_norm": 1.1130460136431584, "learning_rate": 8.979375869882358e-06, "loss": 0.6532, "step": 1433 }, { "epoch": 0.23, "grad_norm": 1.0002837188096758, "learning_rate": 8.977795239012916e-06, "loss": 0.6066, "step": 1434 }, { "epoch": 0.23, "grad_norm": 0.9808336244622646, "learning_rate": 8.976213524482404e-06, "loss": 0.6766, "step": 1435 }, { "epoch": 0.23, "grad_norm": 1.096713230384531, "learning_rate": 8.974630726721723e-06, "loss": 0.6626, "step": 1436 }, { "epoch": 0.23, "grad_norm": 1.0578575299460342, "learning_rate": 8.97304684616207e-06, "loss": 0.663, "step": 1437 }, { "epoch": 0.23, "grad_norm": 0.9683831751834716, "learning_rate": 8.97146188323494e-06, "loss": 0.5683, "step": 1438 }, { "epoch": 0.23, "grad_norm": 1.0589364304161788, "learning_rate": 8.969875838372117e-06, "loss": 0.6491, "step": 1439 }, { "epoch": 0.23, "grad_norm": 1.0311570189096695, "learning_rate": 8.968288712005688e-06, "loss": 0.595, "step": 1440 }, { "epoch": 0.23, "grad_norm": 0.9975930813864733, "learning_rate": 8.966700504568025e-06, "loss": 0.6322, "step": 1441 }, { "epoch": 0.23, "grad_norm": 1.067823484224228, "learning_rate": 8.965111216491803e-06, "loss": 0.6464, "step": 1442 }, { "epoch": 0.23, "grad_norm": 1.0283474615731316, "learning_rate": 8.963520848209985e-06, "loss": 0.576, "step": 1443 }, { "epoch": 0.23, "grad_norm": 1.0121956374436312, "learning_rate": 8.961929400155833e-06, "loss": 0.6495, "step": 1444 }, { "epoch": 0.23, "grad_norm": 0.9947993733094849, "learning_rate": 8.960336872762903e-06, "loss": 0.668, "step": 1445 }, { "epoch": 0.23, "grad_norm": 1.1018081944338125, "learning_rate": 8.958743266465041e-06, "loss": 0.6781, "step": 1446 }, { "epoch": 0.23, "grad_norm": 1.032205939594897, "learning_rate": 8.957148581696389e-06, "loss": 0.614, "step": 1447 }, { "epoch": 0.23, "grad_norm": 1.0018226932024632, "learning_rate": 8.955552818891384e-06, "loss": 0.5993, "step": 1448 }, { "epoch": 0.23, "grad_norm": 0.9487645848948856, "learning_rate": 8.953955978484756e-06, "loss": 0.6174, "step": 1449 }, { "epoch": 0.23, "grad_norm": 1.054663585165408, "learning_rate": 8.95235806091153e-06, "loss": 0.5258, "step": 1450 }, { "epoch": 0.23, "grad_norm": 1.1508342107909812, "learning_rate": 8.950759066607017e-06, "loss": 0.6782, "step": 1451 }, { "epoch": 0.23, "grad_norm": 1.0676223992114562, "learning_rate": 8.949158996006834e-06, "loss": 0.6692, "step": 1452 }, { "epoch": 0.23, "grad_norm": 1.0260256617593244, "learning_rate": 8.94755784954688e-06, "loss": 0.6633, "step": 1453 }, { "epoch": 0.23, "grad_norm": 1.0187619959840217, "learning_rate": 8.945955627663353e-06, "loss": 0.5627, "step": 1454 }, { "epoch": 0.23, "grad_norm": 1.0518968378568396, "learning_rate": 8.944352330792741e-06, "loss": 0.5802, "step": 1455 }, { "epoch": 0.23, "grad_norm": 1.0130467867865363, "learning_rate": 8.942747959371829e-06, "loss": 0.5963, "step": 1456 }, { "epoch": 0.23, "grad_norm": 1.1149757650642895, "learning_rate": 8.941142513837689e-06, "loss": 0.6366, "step": 1457 }, { "epoch": 0.23, "grad_norm": 1.0726075141671725, "learning_rate": 8.939535994627692e-06, "loss": 0.6282, "step": 1458 }, { "epoch": 0.24, "grad_norm": 1.0537008522251274, "learning_rate": 8.937928402179495e-06, "loss": 0.6026, "step": 1459 }, { "epoch": 0.24, "grad_norm": 0.975524913513794, "learning_rate": 8.936319736931051e-06, "loss": 0.5593, "step": 1460 }, { "epoch": 0.24, "grad_norm": 1.0360998035398263, "learning_rate": 8.934709999320605e-06, "loss": 0.6961, "step": 1461 }, { "epoch": 0.24, "grad_norm": 1.034605856925702, "learning_rate": 8.933099189786697e-06, "loss": 0.6852, "step": 1462 }, { "epoch": 0.24, "grad_norm": 1.0870992225092975, "learning_rate": 8.93148730876815e-06, "loss": 0.5848, "step": 1463 }, { "epoch": 0.24, "grad_norm": 1.1493617672520888, "learning_rate": 8.92987435670409e-06, "loss": 0.6141, "step": 1464 }, { "epoch": 0.24, "grad_norm": 1.0063412205856306, "learning_rate": 8.928260334033927e-06, "loss": 0.6286, "step": 1465 }, { "epoch": 0.24, "grad_norm": 1.037769738541675, "learning_rate": 8.926645241197365e-06, "loss": 0.6547, "step": 1466 }, { "epoch": 0.24, "grad_norm": 1.0365758173384156, "learning_rate": 8.925029078634401e-06, "loss": 0.6165, "step": 1467 }, { "epoch": 0.24, "grad_norm": 1.0080011580160892, "learning_rate": 8.923411846785322e-06, "loss": 0.5648, "step": 1468 }, { "epoch": 0.24, "grad_norm": 1.0489487831016127, "learning_rate": 8.921793546090709e-06, "loss": 0.6436, "step": 1469 }, { "epoch": 0.24, "grad_norm": 1.0176680361301425, "learning_rate": 8.920174176991426e-06, "loss": 0.5276, "step": 1470 }, { "epoch": 0.24, "grad_norm": 1.086557866647455, "learning_rate": 8.918553739928637e-06, "loss": 0.6442, "step": 1471 }, { "epoch": 0.24, "grad_norm": 1.0360629532542711, "learning_rate": 8.916932235343797e-06, "loss": 0.5219, "step": 1472 }, { "epoch": 0.24, "grad_norm": 0.9150853989413839, "learning_rate": 8.915309663678641e-06, "loss": 0.4808, "step": 1473 }, { "epoch": 0.24, "grad_norm": 0.738454409470134, "learning_rate": 8.913686025375207e-06, "loss": 0.4826, "step": 1474 }, { "epoch": 0.24, "grad_norm": 1.0164443478842413, "learning_rate": 8.91206132087582e-06, "loss": 0.5974, "step": 1475 }, { "epoch": 0.24, "grad_norm": 1.078104225078703, "learning_rate": 8.910435550623093e-06, "loss": 0.679, "step": 1476 }, { "epoch": 0.24, "grad_norm": 1.0343444515289968, "learning_rate": 8.908808715059929e-06, "loss": 0.6405, "step": 1477 }, { "epoch": 0.24, "grad_norm": 1.0254828394527824, "learning_rate": 8.907180814629526e-06, "loss": 0.5603, "step": 1478 }, { "epoch": 0.24, "grad_norm": 1.0413531257034343, "learning_rate": 8.905551849775365e-06, "loss": 0.5764, "step": 1479 }, { "epoch": 0.24, "grad_norm": 1.0934818756944118, "learning_rate": 8.903921820941224e-06, "loss": 0.6805, "step": 1480 }, { "epoch": 0.24, "grad_norm": 1.0511557236364124, "learning_rate": 8.902290728571165e-06, "loss": 0.5895, "step": 1481 }, { "epoch": 0.24, "grad_norm": 1.0657948372414696, "learning_rate": 8.900658573109546e-06, "loss": 0.6218, "step": 1482 }, { "epoch": 0.24, "grad_norm": 1.0024071700845156, "learning_rate": 8.89902535500101e-06, "loss": 0.5925, "step": 1483 }, { "epoch": 0.24, "grad_norm": 1.0387619596933229, "learning_rate": 8.897391074690489e-06, "loss": 0.6822, "step": 1484 }, { "epoch": 0.24, "grad_norm": 1.0312979038838492, "learning_rate": 8.895755732623207e-06, "loss": 0.6449, "step": 1485 }, { "epoch": 0.24, "grad_norm": 1.0790057677602334, "learning_rate": 8.894119329244675e-06, "loss": 0.6883, "step": 1486 }, { "epoch": 0.24, "grad_norm": 0.9995900236789885, "learning_rate": 8.892481865000698e-06, "loss": 0.5953, "step": 1487 }, { "epoch": 0.24, "grad_norm": 1.0130139338640964, "learning_rate": 8.890843340337363e-06, "loss": 0.6232, "step": 1488 }, { "epoch": 0.24, "grad_norm": 1.0122041981422187, "learning_rate": 8.88920375570105e-06, "loss": 0.5069, "step": 1489 }, { "epoch": 0.24, "grad_norm": 1.1317800510763913, "learning_rate": 8.887563111538428e-06, "loss": 0.5805, "step": 1490 }, { "epoch": 0.24, "grad_norm": 1.0630407526416232, "learning_rate": 8.885921408296454e-06, "loss": 0.616, "step": 1491 }, { "epoch": 0.24, "grad_norm": 1.0687415970615437, "learning_rate": 8.88427864642237e-06, "loss": 0.6205, "step": 1492 }, { "epoch": 0.24, "grad_norm": 1.032691954140164, "learning_rate": 8.882634826363714e-06, "loss": 0.5602, "step": 1493 }, { "epoch": 0.24, "grad_norm": 1.0151872248202518, "learning_rate": 8.880989948568303e-06, "loss": 0.6209, "step": 1494 }, { "epoch": 0.24, "grad_norm": 0.9605974662378616, "learning_rate": 8.879344013484253e-06, "loss": 0.5996, "step": 1495 }, { "epoch": 0.24, "grad_norm": 1.1246166032300906, "learning_rate": 8.877697021559958e-06, "loss": 0.7271, "step": 1496 }, { "epoch": 0.24, "grad_norm": 1.0988429410556217, "learning_rate": 8.876048973244105e-06, "loss": 0.6674, "step": 1497 }, { "epoch": 0.24, "grad_norm": 1.0540556185526617, "learning_rate": 8.874399868985668e-06, "loss": 0.5741, "step": 1498 }, { "epoch": 0.24, "grad_norm": 1.154351969784402, "learning_rate": 8.872749709233907e-06, "loss": 0.6944, "step": 1499 }, { "epoch": 0.24, "grad_norm": 1.0479973778098817, "learning_rate": 8.871098494438375e-06, "loss": 0.64, "step": 1500 }, { "epoch": 0.24, "grad_norm": 1.1089315627007361, "learning_rate": 8.869446225048903e-06, "loss": 0.6268, "step": 1501 }, { "epoch": 0.24, "grad_norm": 1.021501253264102, "learning_rate": 8.867792901515617e-06, "loss": 0.5781, "step": 1502 }, { "epoch": 0.24, "grad_norm": 1.09310067762115, "learning_rate": 8.866138524288929e-06, "loss": 0.6449, "step": 1503 }, { "epoch": 0.24, "grad_norm": 1.01618572080514, "learning_rate": 8.864483093819537e-06, "loss": 0.7068, "step": 1504 }, { "epoch": 0.24, "grad_norm": 1.0700028006954734, "learning_rate": 8.862826610558427e-06, "loss": 0.624, "step": 1505 }, { "epoch": 0.24, "grad_norm": 1.0417879877839549, "learning_rate": 8.861169074956865e-06, "loss": 0.5715, "step": 1506 }, { "epoch": 0.24, "grad_norm": 1.0941366897768456, "learning_rate": 8.859510487466415e-06, "loss": 0.6658, "step": 1507 }, { "epoch": 0.24, "grad_norm": 1.0894715757770213, "learning_rate": 8.85785084853892e-06, "loss": 0.6816, "step": 1508 }, { "epoch": 0.24, "grad_norm": 1.0440828024241646, "learning_rate": 8.856190158626512e-06, "loss": 0.6645, "step": 1509 }, { "epoch": 0.24, "grad_norm": 0.9800280676379383, "learning_rate": 8.854528418181609e-06, "loss": 0.6314, "step": 1510 }, { "epoch": 0.24, "grad_norm": 1.0346539432910857, "learning_rate": 8.852865627656912e-06, "loss": 0.6212, "step": 1511 }, { "epoch": 0.24, "grad_norm": 1.1031783112781088, "learning_rate": 8.851201787505415e-06, "loss": 0.6458, "step": 1512 }, { "epoch": 0.24, "grad_norm": 1.0778708148484915, "learning_rate": 8.84953689818039e-06, "loss": 0.673, "step": 1513 }, { "epoch": 0.24, "grad_norm": 0.9833523491723598, "learning_rate": 8.847870960135403e-06, "loss": 0.5401, "step": 1514 }, { "epoch": 0.24, "grad_norm": 0.9947151505723325, "learning_rate": 8.8462039738243e-06, "loss": 0.6156, "step": 1515 }, { "epoch": 0.24, "grad_norm": 1.0185927764063212, "learning_rate": 8.84453593970121e-06, "loss": 0.6498, "step": 1516 }, { "epoch": 0.24, "grad_norm": 1.0269473423982132, "learning_rate": 8.842866858220558e-06, "loss": 0.6381, "step": 1517 }, { "epoch": 0.24, "grad_norm": 1.0199822959916656, "learning_rate": 8.841196729837044e-06, "loss": 0.6154, "step": 1518 }, { "epoch": 0.24, "grad_norm": 0.9765012900082941, "learning_rate": 8.839525555005656e-06, "loss": 0.7073, "step": 1519 }, { "epoch": 0.24, "grad_norm": 1.1495053197757263, "learning_rate": 8.837853334181669e-06, "loss": 0.7184, "step": 1520 }, { "epoch": 0.25, "grad_norm": 1.0555606447747357, "learning_rate": 8.836180067820646e-06, "loss": 0.5939, "step": 1521 }, { "epoch": 0.25, "grad_norm": 1.0782147776476414, "learning_rate": 8.834505756378425e-06, "loss": 0.6539, "step": 1522 }, { "epoch": 0.25, "grad_norm": 1.0669674198660808, "learning_rate": 8.832830400311137e-06, "loss": 0.6574, "step": 1523 }, { "epoch": 0.25, "grad_norm": 1.0555694178294315, "learning_rate": 8.831154000075196e-06, "loss": 0.6395, "step": 1524 }, { "epoch": 0.25, "grad_norm": 1.0207620301246727, "learning_rate": 8.829476556127301e-06, "loss": 0.601, "step": 1525 }, { "epoch": 0.25, "grad_norm": 1.032999601432603, "learning_rate": 8.82779806892443e-06, "loss": 0.6212, "step": 1526 }, { "epoch": 0.25, "grad_norm": 1.0697205731503756, "learning_rate": 8.826118538923851e-06, "loss": 0.652, "step": 1527 }, { "epoch": 0.25, "grad_norm": 1.0181741600530463, "learning_rate": 8.824437966583114e-06, "loss": 0.711, "step": 1528 }, { "epoch": 0.25, "grad_norm": 1.0691253459695536, "learning_rate": 8.822756352360056e-06, "loss": 0.6733, "step": 1529 }, { "epoch": 0.25, "grad_norm": 1.2300538157109882, "learning_rate": 8.82107369671279e-06, "loss": 0.6741, "step": 1530 }, { "epoch": 0.25, "grad_norm": 1.032340800337491, "learning_rate": 8.819390000099723e-06, "loss": 0.632, "step": 1531 }, { "epoch": 0.25, "grad_norm": 1.0585166360182745, "learning_rate": 8.817705262979536e-06, "loss": 0.564, "step": 1532 }, { "epoch": 0.25, "grad_norm": 1.0807736682342988, "learning_rate": 8.8160194858112e-06, "loss": 0.6535, "step": 1533 }, { "epoch": 0.25, "grad_norm": 1.0057269110316291, "learning_rate": 8.814332669053968e-06, "loss": 0.6781, "step": 1534 }, { "epoch": 0.25, "grad_norm": 1.0690694637543459, "learning_rate": 8.812644813167372e-06, "loss": 0.6127, "step": 1535 }, { "epoch": 0.25, "grad_norm": 0.95297497030005, "learning_rate": 8.810955918611235e-06, "loss": 0.5491, "step": 1536 }, { "epoch": 0.25, "grad_norm": 1.0643776281675157, "learning_rate": 8.809265985845655e-06, "loss": 0.6269, "step": 1537 }, { "epoch": 0.25, "grad_norm": 0.7657711567997146, "learning_rate": 8.807575015331019e-06, "loss": 0.5138, "step": 1538 }, { "epoch": 0.25, "grad_norm": 1.0551066490204484, "learning_rate": 8.805883007527992e-06, "loss": 0.6505, "step": 1539 }, { "epoch": 0.25, "grad_norm": 1.129762087885999, "learning_rate": 8.804189962897521e-06, "loss": 0.7038, "step": 1540 }, { "epoch": 0.25, "grad_norm": 1.1004353013799344, "learning_rate": 8.802495881900844e-06, "loss": 0.5983, "step": 1541 }, { "epoch": 0.25, "grad_norm": 0.9400236819690566, "learning_rate": 8.80080076499947e-06, "loss": 0.6222, "step": 1542 }, { "epoch": 0.25, "grad_norm": 1.0662222392052771, "learning_rate": 8.7991046126552e-06, "loss": 0.6768, "step": 1543 }, { "epoch": 0.25, "grad_norm": 0.9964479661598958, "learning_rate": 8.79740742533011e-06, "loss": 0.624, "step": 1544 }, { "epoch": 0.25, "grad_norm": 1.1149768599964365, "learning_rate": 8.795709203486563e-06, "loss": 0.6699, "step": 1545 }, { "epoch": 0.25, "grad_norm": 0.9798992543153964, "learning_rate": 8.794009947587197e-06, "loss": 0.6298, "step": 1546 }, { "epoch": 0.25, "grad_norm": 1.1543236725068406, "learning_rate": 8.79230965809494e-06, "loss": 0.6395, "step": 1547 }, { "epoch": 0.25, "grad_norm": 1.0827574631517152, "learning_rate": 8.790608335472995e-06, "loss": 0.6594, "step": 1548 }, { "epoch": 0.25, "grad_norm": 1.0675398408332948, "learning_rate": 8.788905980184851e-06, "loss": 0.6311, "step": 1549 }, { "epoch": 0.25, "grad_norm": 0.9442519024763887, "learning_rate": 8.787202592694278e-06, "loss": 0.5528, "step": 1550 }, { "epoch": 0.25, "grad_norm": 0.812937967542351, "learning_rate": 8.785498173465323e-06, "loss": 0.5466, "step": 1551 }, { "epoch": 0.25, "grad_norm": 1.123070633027778, "learning_rate": 8.783792722962316e-06, "loss": 0.7048, "step": 1552 }, { "epoch": 0.25, "grad_norm": 1.0695087180612246, "learning_rate": 8.782086241649874e-06, "loss": 0.5786, "step": 1553 }, { "epoch": 0.25, "grad_norm": 1.1103592582331396, "learning_rate": 8.780378729992884e-06, "loss": 0.6329, "step": 1554 }, { "epoch": 0.25, "grad_norm": 1.0819087202157869, "learning_rate": 8.778670188456519e-06, "loss": 0.6534, "step": 1555 }, { "epoch": 0.25, "grad_norm": 0.6377229052319721, "learning_rate": 8.776960617506237e-06, "loss": 0.4977, "step": 1556 }, { "epoch": 0.25, "grad_norm": 0.9943102584229293, "learning_rate": 8.77525001760777e-06, "loss": 0.5542, "step": 1557 }, { "epoch": 0.25, "grad_norm": 1.0000370786761754, "learning_rate": 8.773538389227134e-06, "loss": 0.7041, "step": 1558 }, { "epoch": 0.25, "grad_norm": 1.0389936307919212, "learning_rate": 8.771825732830622e-06, "loss": 0.6764, "step": 1559 }, { "epoch": 0.25, "grad_norm": 1.0162407585068645, "learning_rate": 8.77011204888481e-06, "loss": 0.607, "step": 1560 }, { "epoch": 0.25, "grad_norm": 0.9675094640830435, "learning_rate": 8.76839733785655e-06, "loss": 0.6181, "step": 1561 }, { "epoch": 0.25, "grad_norm": 1.099032648864559, "learning_rate": 8.766681600212981e-06, "loss": 0.6055, "step": 1562 }, { "epoch": 0.25, "grad_norm": 1.0353773626494007, "learning_rate": 8.764964836421515e-06, "loss": 0.6413, "step": 1563 }, { "epoch": 0.25, "grad_norm": 1.0506688255339323, "learning_rate": 8.763247046949843e-06, "loss": 0.6327, "step": 1564 }, { "epoch": 0.25, "grad_norm": 1.0335132240676155, "learning_rate": 8.761528232265944e-06, "loss": 0.6418, "step": 1565 }, { "epoch": 0.25, "grad_norm": 0.9778028796006232, "learning_rate": 8.759808392838066e-06, "loss": 0.5767, "step": 1566 }, { "epoch": 0.25, "grad_norm": 1.0158305395107219, "learning_rate": 8.75808752913474e-06, "loss": 0.6211, "step": 1567 }, { "epoch": 0.25, "grad_norm": 0.9416086540391815, "learning_rate": 8.756365641624782e-06, "loss": 0.653, "step": 1568 }, { "epoch": 0.25, "grad_norm": 1.1188704018876807, "learning_rate": 8.754642730777276e-06, "loss": 0.721, "step": 1569 }, { "epoch": 0.25, "grad_norm": 1.0925725161919906, "learning_rate": 8.752918797061593e-06, "loss": 0.5907, "step": 1570 }, { "epoch": 0.25, "grad_norm": 1.07778335632773, "learning_rate": 8.751193840947382e-06, "loss": 0.624, "step": 1571 }, { "epoch": 0.25, "grad_norm": 1.002404163704054, "learning_rate": 8.749467862904565e-06, "loss": 0.6001, "step": 1572 }, { "epoch": 0.25, "grad_norm": 1.0167764354228188, "learning_rate": 8.747740863403348e-06, "loss": 0.5923, "step": 1573 }, { "epoch": 0.25, "grad_norm": 1.0699150666345603, "learning_rate": 8.746012842914214e-06, "loss": 0.5538, "step": 1574 }, { "epoch": 0.25, "grad_norm": 0.9707404125730749, "learning_rate": 8.74428380190792e-06, "loss": 0.6715, "step": 1575 }, { "epoch": 0.25, "grad_norm": 0.9992683726708323, "learning_rate": 8.742553740855507e-06, "loss": 0.5782, "step": 1576 }, { "epoch": 0.25, "grad_norm": 0.988104761649749, "learning_rate": 8.74082266022829e-06, "loss": 0.6677, "step": 1577 }, { "epoch": 0.25, "grad_norm": 0.9748303766760963, "learning_rate": 8.739090560497864e-06, "loss": 0.6467, "step": 1578 }, { "epoch": 0.25, "grad_norm": 1.0594982513880946, "learning_rate": 8.737357442136104e-06, "loss": 0.6756, "step": 1579 }, { "epoch": 0.25, "grad_norm": 1.0402860973172223, "learning_rate": 8.735623305615153e-06, "loss": 0.6505, "step": 1580 }, { "epoch": 0.25, "grad_norm": 0.982373754870477, "learning_rate": 8.733888151407441e-06, "loss": 0.6607, "step": 1581 }, { "epoch": 0.25, "grad_norm": 1.0317524778602136, "learning_rate": 8.732151979985671e-06, "loss": 0.6938, "step": 1582 }, { "epoch": 0.26, "grad_norm": 1.0761223281040657, "learning_rate": 8.730414791822825e-06, "loss": 0.6617, "step": 1583 }, { "epoch": 0.26, "grad_norm": 1.005720666490807, "learning_rate": 8.72867658739216e-06, "loss": 0.5116, "step": 1584 }, { "epoch": 0.26, "grad_norm": 1.0867411532114422, "learning_rate": 8.726937367167211e-06, "loss": 0.6918, "step": 1585 }, { "epoch": 0.26, "grad_norm": 0.9964653117831407, "learning_rate": 8.72519713162179e-06, "loss": 0.5608, "step": 1586 }, { "epoch": 0.26, "grad_norm": 1.0870998554991365, "learning_rate": 8.723455881229984e-06, "loss": 0.6413, "step": 1587 }, { "epoch": 0.26, "grad_norm": 1.0685465894499973, "learning_rate": 8.721713616466158e-06, "loss": 0.6507, "step": 1588 }, { "epoch": 0.26, "grad_norm": 1.0556099452071792, "learning_rate": 8.719970337804953e-06, "loss": 0.5845, "step": 1589 }, { "epoch": 0.26, "grad_norm": 1.102439536532582, "learning_rate": 8.718226045721287e-06, "loss": 0.6866, "step": 1590 }, { "epoch": 0.26, "grad_norm": 1.0903285475999822, "learning_rate": 8.716480740690353e-06, "loss": 0.6781, "step": 1591 }, { "epoch": 0.26, "grad_norm": 1.048903353082005, "learning_rate": 8.71473442318762e-06, "loss": 0.6703, "step": 1592 }, { "epoch": 0.26, "grad_norm": 1.056870976515927, "learning_rate": 8.712987093688833e-06, "loss": 0.6836, "step": 1593 }, { "epoch": 0.26, "grad_norm": 0.9626053247305686, "learning_rate": 8.711238752670012e-06, "loss": 0.5208, "step": 1594 }, { "epoch": 0.26, "grad_norm": 1.1344262678857058, "learning_rate": 8.709489400607453e-06, "loss": 0.6873, "step": 1595 }, { "epoch": 0.26, "grad_norm": 1.1133076118515337, "learning_rate": 8.70773903797773e-06, "loss": 0.709, "step": 1596 }, { "epoch": 0.26, "grad_norm": 1.0883570248936871, "learning_rate": 8.705987665257688e-06, "loss": 0.6366, "step": 1597 }, { "epoch": 0.26, "grad_norm": 1.0183267678822412, "learning_rate": 8.704235282924449e-06, "loss": 0.6171, "step": 1598 }, { "epoch": 0.26, "grad_norm": 1.0946328692999128, "learning_rate": 8.702481891455414e-06, "loss": 0.6611, "step": 1599 }, { "epoch": 0.26, "grad_norm": 0.9679801197678418, "learning_rate": 8.70072749132825e-06, "loss": 0.5326, "step": 1600 }, { "epoch": 0.26, "grad_norm": 1.1603361555886775, "learning_rate": 8.698972083020905e-06, "loss": 0.6206, "step": 1601 }, { "epoch": 0.26, "grad_norm": 0.6978951406280792, "learning_rate": 8.697215667011605e-06, "loss": 0.487, "step": 1602 }, { "epoch": 0.26, "grad_norm": 0.9392583365461291, "learning_rate": 8.69545824377884e-06, "loss": 0.5962, "step": 1603 }, { "epoch": 0.26, "grad_norm": 1.1042017911288597, "learning_rate": 8.693699813801387e-06, "loss": 0.6476, "step": 1604 }, { "epoch": 0.26, "grad_norm": 1.0563581847605283, "learning_rate": 8.691940377558284e-06, "loss": 0.6606, "step": 1605 }, { "epoch": 0.26, "grad_norm": 1.025950882237531, "learning_rate": 8.690179935528853e-06, "loss": 0.6165, "step": 1606 }, { "epoch": 0.26, "grad_norm": 0.9559777331165553, "learning_rate": 8.68841848819269e-06, "loss": 0.6004, "step": 1607 }, { "epoch": 0.26, "grad_norm": 0.9766649953650622, "learning_rate": 8.686656036029657e-06, "loss": 0.5941, "step": 1608 }, { "epoch": 0.26, "grad_norm": 0.9891151804843429, "learning_rate": 8.684892579519897e-06, "loss": 0.5936, "step": 1609 }, { "epoch": 0.26, "grad_norm": 1.0928074277795945, "learning_rate": 8.683128119143824e-06, "loss": 0.663, "step": 1610 }, { "epoch": 0.26, "grad_norm": 1.0318830969797916, "learning_rate": 8.681362655382125e-06, "loss": 0.6325, "step": 1611 }, { "epoch": 0.26, "grad_norm": 1.1723120465259382, "learning_rate": 8.67959618871576e-06, "loss": 0.6772, "step": 1612 }, { "epoch": 0.26, "grad_norm": 0.9998221958513286, "learning_rate": 8.677828719625967e-06, "loss": 0.6309, "step": 1613 }, { "epoch": 0.26, "grad_norm": 1.0812882573047393, "learning_rate": 8.676060248594248e-06, "loss": 0.6204, "step": 1614 }, { "epoch": 0.26, "grad_norm": 1.1269224636145994, "learning_rate": 8.674290776102388e-06, "loss": 0.6721, "step": 1615 }, { "epoch": 0.26, "grad_norm": 1.0365051895780908, "learning_rate": 8.67252030263244e-06, "loss": 0.5391, "step": 1616 }, { "epoch": 0.26, "grad_norm": 1.0056816044417796, "learning_rate": 8.670748828666725e-06, "loss": 0.6696, "step": 1617 }, { "epoch": 0.26, "grad_norm": 0.9627779876234087, "learning_rate": 8.668976354687844e-06, "loss": 0.5621, "step": 1618 }, { "epoch": 0.26, "grad_norm": 1.1217891486778544, "learning_rate": 8.66720288117867e-06, "loss": 0.7468, "step": 1619 }, { "epoch": 0.26, "grad_norm": 1.0085984162496406, "learning_rate": 8.665428408622343e-06, "loss": 0.6715, "step": 1620 }, { "epoch": 0.26, "grad_norm": 0.7104010120169026, "learning_rate": 8.66365293750228e-06, "loss": 0.5029, "step": 1621 }, { "epoch": 0.26, "grad_norm": 0.8671782712952599, "learning_rate": 8.661876468302167e-06, "loss": 0.5123, "step": 1622 }, { "epoch": 0.26, "grad_norm": 1.0199256782742885, "learning_rate": 8.660099001505965e-06, "loss": 0.6491, "step": 1623 }, { "epoch": 0.26, "grad_norm": 1.002738068993366, "learning_rate": 8.658320537597901e-06, "loss": 0.6108, "step": 1624 }, { "epoch": 0.26, "grad_norm": 0.9719023566018004, "learning_rate": 8.656541077062483e-06, "loss": 0.515, "step": 1625 }, { "epoch": 0.26, "grad_norm": 1.0422212483749724, "learning_rate": 8.654760620384482e-06, "loss": 0.6412, "step": 1626 }, { "epoch": 0.26, "grad_norm": 1.0728643077025237, "learning_rate": 8.652979168048944e-06, "loss": 0.5881, "step": 1627 }, { "epoch": 0.26, "grad_norm": 1.1472426260470576, "learning_rate": 8.651196720541186e-06, "loss": 0.7363, "step": 1628 }, { "epoch": 0.26, "grad_norm": 1.0274908537313772, "learning_rate": 8.649413278346795e-06, "loss": 0.6964, "step": 1629 }, { "epoch": 0.26, "grad_norm": 1.0645912151847958, "learning_rate": 8.64762884195163e-06, "loss": 0.644, "step": 1630 }, { "epoch": 0.26, "grad_norm": 1.047408954340107, "learning_rate": 8.64584341184182e-06, "loss": 0.6368, "step": 1631 }, { "epoch": 0.26, "grad_norm": 1.1059072759282695, "learning_rate": 8.644056988503769e-06, "loss": 0.6356, "step": 1632 }, { "epoch": 0.26, "grad_norm": 1.032691888462967, "learning_rate": 8.642269572424143e-06, "loss": 0.6946, "step": 1633 }, { "epoch": 0.26, "grad_norm": 1.120360130844582, "learning_rate": 8.640481164089887e-06, "loss": 0.6493, "step": 1634 }, { "epoch": 0.26, "grad_norm": 1.0373968404061225, "learning_rate": 8.63869176398821e-06, "loss": 0.6239, "step": 1635 }, { "epoch": 0.26, "grad_norm": 1.11191931049576, "learning_rate": 8.636901372606596e-06, "loss": 0.6425, "step": 1636 }, { "epoch": 0.26, "grad_norm": 1.0032186564743637, "learning_rate": 8.635109990432797e-06, "loss": 0.6234, "step": 1637 }, { "epoch": 0.26, "grad_norm": 1.0633552308228533, "learning_rate": 8.633317617954832e-06, "loss": 0.5954, "step": 1638 }, { "epoch": 0.26, "grad_norm": 1.002080694788509, "learning_rate": 8.631524255660997e-06, "loss": 0.5875, "step": 1639 }, { "epoch": 0.26, "grad_norm": 0.9908341621652779, "learning_rate": 8.629729904039853e-06, "loss": 0.567, "step": 1640 }, { "epoch": 0.26, "grad_norm": 0.6993224514719861, "learning_rate": 8.627934563580226e-06, "loss": 0.5199, "step": 1641 }, { "epoch": 0.26, "grad_norm": 1.0258685643234537, "learning_rate": 8.62613823477122e-06, "loss": 0.6492, "step": 1642 }, { "epoch": 0.26, "grad_norm": 1.0418792506748107, "learning_rate": 8.624340918102206e-06, "loss": 0.5835, "step": 1643 }, { "epoch": 0.26, "grad_norm": 1.0721806467380202, "learning_rate": 8.622542614062816e-06, "loss": 0.5924, "step": 1644 }, { "epoch": 0.27, "grad_norm": 1.0507311790555824, "learning_rate": 8.620743323142966e-06, "loss": 0.6493, "step": 1645 }, { "epoch": 0.27, "grad_norm": 0.6724881926513512, "learning_rate": 8.618943045832826e-06, "loss": 0.4854, "step": 1646 }, { "epoch": 0.27, "grad_norm": 1.104824565932021, "learning_rate": 8.617141782622844e-06, "loss": 0.6777, "step": 1647 }, { "epoch": 0.27, "grad_norm": 1.0486965750493942, "learning_rate": 8.615339534003735e-06, "loss": 0.6008, "step": 1648 }, { "epoch": 0.27, "grad_norm": 1.161208417107663, "learning_rate": 8.613536300466476e-06, "loss": 0.6905, "step": 1649 }, { "epoch": 0.27, "grad_norm": 1.0695064463705504, "learning_rate": 8.611732082502324e-06, "loss": 0.6688, "step": 1650 }, { "epoch": 0.27, "grad_norm": 1.0856943486246142, "learning_rate": 8.609926880602794e-06, "loss": 0.5956, "step": 1651 }, { "epoch": 0.27, "grad_norm": 0.9986924330537926, "learning_rate": 8.608120695259674e-06, "loss": 0.6165, "step": 1652 }, { "epoch": 0.27, "grad_norm": 1.0726488388280122, "learning_rate": 8.606313526965017e-06, "loss": 0.6646, "step": 1653 }, { "epoch": 0.27, "grad_norm": 1.1011202599420091, "learning_rate": 8.604505376211148e-06, "loss": 0.7196, "step": 1654 }, { "epoch": 0.27, "grad_norm": 1.0752769150535895, "learning_rate": 8.602696243490653e-06, "loss": 0.5833, "step": 1655 }, { "epoch": 0.27, "grad_norm": 1.0512314245295338, "learning_rate": 8.600886129296396e-06, "loss": 0.5489, "step": 1656 }, { "epoch": 0.27, "grad_norm": 1.0134722990641758, "learning_rate": 8.599075034121496e-06, "loss": 0.5853, "step": 1657 }, { "epoch": 0.27, "grad_norm": 1.0275243492764028, "learning_rate": 8.59726295845935e-06, "loss": 0.6627, "step": 1658 }, { "epoch": 0.27, "grad_norm": 0.9461698106458887, "learning_rate": 8.595449902803612e-06, "loss": 0.5607, "step": 1659 }, { "epoch": 0.27, "grad_norm": 0.9762099649357988, "learning_rate": 8.593635867648214e-06, "loss": 0.5623, "step": 1660 }, { "epoch": 0.27, "grad_norm": 1.140689772672225, "learning_rate": 8.591820853487344e-06, "loss": 0.7361, "step": 1661 }, { "epoch": 0.27, "grad_norm": 0.9985142548871073, "learning_rate": 8.590004860815466e-06, "loss": 0.5413, "step": 1662 }, { "epoch": 0.27, "grad_norm": 0.8956616354421242, "learning_rate": 8.588187890127305e-06, "loss": 0.5584, "step": 1663 }, { "epoch": 0.27, "grad_norm": 1.071073436389438, "learning_rate": 8.586369941917852e-06, "loss": 0.6574, "step": 1664 }, { "epoch": 0.27, "grad_norm": 1.0729321275082626, "learning_rate": 8.58455101668237e-06, "loss": 0.6423, "step": 1665 }, { "epoch": 0.27, "grad_norm": 0.9547901415558567, "learning_rate": 8.58273111491638e-06, "loss": 0.6275, "step": 1666 }, { "epoch": 0.27, "grad_norm": 1.010845465546539, "learning_rate": 8.580910237115678e-06, "loss": 0.6286, "step": 1667 }, { "epoch": 0.27, "grad_norm": 1.1076513713081735, "learning_rate": 8.579088383776318e-06, "loss": 0.7099, "step": 1668 }, { "epoch": 0.27, "grad_norm": 0.9815780758241532, "learning_rate": 8.577265555394626e-06, "loss": 0.5853, "step": 1669 }, { "epoch": 0.27, "grad_norm": 1.000518277494108, "learning_rate": 8.575441752467185e-06, "loss": 0.6254, "step": 1670 }, { "epoch": 0.27, "grad_norm": 1.000172503910287, "learning_rate": 8.573616975490855e-06, "loss": 0.6888, "step": 1671 }, { "epoch": 0.27, "grad_norm": 0.9803484356550121, "learning_rate": 8.571791224962754e-06, "loss": 0.6145, "step": 1672 }, { "epoch": 0.27, "grad_norm": 1.0170471121727223, "learning_rate": 8.569964501380266e-06, "loss": 0.649, "step": 1673 }, { "epoch": 0.27, "grad_norm": 1.0161820053555706, "learning_rate": 8.56813680524104e-06, "loss": 0.5567, "step": 1674 }, { "epoch": 0.27, "grad_norm": 1.0937265479865304, "learning_rate": 8.566308137042995e-06, "loss": 0.6646, "step": 1675 }, { "epoch": 0.27, "grad_norm": 1.0252371511959673, "learning_rate": 8.564478497284306e-06, "loss": 0.665, "step": 1676 }, { "epoch": 0.27, "grad_norm": 1.0133537498431278, "learning_rate": 8.562647886463417e-06, "loss": 0.6445, "step": 1677 }, { "epoch": 0.27, "grad_norm": 0.9002123617800141, "learning_rate": 8.560816305079041e-06, "loss": 0.5146, "step": 1678 }, { "epoch": 0.27, "grad_norm": 1.0368346283617285, "learning_rate": 8.558983753630149e-06, "loss": 0.5916, "step": 1679 }, { "epoch": 0.27, "grad_norm": 0.642759485710064, "learning_rate": 8.557150232615977e-06, "loss": 0.4956, "step": 1680 }, { "epoch": 0.27, "grad_norm": 1.1801674712653232, "learning_rate": 8.55531574253603e-06, "loss": 0.7094, "step": 1681 }, { "epoch": 0.27, "grad_norm": 1.045259928674509, "learning_rate": 8.55348028389007e-06, "loss": 0.651, "step": 1682 }, { "epoch": 0.27, "grad_norm": 1.0716332254571803, "learning_rate": 8.55164385717813e-06, "loss": 0.6636, "step": 1683 }, { "epoch": 0.27, "grad_norm": 1.0143646475325296, "learning_rate": 8.549806462900503e-06, "loss": 0.6934, "step": 1684 }, { "epoch": 0.27, "grad_norm": 0.9946778407753984, "learning_rate": 8.547968101557742e-06, "loss": 0.5928, "step": 1685 }, { "epoch": 0.27, "grad_norm": 1.020327734981545, "learning_rate": 8.54612877365067e-06, "loss": 0.6334, "step": 1686 }, { "epoch": 0.27, "grad_norm": 0.9904809554457953, "learning_rate": 8.544288479680371e-06, "loss": 0.5466, "step": 1687 }, { "epoch": 0.27, "grad_norm": 0.9931222332379382, "learning_rate": 8.542447220148191e-06, "loss": 0.6376, "step": 1688 }, { "epoch": 0.27, "grad_norm": 1.1352056130280164, "learning_rate": 8.540604995555741e-06, "loss": 0.6504, "step": 1689 }, { "epoch": 0.27, "grad_norm": 0.9748514467536232, "learning_rate": 8.538761806404892e-06, "loss": 0.5934, "step": 1690 }, { "epoch": 0.27, "grad_norm": 0.7040353558143975, "learning_rate": 8.53691765319778e-06, "loss": 0.4651, "step": 1691 }, { "epoch": 0.27, "grad_norm": 0.9641000802361597, "learning_rate": 8.535072536436805e-06, "loss": 0.6185, "step": 1692 }, { "epoch": 0.27, "grad_norm": 1.0675220196805224, "learning_rate": 8.533226456624624e-06, "loss": 0.5808, "step": 1693 }, { "epoch": 0.27, "grad_norm": 1.045049007010878, "learning_rate": 8.531379414264165e-06, "loss": 0.5948, "step": 1694 }, { "epoch": 0.27, "grad_norm": 1.1690712166726422, "learning_rate": 8.52953140985861e-06, "loss": 0.7885, "step": 1695 }, { "epoch": 0.27, "grad_norm": 0.9691304148377573, "learning_rate": 8.527682443911405e-06, "loss": 0.5909, "step": 1696 }, { "epoch": 0.27, "grad_norm": 1.051325752753385, "learning_rate": 8.525832516926262e-06, "loss": 0.6795, "step": 1697 }, { "epoch": 0.27, "grad_norm": 0.9874207573382526, "learning_rate": 8.52398162940715e-06, "loss": 0.5093, "step": 1698 }, { "epoch": 0.27, "grad_norm": 1.0880948909844912, "learning_rate": 8.522129781858306e-06, "loss": 0.6809, "step": 1699 }, { "epoch": 0.27, "grad_norm": 0.9408074018609945, "learning_rate": 8.52027697478422e-06, "loss": 0.5174, "step": 1700 }, { "epoch": 0.27, "grad_norm": 1.0706556977446764, "learning_rate": 8.518423208689647e-06, "loss": 0.6269, "step": 1701 }, { "epoch": 0.27, "grad_norm": 1.0385456106117927, "learning_rate": 8.516568484079609e-06, "loss": 0.6644, "step": 1702 }, { "epoch": 0.27, "grad_norm": 0.953706524864836, "learning_rate": 8.514712801459379e-06, "loss": 0.6572, "step": 1703 }, { "epoch": 0.27, "grad_norm": 1.1725342513906345, "learning_rate": 8.5128561613345e-06, "loss": 0.6581, "step": 1704 }, { "epoch": 0.27, "grad_norm": 1.0263882645496227, "learning_rate": 8.510998564210769e-06, "loss": 0.5504, "step": 1705 }, { "epoch": 0.27, "grad_norm": 0.9864510468345877, "learning_rate": 8.509140010594248e-06, "loss": 0.6148, "step": 1706 }, { "epoch": 0.28, "grad_norm": 0.9987834851991878, "learning_rate": 8.50728050099126e-06, "loss": 0.5584, "step": 1707 }, { "epoch": 0.28, "grad_norm": 0.9878352585628231, "learning_rate": 8.505420035908383e-06, "loss": 0.6151, "step": 1708 }, { "epoch": 0.28, "grad_norm": 1.0196462045112449, "learning_rate": 8.503558615852461e-06, "loss": 0.6803, "step": 1709 }, { "epoch": 0.28, "grad_norm": 1.0342314761616909, "learning_rate": 8.501696241330594e-06, "loss": 0.6665, "step": 1710 }, { "epoch": 0.28, "grad_norm": 0.9230466365766429, "learning_rate": 8.49983291285015e-06, "loss": 0.6374, "step": 1711 }, { "epoch": 0.28, "grad_norm": 0.9896275839919103, "learning_rate": 8.497968630918743e-06, "loss": 0.6063, "step": 1712 }, { "epoch": 0.28, "grad_norm": 1.0594584151383968, "learning_rate": 8.496103396044262e-06, "loss": 0.6331, "step": 1713 }, { "epoch": 0.28, "grad_norm": 1.0768817228954901, "learning_rate": 8.494237208734843e-06, "loss": 0.6032, "step": 1714 }, { "epoch": 0.28, "grad_norm": 0.9779559942411508, "learning_rate": 8.492370069498892e-06, "loss": 0.6105, "step": 1715 }, { "epoch": 0.28, "grad_norm": 0.9721636982428149, "learning_rate": 8.490501978845064e-06, "loss": 0.5546, "step": 1716 }, { "epoch": 0.28, "grad_norm": 0.9917190224004617, "learning_rate": 8.488632937282281e-06, "loss": 0.6276, "step": 1717 }, { "epoch": 0.28, "grad_norm": 1.0606321311516793, "learning_rate": 8.486762945319722e-06, "loss": 0.6596, "step": 1718 }, { "epoch": 0.28, "grad_norm": 1.0568207882543708, "learning_rate": 8.484892003466823e-06, "loss": 0.6061, "step": 1719 }, { "epoch": 0.28, "grad_norm": 0.9422961481839436, "learning_rate": 8.48302011223328e-06, "loss": 0.5013, "step": 1720 }, { "epoch": 0.28, "grad_norm": 1.1225999886105238, "learning_rate": 8.48114727212905e-06, "loss": 0.6369, "step": 1721 }, { "epoch": 0.28, "grad_norm": 1.0282463592664843, "learning_rate": 8.479273483664344e-06, "loss": 0.6249, "step": 1722 }, { "epoch": 0.28, "grad_norm": 1.056128730276417, "learning_rate": 8.477398747349632e-06, "loss": 0.628, "step": 1723 }, { "epoch": 0.28, "grad_norm": 1.057671165168062, "learning_rate": 8.47552306369565e-06, "loss": 0.6328, "step": 1724 }, { "epoch": 0.28, "grad_norm": 1.022173977872148, "learning_rate": 8.473646433213378e-06, "loss": 0.5946, "step": 1725 }, { "epoch": 0.28, "grad_norm": 0.939260753021954, "learning_rate": 8.471768856414069e-06, "loss": 0.5845, "step": 1726 }, { "epoch": 0.28, "grad_norm": 0.9894676407032139, "learning_rate": 8.469890333809223e-06, "loss": 0.607, "step": 1727 }, { "epoch": 0.28, "grad_norm": 0.961056712135085, "learning_rate": 8.468010865910601e-06, "loss": 0.6008, "step": 1728 }, { "epoch": 0.28, "grad_norm": 1.1253906678005074, "learning_rate": 8.466130453230224e-06, "loss": 0.6585, "step": 1729 }, { "epoch": 0.28, "grad_norm": 0.903592969972972, "learning_rate": 8.464249096280368e-06, "loss": 0.5695, "step": 1730 }, { "epoch": 0.28, "grad_norm": 0.9605140262275187, "learning_rate": 8.462366795573564e-06, "loss": 0.5731, "step": 1731 }, { "epoch": 0.28, "grad_norm": 1.068246171383511, "learning_rate": 8.460483551622606e-06, "loss": 0.66, "step": 1732 }, { "epoch": 0.28, "grad_norm": 1.03825456378861, "learning_rate": 8.458599364940537e-06, "loss": 0.6649, "step": 1733 }, { "epoch": 0.28, "grad_norm": 1.0446250644890465, "learning_rate": 8.456714236040664e-06, "loss": 0.6022, "step": 1734 }, { "epoch": 0.28, "grad_norm": 0.9998660039857896, "learning_rate": 8.45482816543655e-06, "loss": 0.7309, "step": 1735 }, { "epoch": 0.28, "grad_norm": 0.9995765188451078, "learning_rate": 8.45294115364201e-06, "loss": 0.6012, "step": 1736 }, { "epoch": 0.28, "grad_norm": 0.9538894410633489, "learning_rate": 8.45105320117112e-06, "loss": 0.6484, "step": 1737 }, { "epoch": 0.28, "grad_norm": 1.0895376379022634, "learning_rate": 8.449164308538209e-06, "loss": 0.675, "step": 1738 }, { "epoch": 0.28, "grad_norm": 1.0196965965900842, "learning_rate": 8.447274476257863e-06, "loss": 0.6398, "step": 1739 }, { "epoch": 0.28, "grad_norm": 0.6720800490790071, "learning_rate": 8.445383704844925e-06, "loss": 0.4733, "step": 1740 }, { "epoch": 0.28, "grad_norm": 1.0028822644163902, "learning_rate": 8.443491994814493e-06, "loss": 0.7383, "step": 1741 }, { "epoch": 0.28, "grad_norm": 1.1008273233074435, "learning_rate": 8.441599346681921e-06, "loss": 0.6774, "step": 1742 }, { "epoch": 0.28, "grad_norm": 0.9577838293406524, "learning_rate": 8.43970576096282e-06, "loss": 0.5457, "step": 1743 }, { "epoch": 0.28, "grad_norm": 1.0392775422049816, "learning_rate": 8.437811238173053e-06, "loss": 0.665, "step": 1744 }, { "epoch": 0.28, "grad_norm": 0.9314100910470888, "learning_rate": 8.43591577882874e-06, "loss": 0.5046, "step": 1745 }, { "epoch": 0.28, "grad_norm": 1.0213911085769738, "learning_rate": 8.43401938344626e-06, "loss": 0.5563, "step": 1746 }, { "epoch": 0.28, "grad_norm": 1.0215389870134195, "learning_rate": 8.432122052542238e-06, "loss": 0.5047, "step": 1747 }, { "epoch": 0.28, "grad_norm": 1.0332692333333713, "learning_rate": 8.430223786633563e-06, "loss": 0.5708, "step": 1748 }, { "epoch": 0.28, "grad_norm": 1.0382015802913187, "learning_rate": 8.428324586237374e-06, "loss": 0.6369, "step": 1749 }, { "epoch": 0.28, "grad_norm": 1.0182815950317061, "learning_rate": 8.426424451871063e-06, "loss": 0.6505, "step": 1750 }, { "epoch": 0.28, "grad_norm": 1.0545239815752525, "learning_rate": 8.424523384052284e-06, "loss": 0.6761, "step": 1751 }, { "epoch": 0.28, "grad_norm": 0.9844930747306614, "learning_rate": 8.422621383298936e-06, "loss": 0.6457, "step": 1752 }, { "epoch": 0.28, "grad_norm": 1.2458380475432014, "learning_rate": 8.42071845012918e-06, "loss": 0.6377, "step": 1753 }, { "epoch": 0.28, "grad_norm": 1.0278551000822955, "learning_rate": 8.418814585061423e-06, "loss": 0.5611, "step": 1754 }, { "epoch": 0.28, "grad_norm": 0.969035816844534, "learning_rate": 8.416909788614335e-06, "loss": 0.6018, "step": 1755 }, { "epoch": 0.28, "grad_norm": 1.0294185081507161, "learning_rate": 8.415004061306833e-06, "loss": 0.6845, "step": 1756 }, { "epoch": 0.28, "grad_norm": 1.049654602466264, "learning_rate": 8.413097403658089e-06, "loss": 0.6566, "step": 1757 }, { "epoch": 0.28, "grad_norm": 1.0863304861635876, "learning_rate": 8.411189816187528e-06, "loss": 0.6343, "step": 1758 }, { "epoch": 0.28, "grad_norm": 1.1089916865262657, "learning_rate": 8.409281299414833e-06, "loss": 0.5735, "step": 1759 }, { "epoch": 0.28, "grad_norm": 1.0471448937479653, "learning_rate": 8.407371853859935e-06, "loss": 0.6563, "step": 1760 }, { "epoch": 0.28, "grad_norm": 1.0176193029037608, "learning_rate": 8.405461480043019e-06, "loss": 0.5793, "step": 1761 }, { "epoch": 0.28, "grad_norm": 1.0747650784827685, "learning_rate": 8.403550178484521e-06, "loss": 0.6258, "step": 1762 }, { "epoch": 0.28, "grad_norm": 0.737891340712526, "learning_rate": 8.401637949705138e-06, "loss": 0.5266, "step": 1763 }, { "epoch": 0.28, "grad_norm": 0.9987039254659821, "learning_rate": 8.399724794225809e-06, "loss": 0.5305, "step": 1764 }, { "epoch": 0.28, "grad_norm": 1.1605615911053861, "learning_rate": 8.397810712567732e-06, "loss": 0.6274, "step": 1765 }, { "epoch": 0.28, "grad_norm": 1.0138026620404887, "learning_rate": 8.39589570525236e-06, "loss": 0.6854, "step": 1766 }, { "epoch": 0.28, "grad_norm": 1.1234830093166164, "learning_rate": 8.393979772801386e-06, "loss": 0.6971, "step": 1767 }, { "epoch": 0.28, "grad_norm": 0.9897540411751579, "learning_rate": 8.392062915736765e-06, "loss": 0.5885, "step": 1768 }, { "epoch": 0.29, "grad_norm": 1.0395495039921814, "learning_rate": 8.390145134580705e-06, "loss": 0.5927, "step": 1769 }, { "epoch": 0.29, "grad_norm": 1.0345495825272193, "learning_rate": 8.38822642985566e-06, "loss": 0.6257, "step": 1770 }, { "epoch": 0.29, "grad_norm": 0.9850108264967862, "learning_rate": 8.386306802084339e-06, "loss": 0.6265, "step": 1771 }, { "epoch": 0.29, "grad_norm": 0.9567888483302034, "learning_rate": 8.3843862517897e-06, "loss": 0.621, "step": 1772 }, { "epoch": 0.29, "grad_norm": 1.0648065753018972, "learning_rate": 8.382464779494954e-06, "loss": 0.534, "step": 1773 }, { "epoch": 0.29, "grad_norm": 0.9975528294466697, "learning_rate": 8.380542385723566e-06, "loss": 0.591, "step": 1774 }, { "epoch": 0.29, "grad_norm": 1.0405999846229763, "learning_rate": 8.378619070999245e-06, "loss": 0.6167, "step": 1775 }, { "epoch": 0.29, "grad_norm": 0.7944337860919333, "learning_rate": 8.37669483584596e-06, "loss": 0.4972, "step": 1776 }, { "epoch": 0.29, "grad_norm": 1.1668380656422028, "learning_rate": 8.37476968078792e-06, "loss": 0.751, "step": 1777 }, { "epoch": 0.29, "grad_norm": 1.0541695218557057, "learning_rate": 8.372843606349594e-06, "loss": 0.5835, "step": 1778 }, { "epoch": 0.29, "grad_norm": 1.1004276898319358, "learning_rate": 8.370916613055695e-06, "loss": 0.6074, "step": 1779 }, { "epoch": 0.29, "grad_norm": 1.0838407787688848, "learning_rate": 8.368988701431192e-06, "loss": 0.6447, "step": 1780 }, { "epoch": 0.29, "grad_norm": 1.0753627423737728, "learning_rate": 8.3670598720013e-06, "loss": 0.6613, "step": 1781 }, { "epoch": 0.29, "grad_norm": 1.080036292743852, "learning_rate": 8.365130125291485e-06, "loss": 0.6769, "step": 1782 }, { "epoch": 0.29, "grad_norm": 1.0854087123461564, "learning_rate": 8.363199461827464e-06, "loss": 0.7298, "step": 1783 }, { "epoch": 0.29, "grad_norm": 0.9766129535654418, "learning_rate": 8.361267882135203e-06, "loss": 0.5707, "step": 1784 }, { "epoch": 0.29, "grad_norm": 1.100336670122762, "learning_rate": 8.359335386740916e-06, "loss": 0.5962, "step": 1785 }, { "epoch": 0.29, "grad_norm": 0.9589186998604762, "learning_rate": 8.357401976171069e-06, "loss": 0.6151, "step": 1786 }, { "epoch": 0.29, "grad_norm": 0.9308742840867967, "learning_rate": 8.355467650952375e-06, "loss": 0.5709, "step": 1787 }, { "epoch": 0.29, "grad_norm": 1.0525982221057608, "learning_rate": 8.353532411611801e-06, "loss": 0.7567, "step": 1788 }, { "epoch": 0.29, "grad_norm": 0.9844658255154851, "learning_rate": 8.351596258676558e-06, "loss": 0.5985, "step": 1789 }, { "epoch": 0.29, "grad_norm": 1.0626351270358532, "learning_rate": 8.349659192674104e-06, "loss": 0.6399, "step": 1790 }, { "epoch": 0.29, "grad_norm": 0.9713130770029553, "learning_rate": 8.347721214132154e-06, "loss": 0.6431, "step": 1791 }, { "epoch": 0.29, "grad_norm": 1.0879141458714308, "learning_rate": 8.345782323578664e-06, "loss": 0.5821, "step": 1792 }, { "epoch": 0.29, "grad_norm": 1.0363899631405824, "learning_rate": 8.343842521541844e-06, "loss": 0.6696, "step": 1793 }, { "epoch": 0.29, "grad_norm": 1.0570492613741975, "learning_rate": 8.341901808550147e-06, "loss": 0.7025, "step": 1794 }, { "epoch": 0.29, "grad_norm": 1.096954505510325, "learning_rate": 8.339960185132275e-06, "loss": 0.6787, "step": 1795 }, { "epoch": 0.29, "grad_norm": 1.1108423117853232, "learning_rate": 8.338017651817183e-06, "loss": 0.6773, "step": 1796 }, { "epoch": 0.29, "grad_norm": 0.8848227765446441, "learning_rate": 8.336074209134071e-06, "loss": 0.4836, "step": 1797 }, { "epoch": 0.29, "grad_norm": 1.0482380217025193, "learning_rate": 8.334129857612383e-06, "loss": 0.6715, "step": 1798 }, { "epoch": 0.29, "grad_norm": 0.8782054633558585, "learning_rate": 8.332184597781818e-06, "loss": 0.5491, "step": 1799 }, { "epoch": 0.29, "grad_norm": 1.0179283223169913, "learning_rate": 8.330238430172315e-06, "loss": 0.5877, "step": 1800 }, { "epoch": 0.29, "grad_norm": 1.061746729547745, "learning_rate": 8.328291355314067e-06, "loss": 0.6625, "step": 1801 }, { "epoch": 0.29, "grad_norm": 1.0650736576041888, "learning_rate": 8.326343373737506e-06, "loss": 0.581, "step": 1802 }, { "epoch": 0.29, "grad_norm": 1.0370813272018884, "learning_rate": 8.32439448597332e-06, "loss": 0.5894, "step": 1803 }, { "epoch": 0.29, "grad_norm": 1.0550795502537282, "learning_rate": 8.322444692552437e-06, "loss": 0.6963, "step": 1804 }, { "epoch": 0.29, "grad_norm": 1.0230569925726605, "learning_rate": 8.320493994006039e-06, "loss": 0.6726, "step": 1805 }, { "epoch": 0.29, "grad_norm": 1.0440828654983443, "learning_rate": 8.318542390865546e-06, "loss": 0.624, "step": 1806 }, { "epoch": 0.29, "grad_norm": 1.0485655865343806, "learning_rate": 8.316589883662629e-06, "loss": 0.6669, "step": 1807 }, { "epoch": 0.29, "grad_norm": 0.9826038180876443, "learning_rate": 8.314636472929206e-06, "loss": 0.5886, "step": 1808 }, { "epoch": 0.29, "grad_norm": 1.124641565398034, "learning_rate": 8.31268215919744e-06, "loss": 0.6869, "step": 1809 }, { "epoch": 0.29, "grad_norm": 1.0359551621696965, "learning_rate": 8.310726942999736e-06, "loss": 0.67, "step": 1810 }, { "epoch": 0.29, "grad_norm": 0.724045565802701, "learning_rate": 8.308770824868757e-06, "loss": 0.5101, "step": 1811 }, { "epoch": 0.29, "grad_norm": 1.0422446369189036, "learning_rate": 8.306813805337395e-06, "loss": 0.6936, "step": 1812 }, { "epoch": 0.29, "grad_norm": 0.9844521046984838, "learning_rate": 8.3048558849388e-06, "loss": 0.6305, "step": 1813 }, { "epoch": 0.29, "grad_norm": 0.9033114546958253, "learning_rate": 8.302897064206363e-06, "loss": 0.6331, "step": 1814 }, { "epoch": 0.29, "grad_norm": 0.9933380467035439, "learning_rate": 8.300937343673722e-06, "loss": 0.6537, "step": 1815 }, { "epoch": 0.29, "grad_norm": 0.979713352894971, "learning_rate": 8.298976723874757e-06, "loss": 0.5506, "step": 1816 }, { "epoch": 0.29, "grad_norm": 1.1059823706623375, "learning_rate": 8.297015205343595e-06, "loss": 0.5433, "step": 1817 }, { "epoch": 0.29, "grad_norm": 1.0057659927592761, "learning_rate": 8.295052788614608e-06, "loss": 0.6048, "step": 1818 }, { "epoch": 0.29, "grad_norm": 1.0369388908581687, "learning_rate": 8.293089474222414e-06, "loss": 0.5785, "step": 1819 }, { "epoch": 0.29, "grad_norm": 1.1722359854213027, "learning_rate": 8.291125262701874e-06, "loss": 0.7334, "step": 1820 }, { "epoch": 0.29, "grad_norm": 1.062333157494112, "learning_rate": 8.289160154588088e-06, "loss": 0.7139, "step": 1821 }, { "epoch": 0.29, "grad_norm": 1.0873619579903715, "learning_rate": 8.287194150416413e-06, "loss": 0.6306, "step": 1822 }, { "epoch": 0.29, "grad_norm": 1.0539449763129014, "learning_rate": 8.285227250722439e-06, "loss": 0.5959, "step": 1823 }, { "epoch": 0.29, "grad_norm": 1.0232104075733648, "learning_rate": 8.283259456042e-06, "loss": 0.6106, "step": 1824 }, { "epoch": 0.29, "grad_norm": 1.0194748021325402, "learning_rate": 8.281290766911187e-06, "loss": 0.5285, "step": 1825 }, { "epoch": 0.29, "grad_norm": 0.984156354882325, "learning_rate": 8.279321183866317e-06, "loss": 0.6307, "step": 1826 }, { "epoch": 0.29, "grad_norm": 1.0774045140665722, "learning_rate": 8.277350707443964e-06, "loss": 0.6489, "step": 1827 }, { "epoch": 0.29, "grad_norm": 1.0531625800835027, "learning_rate": 8.275379338180936e-06, "loss": 0.5808, "step": 1828 }, { "epoch": 0.29, "grad_norm": 1.0644924304710526, "learning_rate": 8.273407076614288e-06, "loss": 0.6346, "step": 1829 }, { "epoch": 0.29, "grad_norm": 1.009802786104154, "learning_rate": 8.271433923281322e-06, "loss": 0.6556, "step": 1830 }, { "epoch": 0.3, "grad_norm": 1.028178110883418, "learning_rate": 8.26945987871958e-06, "loss": 0.6357, "step": 1831 }, { "epoch": 0.3, "grad_norm": 1.0426849063270975, "learning_rate": 8.26748494346684e-06, "loss": 0.5869, "step": 1832 }, { "epoch": 0.3, "grad_norm": 1.1012827784984658, "learning_rate": 8.265509118061135e-06, "loss": 0.6044, "step": 1833 }, { "epoch": 0.3, "grad_norm": 1.0215863309491484, "learning_rate": 8.26353240304073e-06, "loss": 0.6145, "step": 1834 }, { "epoch": 0.3, "grad_norm": 1.0822053368983189, "learning_rate": 8.26155479894414e-06, "loss": 0.6557, "step": 1835 }, { "epoch": 0.3, "grad_norm": 1.1772159547213141, "learning_rate": 8.259576306310118e-06, "loss": 0.6654, "step": 1836 }, { "epoch": 0.3, "grad_norm": 1.0556004590722161, "learning_rate": 8.257596925677657e-06, "loss": 0.5879, "step": 1837 }, { "epoch": 0.3, "grad_norm": 0.9753247931823891, "learning_rate": 8.255616657586e-06, "loss": 0.5754, "step": 1838 }, { "epoch": 0.3, "grad_norm": 1.0180753187469045, "learning_rate": 8.253635502574623e-06, "loss": 0.6365, "step": 1839 }, { "epoch": 0.3, "grad_norm": 0.985576710942524, "learning_rate": 8.251653461183249e-06, "loss": 0.5927, "step": 1840 }, { "epoch": 0.3, "grad_norm": 1.0959951788768731, "learning_rate": 8.249670533951838e-06, "loss": 0.6575, "step": 1841 }, { "epoch": 0.3, "grad_norm": 1.008307485146341, "learning_rate": 8.247686721420596e-06, "loss": 0.6734, "step": 1842 }, { "epoch": 0.3, "grad_norm": 1.0330964672161271, "learning_rate": 8.24570202412997e-06, "loss": 0.6028, "step": 1843 }, { "epoch": 0.3, "grad_norm": 0.7273470216488935, "learning_rate": 8.243716442620644e-06, "loss": 0.5474, "step": 1844 }, { "epoch": 0.3, "grad_norm": 1.0621734748208893, "learning_rate": 8.241729977433543e-06, "loss": 0.6228, "step": 1845 }, { "epoch": 0.3, "grad_norm": 1.0855446138408735, "learning_rate": 8.239742629109839e-06, "loss": 0.6811, "step": 1846 }, { "epoch": 0.3, "grad_norm": 1.0896725078555813, "learning_rate": 8.237754398190937e-06, "loss": 0.6174, "step": 1847 }, { "epoch": 0.3, "grad_norm": 1.1044736210340818, "learning_rate": 8.235765285218491e-06, "loss": 0.6563, "step": 1848 }, { "epoch": 0.3, "grad_norm": 1.0702517521318489, "learning_rate": 8.233775290734385e-06, "loss": 0.6275, "step": 1849 }, { "epoch": 0.3, "grad_norm": 0.6144811204624225, "learning_rate": 8.23178441528075e-06, "loss": 0.4909, "step": 1850 }, { "epoch": 0.3, "grad_norm": 0.9586766604210359, "learning_rate": 8.229792659399957e-06, "loss": 0.5827, "step": 1851 }, { "epoch": 0.3, "grad_norm": 1.0332765274425348, "learning_rate": 8.227800023634614e-06, "loss": 0.6434, "step": 1852 }, { "epoch": 0.3, "grad_norm": 0.6655779450340815, "learning_rate": 8.225806508527568e-06, "loss": 0.5153, "step": 1853 }, { "epoch": 0.3, "grad_norm": 1.1578013782441694, "learning_rate": 8.22381211462191e-06, "loss": 0.7505, "step": 1854 }, { "epoch": 0.3, "grad_norm": 0.7126967873007504, "learning_rate": 8.221816842460967e-06, "loss": 0.4443, "step": 1855 }, { "epoch": 0.3, "grad_norm": 1.1509064268853675, "learning_rate": 8.219820692588306e-06, "loss": 0.6913, "step": 1856 }, { "epoch": 0.3, "grad_norm": 1.0581320978820739, "learning_rate": 8.217823665547733e-06, "loss": 0.6221, "step": 1857 }, { "epoch": 0.3, "grad_norm": 1.048072805009424, "learning_rate": 8.215825761883295e-06, "loss": 0.6806, "step": 1858 }, { "epoch": 0.3, "grad_norm": 1.0464075588370685, "learning_rate": 8.213826982139273e-06, "loss": 0.6497, "step": 1859 }, { "epoch": 0.3, "grad_norm": 1.081698835502318, "learning_rate": 8.211827326860193e-06, "loss": 0.6691, "step": 1860 }, { "epoch": 0.3, "grad_norm": 0.970227583613651, "learning_rate": 8.209826796590814e-06, "loss": 0.5103, "step": 1861 }, { "epoch": 0.3, "grad_norm": 1.1050004756068814, "learning_rate": 8.207825391876137e-06, "loss": 0.6524, "step": 1862 }, { "epoch": 0.3, "grad_norm": 1.0669457282545567, "learning_rate": 8.205823113261397e-06, "loss": 0.6666, "step": 1863 }, { "epoch": 0.3, "grad_norm": 1.0727784349856202, "learning_rate": 8.203819961292074e-06, "loss": 0.6157, "step": 1864 }, { "epoch": 0.3, "grad_norm": 1.1102033134509754, "learning_rate": 8.20181593651388e-06, "loss": 0.6717, "step": 1865 }, { "epoch": 0.3, "grad_norm": 1.1644967565480249, "learning_rate": 8.199811039472764e-06, "loss": 0.6919, "step": 1866 }, { "epoch": 0.3, "grad_norm": 0.9733643986624221, "learning_rate": 8.197805270714918e-06, "loss": 0.595, "step": 1867 }, { "epoch": 0.3, "grad_norm": 0.9757559127369736, "learning_rate": 8.19579863078677e-06, "loss": 0.5169, "step": 1868 }, { "epoch": 0.3, "grad_norm": 1.0823891491321702, "learning_rate": 8.193791120234982e-06, "loss": 0.6022, "step": 1869 }, { "epoch": 0.3, "grad_norm": 1.0136407057982078, "learning_rate": 8.191782739606455e-06, "loss": 0.6133, "step": 1870 }, { "epoch": 0.3, "grad_norm": 0.8469244577806043, "learning_rate": 8.189773489448328e-06, "loss": 0.5035, "step": 1871 }, { "epoch": 0.3, "grad_norm": 0.996652341703191, "learning_rate": 8.187763370307975e-06, "loss": 0.5763, "step": 1872 }, { "epoch": 0.3, "grad_norm": 0.7852253493185378, "learning_rate": 8.185752382733007e-06, "loss": 0.4269, "step": 1873 }, { "epoch": 0.3, "grad_norm": 1.0916002356645216, "learning_rate": 8.183740527271278e-06, "loss": 0.669, "step": 1874 }, { "epoch": 0.3, "grad_norm": 1.136826792078389, "learning_rate": 8.181727804470867e-06, "loss": 0.6673, "step": 1875 }, { "epoch": 0.3, "grad_norm": 0.9991595438422527, "learning_rate": 8.179714214880096e-06, "loss": 0.6238, "step": 1876 }, { "epoch": 0.3, "grad_norm": 1.0484510568340117, "learning_rate": 8.177699759047525e-06, "loss": 0.6568, "step": 1877 }, { "epoch": 0.3, "grad_norm": 0.9547526290395373, "learning_rate": 8.175684437521946e-06, "loss": 0.6039, "step": 1878 }, { "epoch": 0.3, "grad_norm": 1.0683359057504307, "learning_rate": 8.173668250852389e-06, "loss": 0.6622, "step": 1879 }, { "epoch": 0.3, "grad_norm": 1.0374471957933837, "learning_rate": 8.171651199588118e-06, "loss": 0.6153, "step": 1880 }, { "epoch": 0.3, "grad_norm": 0.7482258719262316, "learning_rate": 8.16963328427863e-06, "loss": 0.4951, "step": 1881 }, { "epoch": 0.3, "grad_norm": 1.0673835289948106, "learning_rate": 8.167614505473667e-06, "loss": 0.648, "step": 1882 }, { "epoch": 0.3, "grad_norm": 1.031267899413725, "learning_rate": 8.165594863723197e-06, "loss": 0.6337, "step": 1883 }, { "epoch": 0.3, "grad_norm": 0.9887973193361107, "learning_rate": 8.163574359577422e-06, "loss": 0.6518, "step": 1884 }, { "epoch": 0.3, "grad_norm": 1.0216816123151, "learning_rate": 8.16155299358679e-06, "loss": 0.6523, "step": 1885 }, { "epoch": 0.3, "grad_norm": 0.9361258937232282, "learning_rate": 8.159530766301974e-06, "loss": 0.523, "step": 1886 }, { "epoch": 0.3, "grad_norm": 1.0822460140835453, "learning_rate": 8.157507678273884e-06, "loss": 0.6098, "step": 1887 }, { "epoch": 0.3, "grad_norm": 0.9846889417721696, "learning_rate": 8.155483730053664e-06, "loss": 0.6812, "step": 1888 }, { "epoch": 0.3, "grad_norm": 1.0472018705696453, "learning_rate": 8.153458922192696e-06, "loss": 0.5772, "step": 1889 }, { "epoch": 0.3, "grad_norm": 0.681976135012787, "learning_rate": 8.15143325524259e-06, "loss": 0.4605, "step": 1890 }, { "epoch": 0.3, "grad_norm": 1.1251818949576624, "learning_rate": 8.149406729755198e-06, "loss": 0.7063, "step": 1891 }, { "epoch": 0.3, "grad_norm": 1.0859060596255317, "learning_rate": 8.147379346282599e-06, "loss": 0.7091, "step": 1892 }, { "epoch": 0.31, "grad_norm": 0.9970457265637536, "learning_rate": 8.145351105377107e-06, "loss": 0.5934, "step": 1893 }, { "epoch": 0.31, "grad_norm": 0.9487372532225709, "learning_rate": 8.14332200759127e-06, "loss": 0.6104, "step": 1894 }, { "epoch": 0.31, "grad_norm": 1.038491933667674, "learning_rate": 8.141292053477873e-06, "loss": 0.6704, "step": 1895 }, { "epoch": 0.31, "grad_norm": 0.9900469637871521, "learning_rate": 8.139261243589933e-06, "loss": 0.676, "step": 1896 }, { "epoch": 0.31, "grad_norm": 0.925498033304616, "learning_rate": 8.137229578480694e-06, "loss": 0.5594, "step": 1897 }, { "epoch": 0.31, "grad_norm": 1.0244901228059946, "learning_rate": 8.135197058703638e-06, "loss": 0.5916, "step": 1898 }, { "epoch": 0.31, "grad_norm": 1.0213745065267665, "learning_rate": 8.133163684812484e-06, "loss": 0.6685, "step": 1899 }, { "epoch": 0.31, "grad_norm": 0.9936728441669446, "learning_rate": 8.131129457361176e-06, "loss": 0.6565, "step": 1900 }, { "epoch": 0.31, "grad_norm": 1.0348157107047065, "learning_rate": 8.129094376903891e-06, "loss": 0.6504, "step": 1901 }, { "epoch": 0.31, "grad_norm": 1.0768488551822888, "learning_rate": 8.127058443995046e-06, "loss": 0.6317, "step": 1902 }, { "epoch": 0.31, "grad_norm": 0.9281023505153545, "learning_rate": 8.125021659189281e-06, "loss": 0.5736, "step": 1903 }, { "epoch": 0.31, "grad_norm": 1.0263393171481847, "learning_rate": 8.122984023041476e-06, "loss": 0.5959, "step": 1904 }, { "epoch": 0.31, "grad_norm": 0.7683028202767916, "learning_rate": 8.120945536106738e-06, "loss": 0.5255, "step": 1905 }, { "epoch": 0.31, "grad_norm": 1.0975191352655638, "learning_rate": 8.118906198940403e-06, "loss": 0.7127, "step": 1906 }, { "epoch": 0.31, "grad_norm": 0.9932971526634823, "learning_rate": 8.11686601209805e-06, "loss": 0.5287, "step": 1907 }, { "epoch": 0.31, "grad_norm": 0.6412213084705646, "learning_rate": 8.114824976135478e-06, "loss": 0.5015, "step": 1908 }, { "epoch": 0.31, "grad_norm": 0.985160308454403, "learning_rate": 8.11278309160872e-06, "loss": 0.6109, "step": 1909 }, { "epoch": 0.31, "grad_norm": 1.1657540101316224, "learning_rate": 8.110740359074046e-06, "loss": 0.7356, "step": 1910 }, { "epoch": 0.31, "grad_norm": 0.968709572572293, "learning_rate": 8.108696779087949e-06, "loss": 0.6632, "step": 1911 }, { "epoch": 0.31, "grad_norm": 0.9303354822475052, "learning_rate": 8.106652352207157e-06, "loss": 0.6414, "step": 1912 }, { "epoch": 0.31, "grad_norm": 0.9523928651950057, "learning_rate": 8.10460707898863e-06, "loss": 0.511, "step": 1913 }, { "epoch": 0.31, "grad_norm": 1.085256702337992, "learning_rate": 8.102560959989554e-06, "loss": 0.6768, "step": 1914 }, { "epoch": 0.31, "grad_norm": 0.9278595028727968, "learning_rate": 8.100513995767352e-06, "loss": 0.5906, "step": 1915 }, { "epoch": 0.31, "grad_norm": 1.100670424495352, "learning_rate": 8.098466186879669e-06, "loss": 0.7041, "step": 1916 }, { "epoch": 0.31, "grad_norm": 1.0863607266027058, "learning_rate": 8.096417533884388e-06, "loss": 0.6143, "step": 1917 }, { "epoch": 0.31, "grad_norm": 0.9766862867669861, "learning_rate": 8.094368037339619e-06, "loss": 0.6118, "step": 1918 }, { "epoch": 0.31, "grad_norm": 0.6595381921918712, "learning_rate": 8.092317697803698e-06, "loss": 0.5112, "step": 1919 }, { "epoch": 0.31, "grad_norm": 0.9613728465613319, "learning_rate": 8.090266515835197e-06, "loss": 0.5977, "step": 1920 }, { "epoch": 0.31, "grad_norm": 1.0375818113270032, "learning_rate": 8.088214491992912e-06, "loss": 0.6795, "step": 1921 }, { "epoch": 0.31, "grad_norm": 0.9221077039518808, "learning_rate": 8.086161626835873e-06, "loss": 0.5526, "step": 1922 }, { "epoch": 0.31, "grad_norm": 0.9923873972020701, "learning_rate": 8.084107920923336e-06, "loss": 0.6165, "step": 1923 }, { "epoch": 0.31, "grad_norm": 1.0272871893814814, "learning_rate": 8.08205337481479e-06, "loss": 0.669, "step": 1924 }, { "epoch": 0.31, "grad_norm": 1.0074420599253882, "learning_rate": 8.079997989069945e-06, "loss": 0.6777, "step": 1925 }, { "epoch": 0.31, "grad_norm": 1.0334715874509215, "learning_rate": 8.077941764248746e-06, "loss": 0.5763, "step": 1926 }, { "epoch": 0.31, "grad_norm": 1.0684268300674156, "learning_rate": 8.075884700911368e-06, "loss": 0.6411, "step": 1927 }, { "epoch": 0.31, "grad_norm": 1.0001782158510906, "learning_rate": 8.07382679961821e-06, "loss": 0.6011, "step": 1928 }, { "epoch": 0.31, "grad_norm": 1.1242012273356614, "learning_rate": 8.071768060929903e-06, "loss": 0.6344, "step": 1929 }, { "epoch": 0.31, "grad_norm": 0.9645809750550852, "learning_rate": 8.0697084854073e-06, "loss": 0.5707, "step": 1930 }, { "epoch": 0.31, "grad_norm": 1.0689899672043581, "learning_rate": 8.06764807361149e-06, "loss": 0.6, "step": 1931 }, { "epoch": 0.31, "grad_norm": 0.992122881121202, "learning_rate": 8.065586826103782e-06, "loss": 0.6058, "step": 1932 }, { "epoch": 0.31, "grad_norm": 0.9874474650573509, "learning_rate": 8.063524743445721e-06, "loss": 0.5869, "step": 1933 }, { "epoch": 0.31, "grad_norm": 1.0046332963029097, "learning_rate": 8.061461826199075e-06, "loss": 0.5953, "step": 1934 }, { "epoch": 0.31, "grad_norm": 1.0613108066732035, "learning_rate": 8.059398074925835e-06, "loss": 0.6438, "step": 1935 }, { "epoch": 0.31, "grad_norm": 1.043381392212319, "learning_rate": 8.057333490188228e-06, "loss": 0.5762, "step": 1936 }, { "epoch": 0.31, "grad_norm": 1.0292761210038501, "learning_rate": 8.055268072548704e-06, "loss": 0.5756, "step": 1937 }, { "epoch": 0.31, "grad_norm": 1.0777478814735153, "learning_rate": 8.053201822569933e-06, "loss": 0.6444, "step": 1938 }, { "epoch": 0.31, "grad_norm": 1.0497761945390296, "learning_rate": 8.051134740814827e-06, "loss": 0.6165, "step": 1939 }, { "epoch": 0.31, "grad_norm": 1.111693470563027, "learning_rate": 8.049066827846513e-06, "loss": 0.6035, "step": 1940 }, { "epoch": 0.31, "grad_norm": 1.0284638581571135, "learning_rate": 8.046998084228347e-06, "loss": 0.6671, "step": 1941 }, { "epoch": 0.31, "grad_norm": 1.0507331996853633, "learning_rate": 8.044928510523911e-06, "loss": 0.6332, "step": 1942 }, { "epoch": 0.31, "grad_norm": 1.07407758394666, "learning_rate": 8.042858107297015e-06, "loss": 0.65, "step": 1943 }, { "epoch": 0.31, "grad_norm": 0.9884761789744048, "learning_rate": 8.040786875111694e-06, "loss": 0.6502, "step": 1944 }, { "epoch": 0.31, "grad_norm": 1.0627479018315698, "learning_rate": 8.03871481453221e-06, "loss": 0.6663, "step": 1945 }, { "epoch": 0.31, "grad_norm": 1.1768227769577873, "learning_rate": 8.036641926123043e-06, "loss": 0.695, "step": 1946 }, { "epoch": 0.31, "grad_norm": 0.9830213721304991, "learning_rate": 8.034568210448914e-06, "loss": 0.5873, "step": 1947 }, { "epoch": 0.31, "grad_norm": 0.9411894878501208, "learning_rate": 8.032493668074756e-06, "loss": 0.6515, "step": 1948 }, { "epoch": 0.31, "grad_norm": 1.0227226684578699, "learning_rate": 8.03041829956573e-06, "loss": 0.6406, "step": 1949 }, { "epoch": 0.31, "grad_norm": 1.0784308805027176, "learning_rate": 8.028342105487226e-06, "loss": 0.7069, "step": 1950 }, { "epoch": 0.31, "grad_norm": 1.0665093746620757, "learning_rate": 8.026265086404856e-06, "loss": 0.6029, "step": 1951 }, { "epoch": 0.31, "grad_norm": 1.0665936853303892, "learning_rate": 8.024187242884456e-06, "loss": 0.6731, "step": 1952 }, { "epoch": 0.31, "grad_norm": 1.0500557340609795, "learning_rate": 8.02210857549209e-06, "loss": 0.6038, "step": 1953 }, { "epoch": 0.31, "grad_norm": 1.0003403707183904, "learning_rate": 8.020029084794043e-06, "loss": 0.6547, "step": 1954 }, { "epoch": 0.31, "grad_norm": 1.0763977392867565, "learning_rate": 8.017948771356824e-06, "loss": 0.6499, "step": 1955 }, { "epoch": 0.32, "grad_norm": 0.9482502217798888, "learning_rate": 8.01586763574717e-06, "loss": 0.5633, "step": 1956 }, { "epoch": 0.32, "grad_norm": 1.0072454045966897, "learning_rate": 8.01378567853204e-06, "loss": 0.6581, "step": 1957 }, { "epoch": 0.32, "grad_norm": 1.1158516183273934, "learning_rate": 8.011702900278614e-06, "loss": 0.6772, "step": 1958 }, { "epoch": 0.32, "grad_norm": 0.9069256411697026, "learning_rate": 8.009619301554303e-06, "loss": 0.5151, "step": 1959 }, { "epoch": 0.32, "grad_norm": 0.9975652633698152, "learning_rate": 8.007534882926731e-06, "loss": 0.526, "step": 1960 }, { "epoch": 0.32, "grad_norm": 1.0210895217525349, "learning_rate": 8.005449644963756e-06, "loss": 0.6013, "step": 1961 }, { "epoch": 0.32, "grad_norm": 1.0261806505993152, "learning_rate": 8.00336358823345e-06, "loss": 0.6103, "step": 1962 }, { "epoch": 0.32, "grad_norm": 0.9369903753274512, "learning_rate": 8.001276713304116e-06, "loss": 0.6392, "step": 1963 }, { "epoch": 0.32, "grad_norm": 1.0411867265022243, "learning_rate": 7.999189020744273e-06, "loss": 0.5983, "step": 1964 }, { "epoch": 0.32, "grad_norm": 0.9235641784399136, "learning_rate": 7.997100511122669e-06, "loss": 0.6123, "step": 1965 }, { "epoch": 0.32, "grad_norm": 1.01542544732557, "learning_rate": 7.99501118500827e-06, "loss": 0.6525, "step": 1966 }, { "epoch": 0.32, "grad_norm": 1.0057105494489462, "learning_rate": 7.992921042970264e-06, "loss": 0.5757, "step": 1967 }, { "epoch": 0.32, "grad_norm": 1.0477621982696093, "learning_rate": 7.990830085578068e-06, "loss": 0.6993, "step": 1968 }, { "epoch": 0.32, "grad_norm": 1.0026430858701145, "learning_rate": 7.988738313401312e-06, "loss": 0.6398, "step": 1969 }, { "epoch": 0.32, "grad_norm": 1.0620971683207427, "learning_rate": 7.986645727009856e-06, "loss": 0.6552, "step": 1970 }, { "epoch": 0.32, "grad_norm": 1.031824942138432, "learning_rate": 7.984552326973776e-06, "loss": 0.6628, "step": 1971 }, { "epoch": 0.32, "grad_norm": 1.0665716750502996, "learning_rate": 7.982458113863373e-06, "loss": 0.6487, "step": 1972 }, { "epoch": 0.32, "grad_norm": 1.0029031995074955, "learning_rate": 7.980363088249167e-06, "loss": 0.6196, "step": 1973 }, { "epoch": 0.32, "grad_norm": 0.9486493328205855, "learning_rate": 7.978267250701904e-06, "loss": 0.5783, "step": 1974 }, { "epoch": 0.32, "grad_norm": 0.9949223714268802, "learning_rate": 7.976170601792543e-06, "loss": 0.7236, "step": 1975 }, { "epoch": 0.32, "grad_norm": 1.016433832809466, "learning_rate": 7.97407314209227e-06, "loss": 0.6337, "step": 1976 }, { "epoch": 0.32, "grad_norm": 0.9402565535052956, "learning_rate": 7.971974872172497e-06, "loss": 0.5083, "step": 1977 }, { "epoch": 0.32, "grad_norm": 1.0132233493236422, "learning_rate": 7.969875792604842e-06, "loss": 0.6626, "step": 1978 }, { "epoch": 0.32, "grad_norm": 0.9951281064603269, "learning_rate": 7.967775903961158e-06, "loss": 0.6049, "step": 1979 }, { "epoch": 0.32, "grad_norm": 1.0167015209018815, "learning_rate": 7.96567520681351e-06, "loss": 0.6799, "step": 1980 }, { "epoch": 0.32, "grad_norm": 1.0533996850244487, "learning_rate": 7.963573701734185e-06, "loss": 0.5744, "step": 1981 }, { "epoch": 0.32, "grad_norm": 1.0090592024541625, "learning_rate": 7.961471389295694e-06, "loss": 0.5835, "step": 1982 }, { "epoch": 0.32, "grad_norm": 1.1383223156297746, "learning_rate": 7.959368270070763e-06, "loss": 0.6271, "step": 1983 }, { "epoch": 0.32, "grad_norm": 1.1116792050877726, "learning_rate": 7.957264344632338e-06, "loss": 0.6601, "step": 1984 }, { "epoch": 0.32, "grad_norm": 1.018476930765761, "learning_rate": 7.95515961355359e-06, "loss": 0.6604, "step": 1985 }, { "epoch": 0.32, "grad_norm": 1.081145993325459, "learning_rate": 7.953054077407903e-06, "loss": 0.6891, "step": 1986 }, { "epoch": 0.32, "grad_norm": 0.9264933505418262, "learning_rate": 7.950947736768884e-06, "loss": 0.5825, "step": 1987 }, { "epoch": 0.32, "grad_norm": 1.0420210854137582, "learning_rate": 7.948840592210358e-06, "loss": 0.7006, "step": 1988 }, { "epoch": 0.32, "grad_norm": 0.9731076134852483, "learning_rate": 7.94673264430637e-06, "loss": 0.6131, "step": 1989 }, { "epoch": 0.32, "grad_norm": 0.9708495311147903, "learning_rate": 7.94462389363118e-06, "loss": 0.5896, "step": 1990 }, { "epoch": 0.32, "grad_norm": 0.9956630272461023, "learning_rate": 7.942514340759275e-06, "loss": 0.6952, "step": 1991 }, { "epoch": 0.32, "grad_norm": 0.7246089343983644, "learning_rate": 7.940403986265353e-06, "loss": 0.5023, "step": 1992 }, { "epoch": 0.32, "grad_norm": 1.068740135715354, "learning_rate": 7.93829283072433e-06, "loss": 0.6365, "step": 1993 }, { "epoch": 0.32, "grad_norm": 0.9761955533982813, "learning_rate": 7.936180874711347e-06, "loss": 0.5576, "step": 1994 }, { "epoch": 0.32, "grad_norm": 1.0197491590903691, "learning_rate": 7.934068118801758e-06, "loss": 0.6007, "step": 1995 }, { "epoch": 0.32, "grad_norm": 1.013614928651729, "learning_rate": 7.931954563571134e-06, "loss": 0.6485, "step": 1996 }, { "epoch": 0.32, "grad_norm": 1.0366744648945945, "learning_rate": 7.92984020959527e-06, "loss": 0.6081, "step": 1997 }, { "epoch": 0.32, "grad_norm": 1.0095285612348635, "learning_rate": 7.92772505745017e-06, "loss": 0.5701, "step": 1998 }, { "epoch": 0.32, "grad_norm": 0.9417884432762371, "learning_rate": 7.925609107712057e-06, "loss": 0.5678, "step": 1999 }, { "epoch": 0.32, "grad_norm": 1.0019134036388098, "learning_rate": 7.923492360957383e-06, "loss": 0.6491, "step": 2000 }, { "epoch": 0.32, "grad_norm": 1.018472058290425, "learning_rate": 7.921374817762801e-06, "loss": 0.6862, "step": 2001 }, { "epoch": 0.32, "grad_norm": 0.967819687624286, "learning_rate": 7.919256478705192e-06, "loss": 0.6792, "step": 2002 }, { "epoch": 0.32, "grad_norm": 0.9329606419877908, "learning_rate": 7.917137344361647e-06, "loss": 0.5299, "step": 2003 }, { "epoch": 0.32, "grad_norm": 1.007616233161508, "learning_rate": 7.91501741530948e-06, "loss": 0.6973, "step": 2004 }, { "epoch": 0.32, "grad_norm": 1.0038678740391376, "learning_rate": 7.912896692126216e-06, "loss": 0.6127, "step": 2005 }, { "epoch": 0.32, "grad_norm": 0.980762374008956, "learning_rate": 7.910775175389595e-06, "loss": 0.6242, "step": 2006 }, { "epoch": 0.32, "grad_norm": 1.0284556986146682, "learning_rate": 7.908652865677584e-06, "loss": 0.6633, "step": 2007 }, { "epoch": 0.32, "grad_norm": 1.0028130598791773, "learning_rate": 7.906529763568354e-06, "loss": 0.5724, "step": 2008 }, { "epoch": 0.32, "grad_norm": 1.0505062226778203, "learning_rate": 7.904405869640296e-06, "loss": 0.597, "step": 2009 }, { "epoch": 0.32, "grad_norm": 1.041478169750078, "learning_rate": 7.902281184472021e-06, "loss": 0.6307, "step": 2010 }, { "epoch": 0.32, "grad_norm": 1.1258947799335886, "learning_rate": 7.900155708642347e-06, "loss": 0.6875, "step": 2011 }, { "epoch": 0.32, "grad_norm": 1.0925877922762228, "learning_rate": 7.898029442730316e-06, "loss": 0.6599, "step": 2012 }, { "epoch": 0.32, "grad_norm": 1.1121577938042486, "learning_rate": 7.89590238731518e-06, "loss": 0.6447, "step": 2013 }, { "epoch": 0.32, "grad_norm": 0.9891122462044993, "learning_rate": 7.893774542976408e-06, "loss": 0.6444, "step": 2014 }, { "epoch": 0.32, "grad_norm": 1.0178244244657477, "learning_rate": 7.891645910293683e-06, "loss": 0.6464, "step": 2015 }, { "epoch": 0.32, "grad_norm": 1.1190640597755588, "learning_rate": 7.889516489846904e-06, "loss": 0.7081, "step": 2016 }, { "epoch": 0.32, "grad_norm": 0.9376282934354488, "learning_rate": 7.887386282216182e-06, "loss": 0.5193, "step": 2017 }, { "epoch": 0.33, "grad_norm": 0.9315979567364392, "learning_rate": 7.885255287981845e-06, "loss": 0.4983, "step": 2018 }, { "epoch": 0.33, "grad_norm": 0.9360925707387926, "learning_rate": 7.883123507724437e-06, "loss": 0.6793, "step": 2019 }, { "epoch": 0.33, "grad_norm": 0.8240866564978023, "learning_rate": 7.88099094202471e-06, "loss": 0.553, "step": 2020 }, { "epoch": 0.33, "grad_norm": 1.0552387252262851, "learning_rate": 7.878857591463636e-06, "loss": 0.6835, "step": 2021 }, { "epoch": 0.33, "grad_norm": 1.0532354399847978, "learning_rate": 7.876723456622399e-06, "loss": 0.6576, "step": 2022 }, { "epoch": 0.33, "grad_norm": 0.975130802775477, "learning_rate": 7.874588538082394e-06, "loss": 0.566, "step": 2023 }, { "epoch": 0.33, "grad_norm": 0.9266834455280066, "learning_rate": 7.872452836425233e-06, "loss": 0.6082, "step": 2024 }, { "epoch": 0.33, "grad_norm": 1.001348759176678, "learning_rate": 7.870316352232739e-06, "loss": 0.6213, "step": 2025 }, { "epoch": 0.33, "grad_norm": 1.1022720059019917, "learning_rate": 7.868179086086951e-06, "loss": 0.6977, "step": 2026 }, { "epoch": 0.33, "grad_norm": 1.0891228964044355, "learning_rate": 7.866041038570117e-06, "loss": 0.6576, "step": 2027 }, { "epoch": 0.33, "grad_norm": 0.7033120574863014, "learning_rate": 7.863902210264702e-06, "loss": 0.4756, "step": 2028 }, { "epoch": 0.33, "grad_norm": 1.0301721011395628, "learning_rate": 7.861762601753379e-06, "loss": 0.5798, "step": 2029 }, { "epoch": 0.33, "grad_norm": 0.9713122289800508, "learning_rate": 7.859622213619043e-06, "loss": 0.6064, "step": 2030 }, { "epoch": 0.33, "grad_norm": 0.9166861836572292, "learning_rate": 7.857481046444785e-06, "loss": 0.5651, "step": 2031 }, { "epoch": 0.33, "grad_norm": 1.088076409534945, "learning_rate": 7.855339100813925e-06, "loss": 0.6263, "step": 2032 }, { "epoch": 0.33, "grad_norm": 1.0616452849750926, "learning_rate": 7.853196377309986e-06, "loss": 0.6214, "step": 2033 }, { "epoch": 0.33, "grad_norm": 0.9562817434125473, "learning_rate": 7.851052876516708e-06, "loss": 0.5707, "step": 2034 }, { "epoch": 0.33, "grad_norm": 0.958410897538622, "learning_rate": 7.848908599018033e-06, "loss": 0.5643, "step": 2035 }, { "epoch": 0.33, "grad_norm": 1.0219612308357566, "learning_rate": 7.846763545398124e-06, "loss": 0.6315, "step": 2036 }, { "epoch": 0.33, "grad_norm": 1.0509593492374971, "learning_rate": 7.844617716241358e-06, "loss": 0.658, "step": 2037 }, { "epoch": 0.33, "grad_norm": 1.0089412867215362, "learning_rate": 7.842471112132311e-06, "loss": 0.5791, "step": 2038 }, { "epoch": 0.33, "grad_norm": 1.114245945367318, "learning_rate": 7.84032373365578e-06, "loss": 0.6294, "step": 2039 }, { "epoch": 0.33, "grad_norm": 0.941736617895979, "learning_rate": 7.83817558139677e-06, "loss": 0.5635, "step": 2040 }, { "epoch": 0.33, "grad_norm": 0.9983443866436168, "learning_rate": 7.836026655940497e-06, "loss": 0.597, "step": 2041 }, { "epoch": 0.33, "grad_norm": 1.0645662242907994, "learning_rate": 7.833876957872388e-06, "loss": 0.6414, "step": 2042 }, { "epoch": 0.33, "grad_norm": 0.9629343154807554, "learning_rate": 7.83172648777808e-06, "loss": 0.5486, "step": 2043 }, { "epoch": 0.33, "grad_norm": 1.0118379257709842, "learning_rate": 7.829575246243417e-06, "loss": 0.5936, "step": 2044 }, { "epoch": 0.33, "grad_norm": 0.9721244453186837, "learning_rate": 7.82742323385446e-06, "loss": 0.5174, "step": 2045 }, { "epoch": 0.33, "grad_norm": 1.0398470026578153, "learning_rate": 7.825270451197477e-06, "loss": 0.6944, "step": 2046 }, { "epoch": 0.33, "grad_norm": 1.3068989131552775, "learning_rate": 7.823116898858945e-06, "loss": 0.5915, "step": 2047 }, { "epoch": 0.33, "grad_norm": 1.0106897674424422, "learning_rate": 7.820962577425548e-06, "loss": 0.5205, "step": 2048 }, { "epoch": 0.33, "grad_norm": 1.1883492742811927, "learning_rate": 7.818807487484186e-06, "loss": 0.7669, "step": 2049 }, { "epoch": 0.33, "grad_norm": 1.1041324044092566, "learning_rate": 7.816651629621963e-06, "loss": 0.6771, "step": 2050 }, { "epoch": 0.33, "grad_norm": 0.9721792400151228, "learning_rate": 7.814495004426195e-06, "loss": 0.6271, "step": 2051 }, { "epoch": 0.33, "grad_norm": 0.9849060740235217, "learning_rate": 7.812337612484404e-06, "loss": 0.5689, "step": 2052 }, { "epoch": 0.33, "grad_norm": 1.0965701090832938, "learning_rate": 7.810179454384326e-06, "loss": 0.6196, "step": 2053 }, { "epoch": 0.33, "grad_norm": 1.0662649921282041, "learning_rate": 7.808020530713902e-06, "loss": 0.6487, "step": 2054 }, { "epoch": 0.33, "grad_norm": 0.9779694843125751, "learning_rate": 7.805860842061282e-06, "loss": 0.6074, "step": 2055 }, { "epoch": 0.33, "grad_norm": 1.0371165085767355, "learning_rate": 7.803700389014824e-06, "loss": 0.5966, "step": 2056 }, { "epoch": 0.33, "grad_norm": 0.9656577001546961, "learning_rate": 7.801539172163097e-06, "loss": 0.6343, "step": 2057 }, { "epoch": 0.33, "grad_norm": 1.0907897309078987, "learning_rate": 7.799377192094872e-06, "loss": 0.5901, "step": 2058 }, { "epoch": 0.33, "grad_norm": 1.0391155945975157, "learning_rate": 7.797214449399136e-06, "loss": 0.6718, "step": 2059 }, { "epoch": 0.33, "grad_norm": 0.970802655665947, "learning_rate": 7.79505094466508e-06, "loss": 0.6106, "step": 2060 }, { "epoch": 0.33, "grad_norm": 1.4975500085063516, "learning_rate": 7.792886678482096e-06, "loss": 0.5436, "step": 2061 }, { "epoch": 0.33, "grad_norm": 1.022620941330031, "learning_rate": 7.7907216514398e-06, "loss": 0.6167, "step": 2062 }, { "epoch": 0.33, "grad_norm": 1.022589008246891, "learning_rate": 7.788555864127995e-06, "loss": 0.6031, "step": 2063 }, { "epoch": 0.33, "grad_norm": 0.9488340893757712, "learning_rate": 7.786389317136708e-06, "loss": 0.6, "step": 2064 }, { "epoch": 0.33, "grad_norm": 1.1107757558635087, "learning_rate": 7.784222011056164e-06, "loss": 0.7038, "step": 2065 }, { "epoch": 0.33, "grad_norm": 1.1408904178479418, "learning_rate": 7.782053946476795e-06, "loss": 0.6719, "step": 2066 }, { "epoch": 0.33, "grad_norm": 0.976330884150759, "learning_rate": 7.779885123989244e-06, "loss": 0.5981, "step": 2067 }, { "epoch": 0.33, "grad_norm": 1.0118914254326963, "learning_rate": 7.777715544184358e-06, "loss": 0.5866, "step": 2068 }, { "epoch": 0.33, "grad_norm": 1.0463059068660445, "learning_rate": 7.775545207653188e-06, "loss": 0.7049, "step": 2069 }, { "epoch": 0.33, "grad_norm": 1.0140843075338823, "learning_rate": 7.773374114986996e-06, "loss": 0.6366, "step": 2070 }, { "epoch": 0.33, "grad_norm": 1.0758735668897645, "learning_rate": 7.771202266777247e-06, "loss": 0.6564, "step": 2071 }, { "epoch": 0.33, "grad_norm": 1.0084112592974492, "learning_rate": 7.76902966361561e-06, "loss": 0.5532, "step": 2072 }, { "epoch": 0.33, "grad_norm": 1.0369849279025734, "learning_rate": 7.766856306093966e-06, "loss": 0.6544, "step": 2073 }, { "epoch": 0.33, "grad_norm": 1.0062071651625457, "learning_rate": 7.764682194804394e-06, "loss": 0.5698, "step": 2074 }, { "epoch": 0.33, "grad_norm": 1.0565988988799921, "learning_rate": 7.762507330339185e-06, "loss": 0.5822, "step": 2075 }, { "epoch": 0.33, "grad_norm": 0.9220190328693751, "learning_rate": 7.76033171329083e-06, "loss": 0.5636, "step": 2076 }, { "epoch": 0.33, "grad_norm": 1.012920946453687, "learning_rate": 7.758155344252025e-06, "loss": 0.558, "step": 2077 }, { "epoch": 0.33, "grad_norm": 0.9366909549974815, "learning_rate": 7.755978223815678e-06, "loss": 0.5931, "step": 2078 }, { "epoch": 0.33, "grad_norm": 0.955611734012265, "learning_rate": 7.753800352574891e-06, "loss": 0.5458, "step": 2079 }, { "epoch": 0.34, "grad_norm": 1.0149920410921363, "learning_rate": 7.751621731122981e-06, "loss": 0.5602, "step": 2080 }, { "epoch": 0.34, "grad_norm": 0.9868284832705365, "learning_rate": 7.749442360053462e-06, "loss": 0.5291, "step": 2081 }, { "epoch": 0.34, "grad_norm": 1.0135731372838592, "learning_rate": 7.747262239960055e-06, "loss": 0.6495, "step": 2082 }, { "epoch": 0.34, "grad_norm": 1.0063418158904514, "learning_rate": 7.745081371436686e-06, "loss": 0.6039, "step": 2083 }, { "epoch": 0.34, "grad_norm": 0.9989108272103109, "learning_rate": 7.742899755077482e-06, "loss": 0.615, "step": 2084 }, { "epoch": 0.34, "grad_norm": 0.9760342977118495, "learning_rate": 7.740717391476778e-06, "loss": 0.5688, "step": 2085 }, { "epoch": 0.34, "grad_norm": 0.6766906224146437, "learning_rate": 7.738534281229106e-06, "loss": 0.4681, "step": 2086 }, { "epoch": 0.34, "grad_norm": 0.9951447643457285, "learning_rate": 7.736350424929209e-06, "loss": 0.5706, "step": 2087 }, { "epoch": 0.34, "grad_norm": 1.1770440409555176, "learning_rate": 7.734165823172028e-06, "loss": 0.6948, "step": 2088 }, { "epoch": 0.34, "grad_norm": 0.969816041287878, "learning_rate": 7.731980476552708e-06, "loss": 0.5987, "step": 2089 }, { "epoch": 0.34, "grad_norm": 0.9880512918059993, "learning_rate": 7.7297943856666e-06, "loss": 0.6257, "step": 2090 }, { "epoch": 0.34, "grad_norm": 0.667036197838884, "learning_rate": 7.727607551109252e-06, "loss": 0.4954, "step": 2091 }, { "epoch": 0.34, "grad_norm": 1.0545500030018207, "learning_rate": 7.725419973476422e-06, "loss": 0.7486, "step": 2092 }, { "epoch": 0.34, "grad_norm": 1.1410138392031859, "learning_rate": 7.723231653364065e-06, "loss": 0.5423, "step": 2093 }, { "epoch": 0.34, "grad_norm": 1.0689512480568013, "learning_rate": 7.72104259136834e-06, "loss": 0.6598, "step": 2094 }, { "epoch": 0.34, "grad_norm": 1.0272364091535449, "learning_rate": 7.718852788085604e-06, "loss": 0.6589, "step": 2095 }, { "epoch": 0.34, "grad_norm": 1.0639373981161628, "learning_rate": 7.716662244112425e-06, "loss": 0.648, "step": 2096 }, { "epoch": 0.34, "grad_norm": 0.9929762039864671, "learning_rate": 7.714470960045565e-06, "loss": 0.6687, "step": 2097 }, { "epoch": 0.34, "grad_norm": 0.61768735296764, "learning_rate": 7.712278936481992e-06, "loss": 0.4606, "step": 2098 }, { "epoch": 0.34, "grad_norm": 1.0090334206433311, "learning_rate": 7.710086174018871e-06, "loss": 0.6218, "step": 2099 }, { "epoch": 0.34, "grad_norm": 1.1210131154810046, "learning_rate": 7.707892673253572e-06, "loss": 0.6655, "step": 2100 }, { "epoch": 0.34, "grad_norm": 0.9515248636097394, "learning_rate": 7.705698434783666e-06, "loss": 0.5694, "step": 2101 }, { "epoch": 0.34, "grad_norm": 0.9654665071364131, "learning_rate": 7.703503459206922e-06, "loss": 0.7025, "step": 2102 }, { "epoch": 0.34, "grad_norm": 1.0099391509418814, "learning_rate": 7.701307747121314e-06, "loss": 0.6657, "step": 2103 }, { "epoch": 0.34, "grad_norm": 1.0808159811481284, "learning_rate": 7.699111299125015e-06, "loss": 0.6455, "step": 2104 }, { "epoch": 0.34, "grad_norm": 1.1068453224189645, "learning_rate": 7.696914115816395e-06, "loss": 0.7246, "step": 2105 }, { "epoch": 0.34, "grad_norm": 0.9860676801074832, "learning_rate": 7.69471619779403e-06, "loss": 0.6059, "step": 2106 }, { "epoch": 0.34, "grad_norm": 1.0564111640366705, "learning_rate": 7.692517545656691e-06, "loss": 0.6151, "step": 2107 }, { "epoch": 0.34, "grad_norm": 1.1464401318846351, "learning_rate": 7.690318160003356e-06, "loss": 0.6211, "step": 2108 }, { "epoch": 0.34, "grad_norm": 0.9964937363368471, "learning_rate": 7.688118041433192e-06, "loss": 0.5264, "step": 2109 }, { "epoch": 0.34, "grad_norm": 1.0636706486692744, "learning_rate": 7.685917190545576e-06, "loss": 0.5781, "step": 2110 }, { "epoch": 0.34, "grad_norm": 1.1369028503260206, "learning_rate": 7.683715607940078e-06, "loss": 0.6927, "step": 2111 }, { "epoch": 0.34, "grad_norm": 1.0010836095958955, "learning_rate": 7.681513294216476e-06, "loss": 0.5885, "step": 2112 }, { "epoch": 0.34, "grad_norm": 0.9882752742069714, "learning_rate": 7.679310249974732e-06, "loss": 0.6154, "step": 2113 }, { "epoch": 0.34, "grad_norm": 1.0545786223824967, "learning_rate": 7.677106475815021e-06, "loss": 0.6523, "step": 2114 }, { "epoch": 0.34, "grad_norm": 1.0700322278876584, "learning_rate": 7.674901972337712e-06, "loss": 0.626, "step": 2115 }, { "epoch": 0.34, "grad_norm": 1.035182276123007, "learning_rate": 7.672696740143372e-06, "loss": 0.6767, "step": 2116 }, { "epoch": 0.34, "grad_norm": 0.9665535157453695, "learning_rate": 7.670490779832767e-06, "loss": 0.591, "step": 2117 }, { "epoch": 0.34, "grad_norm": 0.9659227906536961, "learning_rate": 7.668284092006859e-06, "loss": 0.6228, "step": 2118 }, { "epoch": 0.34, "grad_norm": 1.047978084950697, "learning_rate": 7.666076677266813e-06, "loss": 0.6053, "step": 2119 }, { "epoch": 0.34, "grad_norm": 1.0133906562186499, "learning_rate": 7.66386853621399e-06, "loss": 0.6341, "step": 2120 }, { "epoch": 0.34, "grad_norm": 1.0814288767218916, "learning_rate": 7.661659669449948e-06, "loss": 0.6563, "step": 2121 }, { "epoch": 0.34, "grad_norm": 1.0052283508300457, "learning_rate": 7.659450077576444e-06, "loss": 0.5873, "step": 2122 }, { "epoch": 0.34, "grad_norm": 1.0509371560038792, "learning_rate": 7.657239761195428e-06, "loss": 0.6882, "step": 2123 }, { "epoch": 0.34, "grad_norm": 0.9998048570627717, "learning_rate": 7.655028720909057e-06, "loss": 0.5735, "step": 2124 }, { "epoch": 0.34, "grad_norm": 1.0348978674299782, "learning_rate": 7.652816957319674e-06, "loss": 0.6686, "step": 2125 }, { "epoch": 0.34, "grad_norm": 1.0167365285033512, "learning_rate": 7.650604471029825e-06, "loss": 0.5765, "step": 2126 }, { "epoch": 0.34, "grad_norm": 0.9954668110172916, "learning_rate": 7.648391262642257e-06, "loss": 0.6018, "step": 2127 }, { "epoch": 0.34, "grad_norm": 1.037648377035903, "learning_rate": 7.646177332759906e-06, "loss": 0.5853, "step": 2128 }, { "epoch": 0.34, "grad_norm": 0.996204503450735, "learning_rate": 7.643962681985904e-06, "loss": 0.6715, "step": 2129 }, { "epoch": 0.34, "grad_norm": 1.050585723821499, "learning_rate": 7.641747310923588e-06, "loss": 0.6378, "step": 2130 }, { "epoch": 0.34, "grad_norm": 1.0216530161655883, "learning_rate": 7.639531220176484e-06, "loss": 0.6345, "step": 2131 }, { "epoch": 0.34, "grad_norm": 1.012455957258552, "learning_rate": 7.637314410348315e-06, "loss": 0.5577, "step": 2132 }, { "epoch": 0.34, "grad_norm": 1.0420492122468314, "learning_rate": 7.635096882043006e-06, "loss": 0.6699, "step": 2133 }, { "epoch": 0.34, "grad_norm": 0.9964637629612332, "learning_rate": 7.632878635864666e-06, "loss": 0.6171, "step": 2134 }, { "epoch": 0.34, "grad_norm": 1.0273271574807576, "learning_rate": 7.630659672417613e-06, "loss": 0.6096, "step": 2135 }, { "epoch": 0.34, "grad_norm": 1.025247235444467, "learning_rate": 7.628439992306349e-06, "loss": 0.6155, "step": 2136 }, { "epoch": 0.34, "grad_norm": 1.028249892097847, "learning_rate": 7.626219596135578e-06, "loss": 0.5321, "step": 2137 }, { "epoch": 0.34, "grad_norm": 1.0311433362703426, "learning_rate": 7.623998484510197e-06, "loss": 0.6398, "step": 2138 }, { "epoch": 0.34, "grad_norm": 1.024511871922928, "learning_rate": 7.621776658035298e-06, "loss": 0.6772, "step": 2139 }, { "epoch": 0.34, "grad_norm": 1.0152003532559413, "learning_rate": 7.619554117316165e-06, "loss": 0.6664, "step": 2140 }, { "epoch": 0.34, "grad_norm": 1.0245939333101743, "learning_rate": 7.617330862958287e-06, "loss": 0.6408, "step": 2141 }, { "epoch": 0.35, "grad_norm": 1.0574103834858695, "learning_rate": 7.615106895567331e-06, "loss": 0.6674, "step": 2142 }, { "epoch": 0.35, "grad_norm": 0.9822803377967566, "learning_rate": 7.612882215749172e-06, "loss": 0.5971, "step": 2143 }, { "epoch": 0.35, "grad_norm": 0.9864689838284217, "learning_rate": 7.6106568241098745e-06, "loss": 0.6234, "step": 2144 }, { "epoch": 0.35, "grad_norm": 0.9277234406275513, "learning_rate": 7.608430721255691e-06, "loss": 0.5964, "step": 2145 }, { "epoch": 0.35, "grad_norm": 1.021101640579584, "learning_rate": 7.606203907793081e-06, "loss": 0.6495, "step": 2146 }, { "epoch": 0.35, "grad_norm": 1.118582162934433, "learning_rate": 7.603976384328684e-06, "loss": 0.6569, "step": 2147 }, { "epoch": 0.35, "grad_norm": 0.789696472848177, "learning_rate": 7.601748151469341e-06, "loss": 0.5066, "step": 2148 }, { "epoch": 0.35, "grad_norm": 1.0313553521648877, "learning_rate": 7.599519209822085e-06, "loss": 0.5704, "step": 2149 }, { "epoch": 0.35, "grad_norm": 1.0248594993172224, "learning_rate": 7.59728955999414e-06, "loss": 0.5842, "step": 2150 }, { "epoch": 0.35, "grad_norm": 0.987348512270206, "learning_rate": 7.595059202592923e-06, "loss": 0.6458, "step": 2151 }, { "epoch": 0.35, "grad_norm": 0.9094226558474473, "learning_rate": 7.5928281382260474e-06, "loss": 0.5912, "step": 2152 }, { "epoch": 0.35, "grad_norm": 1.0300689109469887, "learning_rate": 7.590596367501314e-06, "loss": 0.6684, "step": 2153 }, { "epoch": 0.35, "grad_norm": 1.029470695436444, "learning_rate": 7.58836389102672e-06, "loss": 0.57, "step": 2154 }, { "epoch": 0.35, "grad_norm": 1.0607020540061805, "learning_rate": 7.586130709410454e-06, "loss": 0.7084, "step": 2155 }, { "epoch": 0.35, "grad_norm": 0.9373987152064778, "learning_rate": 7.583896823260894e-06, "loss": 0.5209, "step": 2156 }, { "epoch": 0.35, "grad_norm": 1.053382756930419, "learning_rate": 7.581662233186618e-06, "loss": 0.6058, "step": 2157 }, { "epoch": 0.35, "grad_norm": 1.0074022017538398, "learning_rate": 7.5794269397963814e-06, "loss": 0.658, "step": 2158 }, { "epoch": 0.35, "grad_norm": 1.0794665647891577, "learning_rate": 7.577190943699145e-06, "loss": 0.6272, "step": 2159 }, { "epoch": 0.35, "grad_norm": 1.009622520294226, "learning_rate": 7.574954245504056e-06, "loss": 0.5897, "step": 2160 }, { "epoch": 0.35, "grad_norm": 0.9547244302119534, "learning_rate": 7.572716845820452e-06, "loss": 0.5928, "step": 2161 }, { "epoch": 0.35, "grad_norm": 0.9697951688845026, "learning_rate": 7.57047874525786e-06, "loss": 0.6137, "step": 2162 }, { "epoch": 0.35, "grad_norm": 1.035037533934311, "learning_rate": 7.568239944426003e-06, "loss": 0.5882, "step": 2163 }, { "epoch": 0.35, "grad_norm": 0.9502285146991711, "learning_rate": 7.5660004439347916e-06, "loss": 0.6044, "step": 2164 }, { "epoch": 0.35, "grad_norm": 1.1698309555959372, "learning_rate": 7.563760244394325e-06, "loss": 0.6429, "step": 2165 }, { "epoch": 0.35, "grad_norm": 1.0025932875626726, "learning_rate": 7.5615193464149005e-06, "loss": 0.5801, "step": 2166 }, { "epoch": 0.35, "grad_norm": 1.04058948095227, "learning_rate": 7.5592777506069946e-06, "loss": 0.5861, "step": 2167 }, { "epoch": 0.35, "grad_norm": 1.0129456428934478, "learning_rate": 7.557035457581284e-06, "loss": 0.6721, "step": 2168 }, { "epoch": 0.35, "grad_norm": 0.9555877351451456, "learning_rate": 7.5547924679486294e-06, "loss": 0.6082, "step": 2169 }, { "epoch": 0.35, "grad_norm": 1.0656725285070938, "learning_rate": 7.552548782320084e-06, "loss": 0.6366, "step": 2170 }, { "epoch": 0.35, "grad_norm": 1.0544649090672507, "learning_rate": 7.550304401306887e-06, "loss": 0.6519, "step": 2171 }, { "epoch": 0.35, "grad_norm": 1.0730712108810618, "learning_rate": 7.5480593255204725e-06, "loss": 0.5814, "step": 2172 }, { "epoch": 0.35, "grad_norm": 1.0240247141529004, "learning_rate": 7.545813555572461e-06, "loss": 0.6339, "step": 2173 }, { "epoch": 0.35, "grad_norm": 1.0772674605091832, "learning_rate": 7.54356709207466e-06, "loss": 0.6499, "step": 2174 }, { "epoch": 0.35, "grad_norm": 0.9723631317521624, "learning_rate": 7.5413199356390695e-06, "loss": 0.5482, "step": 2175 }, { "epoch": 0.35, "grad_norm": 1.058529015675711, "learning_rate": 7.539072086877877e-06, "loss": 0.5695, "step": 2176 }, { "epoch": 0.35, "grad_norm": 0.9894891115211268, "learning_rate": 7.536823546403458e-06, "loss": 0.6019, "step": 2177 }, { "epoch": 0.35, "grad_norm": 1.0419331223481323, "learning_rate": 7.534574314828376e-06, "loss": 0.6225, "step": 2178 }, { "epoch": 0.35, "grad_norm": 1.0840864420534446, "learning_rate": 7.532324392765387e-06, "loss": 0.6268, "step": 2179 }, { "epoch": 0.35, "grad_norm": 1.0200666520841024, "learning_rate": 7.530073780827427e-06, "loss": 0.6167, "step": 2180 }, { "epoch": 0.35, "grad_norm": 1.0658063472007522, "learning_rate": 7.527822479627629e-06, "loss": 0.5921, "step": 2181 }, { "epoch": 0.35, "grad_norm": 1.0937266938289099, "learning_rate": 7.525570489779307e-06, "loss": 0.5996, "step": 2182 }, { "epoch": 0.35, "grad_norm": 0.9916347833117392, "learning_rate": 7.523317811895965e-06, "loss": 0.5871, "step": 2183 }, { "epoch": 0.35, "grad_norm": 1.1270732830847814, "learning_rate": 7.5210644465912975e-06, "loss": 0.645, "step": 2184 }, { "epoch": 0.35, "grad_norm": 1.042014238832267, "learning_rate": 7.518810394479179e-06, "loss": 0.659, "step": 2185 }, { "epoch": 0.35, "grad_norm": 0.9909956008998193, "learning_rate": 7.516555656173678e-06, "loss": 0.5965, "step": 2186 }, { "epoch": 0.35, "grad_norm": 0.7324249441449703, "learning_rate": 7.514300232289049e-06, "loss": 0.5322, "step": 2187 }, { "epoch": 0.35, "grad_norm": 0.9927544419725202, "learning_rate": 7.512044123439728e-06, "loss": 0.6095, "step": 2188 }, { "epoch": 0.35, "grad_norm": 1.0407282337140056, "learning_rate": 7.509787330240342e-06, "loss": 0.5544, "step": 2189 }, { "epoch": 0.35, "grad_norm": 0.9731759571235095, "learning_rate": 7.507529853305706e-06, "loss": 0.5713, "step": 2190 }, { "epoch": 0.35, "grad_norm": 0.937500977295957, "learning_rate": 7.505271693250817e-06, "loss": 0.6045, "step": 2191 }, { "epoch": 0.35, "grad_norm": 0.9797890327990393, "learning_rate": 7.50301285069086e-06, "loss": 0.6318, "step": 2192 }, { "epoch": 0.35, "grad_norm": 1.132217045604059, "learning_rate": 7.500753326241208e-06, "loss": 0.6241, "step": 2193 }, { "epoch": 0.35, "grad_norm": 1.2278289441722754, "learning_rate": 7.498493120517415e-06, "loss": 0.6196, "step": 2194 }, { "epoch": 0.35, "grad_norm": 0.9990125360783698, "learning_rate": 7.4962322341352256e-06, "loss": 0.6263, "step": 2195 }, { "epoch": 0.35, "grad_norm": 0.9806168080037504, "learning_rate": 7.493970667710566e-06, "loss": 0.6257, "step": 2196 }, { "epoch": 0.35, "grad_norm": 0.9699999360034107, "learning_rate": 7.491708421859549e-06, "loss": 0.614, "step": 2197 }, { "epoch": 0.35, "grad_norm": 0.9865805033343138, "learning_rate": 7.489445497198475e-06, "loss": 0.6396, "step": 2198 }, { "epoch": 0.35, "grad_norm": 0.9982575792646158, "learning_rate": 7.487181894343826e-06, "loss": 0.6293, "step": 2199 }, { "epoch": 0.35, "grad_norm": 1.0809635183991693, "learning_rate": 7.484917613912267e-06, "loss": 0.6394, "step": 2200 }, { "epoch": 0.35, "grad_norm": 0.9647106534349793, "learning_rate": 7.482652656520655e-06, "loss": 0.6543, "step": 2201 }, { "epoch": 0.35, "grad_norm": 1.0943693656857505, "learning_rate": 7.480387022786023e-06, "loss": 0.628, "step": 2202 }, { "epoch": 0.35, "grad_norm": 1.035142011098132, "learning_rate": 7.478120713325595e-06, "loss": 0.6506, "step": 2203 }, { "epoch": 0.36, "grad_norm": 1.0651517783072433, "learning_rate": 7.4758537287567745e-06, "loss": 0.604, "step": 2204 }, { "epoch": 0.36, "grad_norm": 0.9720933860867106, "learning_rate": 7.4735860696971505e-06, "loss": 0.5954, "step": 2205 }, { "epoch": 0.36, "grad_norm": 1.0275337000091138, "learning_rate": 7.471317736764497e-06, "loss": 0.6617, "step": 2206 }, { "epoch": 0.36, "grad_norm": 1.0305658457450282, "learning_rate": 7.46904873057677e-06, "loss": 0.6363, "step": 2207 }, { "epoch": 0.36, "grad_norm": 0.937168114333817, "learning_rate": 7.466779051752107e-06, "loss": 0.5311, "step": 2208 }, { "epoch": 0.36, "grad_norm": 1.0143455204759564, "learning_rate": 7.464508700908836e-06, "loss": 0.569, "step": 2209 }, { "epoch": 0.36, "grad_norm": 1.005320338403505, "learning_rate": 7.4622376786654596e-06, "loss": 0.5877, "step": 2210 }, { "epoch": 0.36, "grad_norm": 0.8966566395654978, "learning_rate": 7.459965985640665e-06, "loss": 0.5638, "step": 2211 }, { "epoch": 0.36, "grad_norm": 1.0295116094114811, "learning_rate": 7.457693622453329e-06, "loss": 0.5714, "step": 2212 }, { "epoch": 0.36, "grad_norm": 0.992821429488196, "learning_rate": 7.455420589722504e-06, "loss": 0.6333, "step": 2213 }, { "epoch": 0.36, "grad_norm": 0.6837528960501983, "learning_rate": 7.453146888067424e-06, "loss": 0.4696, "step": 2214 }, { "epoch": 0.36, "grad_norm": 1.0439748577069399, "learning_rate": 7.450872518107511e-06, "loss": 0.6377, "step": 2215 }, { "epoch": 0.36, "grad_norm": 1.028459715489088, "learning_rate": 7.448597480462366e-06, "loss": 0.6115, "step": 2216 }, { "epoch": 0.36, "grad_norm": 1.0620909463080204, "learning_rate": 7.446321775751772e-06, "loss": 0.6115, "step": 2217 }, { "epoch": 0.36, "grad_norm": 1.013921568198763, "learning_rate": 7.444045404595692e-06, "loss": 0.5665, "step": 2218 }, { "epoch": 0.36, "grad_norm": 0.9817164372032654, "learning_rate": 7.441768367614274e-06, "loss": 0.5139, "step": 2219 }, { "epoch": 0.36, "grad_norm": 0.9564655080395958, "learning_rate": 7.439490665427844e-06, "loss": 0.569, "step": 2220 }, { "epoch": 0.36, "grad_norm": 1.1035241164050016, "learning_rate": 7.437212298656914e-06, "loss": 0.6226, "step": 2221 }, { "epoch": 0.36, "grad_norm": 1.0279100767605824, "learning_rate": 7.434933267922168e-06, "loss": 0.6381, "step": 2222 }, { "epoch": 0.36, "grad_norm": 1.0704208348696829, "learning_rate": 7.432653573844483e-06, "loss": 0.6738, "step": 2223 }, { "epoch": 0.36, "grad_norm": 1.008501522545485, "learning_rate": 7.430373217044909e-06, "loss": 0.6101, "step": 2224 }, { "epoch": 0.36, "grad_norm": 0.9466910912366296, "learning_rate": 7.428092198144674e-06, "loss": 0.5269, "step": 2225 }, { "epoch": 0.36, "grad_norm": 0.680135375678601, "learning_rate": 7.425810517765196e-06, "loss": 0.5063, "step": 2226 }, { "epoch": 0.36, "grad_norm": 0.9697735314082948, "learning_rate": 7.423528176528063e-06, "loss": 0.634, "step": 2227 }, { "epoch": 0.36, "grad_norm": 1.1256424298162675, "learning_rate": 7.4212451750550515e-06, "loss": 0.663, "step": 2228 }, { "epoch": 0.36, "grad_norm": 1.1263628726647679, "learning_rate": 7.418961513968111e-06, "loss": 0.6629, "step": 2229 }, { "epoch": 0.36, "grad_norm": 1.0739052856172258, "learning_rate": 7.416677193889376e-06, "loss": 0.6265, "step": 2230 }, { "epoch": 0.36, "grad_norm": 1.0494167277395259, "learning_rate": 7.4143922154411576e-06, "loss": 0.6321, "step": 2231 }, { "epoch": 0.36, "grad_norm": 0.9797237111807897, "learning_rate": 7.412106579245945e-06, "loss": 0.6439, "step": 2232 }, { "epoch": 0.36, "grad_norm": 0.9536068776665408, "learning_rate": 7.409820285926411e-06, "loss": 0.6505, "step": 2233 }, { "epoch": 0.36, "grad_norm": 0.9168718996040589, "learning_rate": 7.407533336105404e-06, "loss": 0.6062, "step": 2234 }, { "epoch": 0.36, "grad_norm": 1.0781537061886268, "learning_rate": 7.405245730405954e-06, "loss": 0.596, "step": 2235 }, { "epoch": 0.36, "grad_norm": 0.9824668367239546, "learning_rate": 7.402957469451263e-06, "loss": 0.5922, "step": 2236 }, { "epoch": 0.36, "grad_norm": 1.0503225741349496, "learning_rate": 7.4006685538647214e-06, "loss": 0.6386, "step": 2237 }, { "epoch": 0.36, "grad_norm": 0.9934324674450471, "learning_rate": 7.3983789842698894e-06, "loss": 0.5646, "step": 2238 }, { "epoch": 0.36, "grad_norm": 1.0480707553360378, "learning_rate": 7.3960887612905116e-06, "loss": 0.6626, "step": 2239 }, { "epoch": 0.36, "grad_norm": 0.9982594008088528, "learning_rate": 7.393797885550506e-06, "loss": 0.6133, "step": 2240 }, { "epoch": 0.36, "grad_norm": 1.023618108743904, "learning_rate": 7.391506357673972e-06, "loss": 0.655, "step": 2241 }, { "epoch": 0.36, "grad_norm": 0.93858795250245, "learning_rate": 7.389214178285182e-06, "loss": 0.547, "step": 2242 }, { "epoch": 0.36, "grad_norm": 0.9802790735934123, "learning_rate": 7.386921348008592e-06, "loss": 0.5308, "step": 2243 }, { "epoch": 0.36, "grad_norm": 1.1341996182268386, "learning_rate": 7.384627867468831e-06, "loss": 0.6223, "step": 2244 }, { "epoch": 0.36, "grad_norm": 1.0932148014706684, "learning_rate": 7.382333737290708e-06, "loss": 0.5927, "step": 2245 }, { "epoch": 0.36, "grad_norm": 1.0139605520628223, "learning_rate": 7.3800389580992025e-06, "loss": 0.6374, "step": 2246 }, { "epoch": 0.36, "grad_norm": 0.9016158552972428, "learning_rate": 7.37774353051948e-06, "loss": 0.588, "step": 2247 }, { "epoch": 0.36, "grad_norm": 1.0972601640743878, "learning_rate": 7.375447455176877e-06, "loss": 0.6535, "step": 2248 }, { "epoch": 0.36, "grad_norm": 1.0268348104925817, "learning_rate": 7.373150732696907e-06, "loss": 0.6244, "step": 2249 }, { "epoch": 0.36, "grad_norm": 0.9727916070992491, "learning_rate": 7.370853363705261e-06, "loss": 0.5854, "step": 2250 }, { "epoch": 0.36, "grad_norm": 0.9953585514048059, "learning_rate": 7.3685553488278064e-06, "loss": 0.6082, "step": 2251 }, { "epoch": 0.36, "grad_norm": 1.0536536084673818, "learning_rate": 7.366256688690585e-06, "loss": 0.582, "step": 2252 }, { "epoch": 0.36, "grad_norm": 1.0493702495187964, "learning_rate": 7.363957383919815e-06, "loss": 0.5751, "step": 2253 }, { "epoch": 0.36, "grad_norm": 1.174774506365941, "learning_rate": 7.361657435141892e-06, "loss": 0.6766, "step": 2254 }, { "epoch": 0.36, "grad_norm": 1.0670703164117528, "learning_rate": 7.3593568429833825e-06, "loss": 0.6564, "step": 2255 }, { "epoch": 0.36, "grad_norm": 0.9755258278824886, "learning_rate": 7.357055608071034e-06, "loss": 0.5662, "step": 2256 }, { "epoch": 0.36, "grad_norm": 1.0584981662858932, "learning_rate": 7.354753731031765e-06, "loss": 0.6651, "step": 2257 }, { "epoch": 0.36, "grad_norm": 1.121569052863542, "learning_rate": 7.352451212492671e-06, "loss": 0.7275, "step": 2258 }, { "epoch": 0.36, "grad_norm": 1.0476673966220134, "learning_rate": 7.350148053081021e-06, "loss": 0.6499, "step": 2259 }, { "epoch": 0.36, "grad_norm": 1.1155294444916661, "learning_rate": 7.3478442534242565e-06, "loss": 0.6515, "step": 2260 }, { "epoch": 0.36, "grad_norm": 1.0022557002650023, "learning_rate": 7.345539814150002e-06, "loss": 0.6748, "step": 2261 }, { "epoch": 0.36, "grad_norm": 1.0028660692009115, "learning_rate": 7.3432347358860445e-06, "loss": 0.6944, "step": 2262 }, { "epoch": 0.36, "grad_norm": 1.0020792931902724, "learning_rate": 7.340929019260356e-06, "loss": 0.6206, "step": 2263 }, { "epoch": 0.36, "grad_norm": 0.9825732768808498, "learning_rate": 7.338622664901073e-06, "loss": 0.5799, "step": 2264 }, { "epoch": 0.36, "grad_norm": 1.1151849099475757, "learning_rate": 7.336315673436512e-06, "loss": 0.6722, "step": 2265 }, { "epoch": 0.37, "grad_norm": 1.015357200135573, "learning_rate": 7.33400804549516e-06, "loss": 0.5862, "step": 2266 }, { "epoch": 0.37, "grad_norm": 1.0559706548634678, "learning_rate": 7.331699781705679e-06, "loss": 0.6778, "step": 2267 }, { "epoch": 0.37, "grad_norm": 1.0722907487227722, "learning_rate": 7.329390882696904e-06, "loss": 0.702, "step": 2268 }, { "epoch": 0.37, "grad_norm": 1.0439197737276733, "learning_rate": 7.327081349097842e-06, "loss": 0.589, "step": 2269 }, { "epoch": 0.37, "grad_norm": 1.0110139710432422, "learning_rate": 7.324771181537676e-06, "loss": 0.5321, "step": 2270 }, { "epoch": 0.37, "grad_norm": 1.1525438040790654, "learning_rate": 7.322460380645755e-06, "loss": 0.6721, "step": 2271 }, { "epoch": 0.37, "grad_norm": 1.142277853267506, "learning_rate": 7.32014894705161e-06, "loss": 0.5394, "step": 2272 }, { "epoch": 0.37, "grad_norm": 0.7335970205689721, "learning_rate": 7.317836881384934e-06, "loss": 0.5117, "step": 2273 }, { "epoch": 0.37, "grad_norm": 0.9866075911196817, "learning_rate": 7.315524184275602e-06, "loss": 0.6447, "step": 2274 }, { "epoch": 0.37, "grad_norm": 0.917340397854701, "learning_rate": 7.313210856353653e-06, "loss": 0.5889, "step": 2275 }, { "epoch": 0.37, "grad_norm": 0.9569878512424859, "learning_rate": 7.310896898249303e-06, "loss": 0.613, "step": 2276 }, { "epoch": 0.37, "grad_norm": 1.088564601700148, "learning_rate": 7.3085823105929385e-06, "loss": 0.6819, "step": 2277 }, { "epoch": 0.37, "grad_norm": 0.9899626356098077, "learning_rate": 7.306267094015116e-06, "loss": 0.6117, "step": 2278 }, { "epoch": 0.37, "grad_norm": 0.9039913760438909, "learning_rate": 7.303951249146563e-06, "loss": 0.51, "step": 2279 }, { "epoch": 0.37, "grad_norm": 1.0549402674171047, "learning_rate": 7.301634776618183e-06, "loss": 0.7116, "step": 2280 }, { "epoch": 0.37, "grad_norm": 0.9035697292294355, "learning_rate": 7.299317677061043e-06, "loss": 0.5377, "step": 2281 }, { "epoch": 0.37, "grad_norm": 0.9895724294011012, "learning_rate": 7.2969999511063875e-06, "loss": 0.5844, "step": 2282 }, { "epoch": 0.37, "grad_norm": 0.9455445325187327, "learning_rate": 7.294681599385629e-06, "loss": 0.5867, "step": 2283 }, { "epoch": 0.37, "grad_norm": 1.064716577101577, "learning_rate": 7.292362622530347e-06, "loss": 0.6012, "step": 2284 }, { "epoch": 0.37, "grad_norm": 1.0295923136773983, "learning_rate": 7.2900430211723e-06, "loss": 0.6681, "step": 2285 }, { "epoch": 0.37, "grad_norm": 1.118860441117518, "learning_rate": 7.287722795943407e-06, "loss": 0.6798, "step": 2286 }, { "epoch": 0.37, "grad_norm": 1.0525432332213427, "learning_rate": 7.285401947475764e-06, "loss": 0.5869, "step": 2287 }, { "epoch": 0.37, "grad_norm": 0.975589404865857, "learning_rate": 7.283080476401634e-06, "loss": 0.5738, "step": 2288 }, { "epoch": 0.37, "grad_norm": 1.0006130130616144, "learning_rate": 7.280758383353447e-06, "loss": 0.6825, "step": 2289 }, { "epoch": 0.37, "grad_norm": 1.0029200734741783, "learning_rate": 7.278435668963807e-06, "loss": 0.5513, "step": 2290 }, { "epoch": 0.37, "grad_norm": 1.043338981883033, "learning_rate": 7.276112333865485e-06, "loss": 0.6003, "step": 2291 }, { "epoch": 0.37, "grad_norm": 1.0401236749370717, "learning_rate": 7.273788378691422e-06, "loss": 0.6626, "step": 2292 }, { "epoch": 0.37, "grad_norm": 0.6890032070859601, "learning_rate": 7.271463804074728e-06, "loss": 0.4936, "step": 2293 }, { "epoch": 0.37, "grad_norm": 0.9756682529932238, "learning_rate": 7.26913861064868e-06, "loss": 0.5768, "step": 2294 }, { "epoch": 0.37, "grad_norm": 0.9935930566997946, "learning_rate": 7.2668127990467266e-06, "loss": 0.6365, "step": 2295 }, { "epoch": 0.37, "grad_norm": 1.0855618397764537, "learning_rate": 7.26448636990248e-06, "loss": 0.6834, "step": 2296 }, { "epoch": 0.37, "grad_norm": 1.066300092063712, "learning_rate": 7.262159323849725e-06, "loss": 0.6713, "step": 2297 }, { "epoch": 0.37, "grad_norm": 1.029760753288409, "learning_rate": 7.259831661522415e-06, "loss": 0.6249, "step": 2298 }, { "epoch": 0.37, "grad_norm": 0.7170706348975647, "learning_rate": 7.257503383554668e-06, "loss": 0.4782, "step": 2299 }, { "epoch": 0.37, "grad_norm": 1.038851938883799, "learning_rate": 7.25517449058077e-06, "loss": 0.6068, "step": 2300 }, { "epoch": 0.37, "grad_norm": 0.9479056382578903, "learning_rate": 7.252844983235177e-06, "loss": 0.547, "step": 2301 }, { "epoch": 0.37, "grad_norm": 1.044590042440595, "learning_rate": 7.250514862152509e-06, "loss": 0.5581, "step": 2302 }, { "epoch": 0.37, "grad_norm": 1.0397548846106945, "learning_rate": 7.248184127967557e-06, "loss": 0.6095, "step": 2303 }, { "epoch": 0.37, "grad_norm": 0.9838879368610903, "learning_rate": 7.245852781315278e-06, "loss": 0.6542, "step": 2304 }, { "epoch": 0.37, "grad_norm": 0.9953804012244276, "learning_rate": 7.243520822830794e-06, "loss": 0.6014, "step": 2305 }, { "epoch": 0.37, "grad_norm": 0.9967313948299399, "learning_rate": 7.241188253149395e-06, "loss": 0.5697, "step": 2306 }, { "epoch": 0.37, "grad_norm": 1.0202032628876836, "learning_rate": 7.238855072906537e-06, "loss": 0.6795, "step": 2307 }, { "epoch": 0.37, "grad_norm": 0.6526329759037361, "learning_rate": 7.236521282737842e-06, "loss": 0.4914, "step": 2308 }, { "epoch": 0.37, "grad_norm": 0.9928526682224015, "learning_rate": 7.2341868832791e-06, "loss": 0.5976, "step": 2309 }, { "epoch": 0.37, "grad_norm": 1.0281525604653499, "learning_rate": 7.231851875166266e-06, "loss": 0.5862, "step": 2310 }, { "epoch": 0.37, "grad_norm": 0.9643959959834605, "learning_rate": 7.229516259035458e-06, "loss": 0.5179, "step": 2311 }, { "epoch": 0.37, "grad_norm": 1.0602900575715508, "learning_rate": 7.227180035522966e-06, "loss": 0.6703, "step": 2312 }, { "epoch": 0.37, "grad_norm": 1.0425834460317396, "learning_rate": 7.224843205265239e-06, "loss": 0.5748, "step": 2313 }, { "epoch": 0.37, "grad_norm": 1.0754807284672718, "learning_rate": 7.222505768898894e-06, "loss": 0.62, "step": 2314 }, { "epoch": 0.37, "grad_norm": 0.6700798805359844, "learning_rate": 7.220167727060714e-06, "loss": 0.5045, "step": 2315 }, { "epoch": 0.37, "grad_norm": 1.0724473126007261, "learning_rate": 7.217829080387648e-06, "loss": 0.6219, "step": 2316 }, { "epoch": 0.37, "grad_norm": 1.0305351586745446, "learning_rate": 7.2154898295168045e-06, "loss": 0.6645, "step": 2317 }, { "epoch": 0.37, "grad_norm": 1.0803282059123502, "learning_rate": 7.21314997508546e-06, "loss": 0.645, "step": 2318 }, { "epoch": 0.37, "grad_norm": 0.9930170025838548, "learning_rate": 7.210809517731057e-06, "loss": 0.5948, "step": 2319 }, { "epoch": 0.37, "grad_norm": 1.0191853221584506, "learning_rate": 7.2084684580912e-06, "loss": 0.5696, "step": 2320 }, { "epoch": 0.37, "grad_norm": 1.0341977117673886, "learning_rate": 7.206126796803659e-06, "loss": 0.6438, "step": 2321 }, { "epoch": 0.37, "grad_norm": 1.176069048803107, "learning_rate": 7.203784534506364e-06, "loss": 0.6832, "step": 2322 }, { "epoch": 0.37, "grad_norm": 0.9895004832667399, "learning_rate": 7.201441671837417e-06, "loss": 0.6319, "step": 2323 }, { "epoch": 0.37, "grad_norm": 0.9318644353326777, "learning_rate": 7.199098209435073e-06, "loss": 0.6052, "step": 2324 }, { "epoch": 0.37, "grad_norm": 0.6971038331340554, "learning_rate": 7.19675414793776e-06, "loss": 0.5093, "step": 2325 }, { "epoch": 0.37, "grad_norm": 1.0241133214838547, "learning_rate": 7.194409487984063e-06, "loss": 0.5799, "step": 2326 }, { "epoch": 0.37, "grad_norm": 1.0192936716129128, "learning_rate": 7.192064230212733e-06, "loss": 0.5784, "step": 2327 }, { "epoch": 0.38, "grad_norm": 1.128273949257954, "learning_rate": 7.189718375262681e-06, "loss": 0.6741, "step": 2328 }, { "epoch": 0.38, "grad_norm": 1.0169799227839607, "learning_rate": 7.1873719237729835e-06, "loss": 0.6721, "step": 2329 }, { "epoch": 0.38, "grad_norm": 1.0970038244967593, "learning_rate": 7.18502487638288e-06, "loss": 0.6683, "step": 2330 }, { "epoch": 0.38, "grad_norm": 1.0514332467639362, "learning_rate": 7.182677233731768e-06, "loss": 0.6985, "step": 2331 }, { "epoch": 0.38, "grad_norm": 0.9860533829956002, "learning_rate": 7.1803289964592115e-06, "loss": 0.6123, "step": 2332 }, { "epoch": 0.38, "grad_norm": 0.9415702344101353, "learning_rate": 7.177980165204935e-06, "loss": 0.6253, "step": 2333 }, { "epoch": 0.38, "grad_norm": 0.981775889655673, "learning_rate": 7.1756307406088275e-06, "loss": 0.67, "step": 2334 }, { "epoch": 0.38, "grad_norm": 1.0172659585057755, "learning_rate": 7.173280723310932e-06, "loss": 0.6384, "step": 2335 }, { "epoch": 0.38, "grad_norm": 1.0760415073559442, "learning_rate": 7.170930113951462e-06, "loss": 0.6651, "step": 2336 }, { "epoch": 0.38, "grad_norm": 1.1222808027226474, "learning_rate": 7.168578913170788e-06, "loss": 0.6946, "step": 2337 }, { "epoch": 0.38, "grad_norm": 1.03926099578737, "learning_rate": 7.166227121609439e-06, "loss": 0.608, "step": 2338 }, { "epoch": 0.38, "grad_norm": 0.951246740193479, "learning_rate": 7.16387473990811e-06, "loss": 0.5662, "step": 2339 }, { "epoch": 0.38, "grad_norm": 0.9764330286390912, "learning_rate": 7.1615217687076555e-06, "loss": 0.5922, "step": 2340 }, { "epoch": 0.38, "grad_norm": 1.0536564721478936, "learning_rate": 7.159168208649086e-06, "loss": 0.659, "step": 2341 }, { "epoch": 0.38, "grad_norm": 0.9477427708930591, "learning_rate": 7.15681406037358e-06, "loss": 0.4902, "step": 2342 }, { "epoch": 0.38, "grad_norm": 1.0462160185072122, "learning_rate": 7.154459324522474e-06, "loss": 0.6021, "step": 2343 }, { "epoch": 0.38, "grad_norm": 1.0157462260641292, "learning_rate": 7.152104001737254e-06, "loss": 0.6139, "step": 2344 }, { "epoch": 0.38, "grad_norm": 1.0722983663725614, "learning_rate": 7.149748092659585e-06, "loss": 0.7137, "step": 2345 }, { "epoch": 0.38, "grad_norm": 0.940472714708764, "learning_rate": 7.147391597931277e-06, "loss": 0.5548, "step": 2346 }, { "epoch": 0.38, "grad_norm": 0.7476040474905417, "learning_rate": 7.145034518194304e-06, "loss": 0.5133, "step": 2347 }, { "epoch": 0.38, "grad_norm": 0.6878141988429427, "learning_rate": 7.1426768540908e-06, "loss": 0.4676, "step": 2348 }, { "epoch": 0.38, "grad_norm": 1.0651734573065943, "learning_rate": 7.140318606263058e-06, "loss": 0.635, "step": 2349 }, { "epoch": 0.38, "grad_norm": 1.0337203632659062, "learning_rate": 7.137959775353529e-06, "loss": 0.5942, "step": 2350 }, { "epoch": 0.38, "grad_norm": 1.0728951921094223, "learning_rate": 7.135600362004824e-06, "loss": 0.7703, "step": 2351 }, { "epoch": 0.38, "grad_norm": 1.1055960475929942, "learning_rate": 7.133240366859713e-06, "loss": 0.6393, "step": 2352 }, { "epoch": 0.38, "grad_norm": 0.9667227418057721, "learning_rate": 7.130879790561122e-06, "loss": 0.6511, "step": 2353 }, { "epoch": 0.38, "grad_norm": 0.7663740701338713, "learning_rate": 7.128518633752139e-06, "loss": 0.4795, "step": 2354 }, { "epoch": 0.38, "grad_norm": 0.8530005604396605, "learning_rate": 7.126156897076005e-06, "loss": 0.5593, "step": 2355 }, { "epoch": 0.38, "grad_norm": 1.0781745990924998, "learning_rate": 7.123794581176127e-06, "loss": 0.6442, "step": 2356 }, { "epoch": 0.38, "grad_norm": 0.6999974646325233, "learning_rate": 7.121431686696061e-06, "loss": 0.4698, "step": 2357 }, { "epoch": 0.38, "grad_norm": 1.0579815294938772, "learning_rate": 7.119068214279525e-06, "loss": 0.5452, "step": 2358 }, { "epoch": 0.38, "grad_norm": 1.1276000233987107, "learning_rate": 7.116704164570398e-06, "loss": 0.6478, "step": 2359 }, { "epoch": 0.38, "grad_norm": 1.025832808921346, "learning_rate": 7.114339538212707e-06, "loss": 0.7092, "step": 2360 }, { "epoch": 0.38, "grad_norm": 0.9989034423337582, "learning_rate": 7.111974335850644e-06, "loss": 0.5454, "step": 2361 }, { "epoch": 0.38, "grad_norm": 0.6376899780931057, "learning_rate": 7.1096085581285555e-06, "loss": 0.518, "step": 2362 }, { "epoch": 0.38, "grad_norm": 1.0496611821914672, "learning_rate": 7.1072422056909426e-06, "loss": 0.5956, "step": 2363 }, { "epoch": 0.38, "grad_norm": 1.0888408750761576, "learning_rate": 7.104875279182468e-06, "loss": 0.7206, "step": 2364 }, { "epoch": 0.38, "grad_norm": 0.993143639084888, "learning_rate": 7.102507779247947e-06, "loss": 0.5604, "step": 2365 }, { "epoch": 0.38, "grad_norm": 1.056484529223308, "learning_rate": 7.100139706532347e-06, "loss": 0.7084, "step": 2366 }, { "epoch": 0.38, "grad_norm": 1.1199644604555272, "learning_rate": 7.097771061680805e-06, "loss": 0.7009, "step": 2367 }, { "epoch": 0.38, "grad_norm": 1.0338586201100615, "learning_rate": 7.095401845338598e-06, "loss": 0.612, "step": 2368 }, { "epoch": 0.38, "grad_norm": 1.0110404456672186, "learning_rate": 7.093032058151168e-06, "loss": 0.5755, "step": 2369 }, { "epoch": 0.38, "grad_norm": 0.7066641201528922, "learning_rate": 7.090661700764112e-06, "loss": 0.5029, "step": 2370 }, { "epoch": 0.38, "grad_norm": 1.0042353068904448, "learning_rate": 7.088290773823177e-06, "loss": 0.6437, "step": 2371 }, { "epoch": 0.38, "grad_norm": 0.9138896162531205, "learning_rate": 7.085919277974274e-06, "loss": 0.4887, "step": 2372 }, { "epoch": 0.38, "grad_norm": 0.9934873959798128, "learning_rate": 7.083547213863458e-06, "loss": 0.6607, "step": 2373 }, { "epoch": 0.38, "grad_norm": 1.087857260483321, "learning_rate": 7.0811745821369495e-06, "loss": 0.6244, "step": 2374 }, { "epoch": 0.38, "grad_norm": 0.9438686380348272, "learning_rate": 7.0788013834411165e-06, "loss": 0.529, "step": 2375 }, { "epoch": 0.38, "grad_norm": 1.092078836695521, "learning_rate": 7.0764276184224845e-06, "loss": 0.6282, "step": 2376 }, { "epoch": 0.38, "grad_norm": 1.031629186140058, "learning_rate": 7.07405328772773e-06, "loss": 0.6555, "step": 2377 }, { "epoch": 0.38, "grad_norm": 0.9780903942633322, "learning_rate": 7.071678392003691e-06, "loss": 0.6471, "step": 2378 }, { "epoch": 0.38, "grad_norm": 1.0456899084735127, "learning_rate": 7.069302931897352e-06, "loss": 0.6171, "step": 2379 }, { "epoch": 0.38, "grad_norm": 0.9588691913296155, "learning_rate": 7.0669269080558515e-06, "loss": 0.5885, "step": 2380 }, { "epoch": 0.38, "grad_norm": 1.0385791001888502, "learning_rate": 7.064550321126489e-06, "loss": 0.6038, "step": 2381 }, { "epoch": 0.38, "grad_norm": 0.6861688099958749, "learning_rate": 7.0621731717567055e-06, "loss": 0.4653, "step": 2382 }, { "epoch": 0.38, "grad_norm": 1.0446725209149121, "learning_rate": 7.059795460594109e-06, "loss": 0.6505, "step": 2383 }, { "epoch": 0.38, "grad_norm": 0.9220015695075581, "learning_rate": 7.057417188286449e-06, "loss": 0.5945, "step": 2384 }, { "epoch": 0.38, "grad_norm": 0.9625529045516624, "learning_rate": 7.0550383554816345e-06, "loss": 0.6274, "step": 2385 }, { "epoch": 0.38, "grad_norm": 0.9439610736644684, "learning_rate": 7.052658962827724e-06, "loss": 0.6151, "step": 2386 }, { "epoch": 0.38, "grad_norm": 1.0591847510302013, "learning_rate": 7.050279010972932e-06, "loss": 0.6301, "step": 2387 }, { "epoch": 0.38, "grad_norm": 1.0462506749042528, "learning_rate": 7.047898500565619e-06, "loss": 0.5891, "step": 2388 }, { "epoch": 0.38, "grad_norm": 0.6435080393858074, "learning_rate": 7.045517432254304e-06, "loss": 0.444, "step": 2389 }, { "epoch": 0.39, "grad_norm": 0.9358529603801624, "learning_rate": 7.043135806687655e-06, "loss": 0.6055, "step": 2390 }, { "epoch": 0.39, "grad_norm": 1.0585678958383646, "learning_rate": 7.040753624514494e-06, "loss": 0.6812, "step": 2391 }, { "epoch": 0.39, "grad_norm": 0.9806199373411311, "learning_rate": 7.038370886383793e-06, "loss": 0.5832, "step": 2392 }, { "epoch": 0.39, "grad_norm": 0.9960809853376439, "learning_rate": 7.035987592944672e-06, "loss": 0.5949, "step": 2393 }, { "epoch": 0.39, "grad_norm": 0.9371568186470512, "learning_rate": 7.03360374484641e-06, "loss": 0.6317, "step": 2394 }, { "epoch": 0.39, "grad_norm": 1.0861361535105436, "learning_rate": 7.031219342738431e-06, "loss": 0.6498, "step": 2395 }, { "epoch": 0.39, "grad_norm": 0.9834551502830807, "learning_rate": 7.028834387270311e-06, "loss": 0.5666, "step": 2396 }, { "epoch": 0.39, "grad_norm": 1.0212889032906192, "learning_rate": 7.02644887909178e-06, "loss": 0.5985, "step": 2397 }, { "epoch": 0.39, "grad_norm": 1.005482585269343, "learning_rate": 7.024062818852716e-06, "loss": 0.5555, "step": 2398 }, { "epoch": 0.39, "grad_norm": 0.9473593003588137, "learning_rate": 7.021676207203145e-06, "loss": 0.5761, "step": 2399 }, { "epoch": 0.39, "grad_norm": 1.0196190720589509, "learning_rate": 7.019289044793247e-06, "loss": 0.5582, "step": 2400 }, { "epoch": 0.39, "grad_norm": 1.129195407577771, "learning_rate": 7.016901332273352e-06, "loss": 0.7053, "step": 2401 }, { "epoch": 0.39, "grad_norm": 1.0365452022713293, "learning_rate": 7.014513070293938e-06, "loss": 0.6153, "step": 2402 }, { "epoch": 0.39, "grad_norm": 1.0545770627082933, "learning_rate": 7.012124259505633e-06, "loss": 0.5732, "step": 2403 }, { "epoch": 0.39, "grad_norm": 0.9899008600664705, "learning_rate": 7.0097349005592145e-06, "loss": 0.571, "step": 2404 }, { "epoch": 0.39, "grad_norm": 0.9701003512959118, "learning_rate": 7.007344994105612e-06, "loss": 0.5922, "step": 2405 }, { "epoch": 0.39, "grad_norm": 1.0777204599898036, "learning_rate": 7.004954540795899e-06, "loss": 0.6939, "step": 2406 }, { "epoch": 0.39, "grad_norm": 0.9446134143788822, "learning_rate": 7.002563541281302e-06, "loss": 0.6094, "step": 2407 }, { "epoch": 0.39, "grad_norm": 0.970206267487964, "learning_rate": 7.000171996213196e-06, "loss": 0.5696, "step": 2408 }, { "epoch": 0.39, "grad_norm": 1.027960703484031, "learning_rate": 6.997779906243103e-06, "loss": 0.5221, "step": 2409 }, { "epoch": 0.39, "grad_norm": 1.0585753660298387, "learning_rate": 6.995387272022695e-06, "loss": 0.5642, "step": 2410 }, { "epoch": 0.39, "grad_norm": 1.0286105451501615, "learning_rate": 6.99299409420379e-06, "loss": 0.6874, "step": 2411 }, { "epoch": 0.39, "grad_norm": 0.8940586357595608, "learning_rate": 6.9906003734383565e-06, "loss": 0.5707, "step": 2412 }, { "epoch": 0.39, "grad_norm": 1.003678956389507, "learning_rate": 6.98820611037851e-06, "loss": 0.674, "step": 2413 }, { "epoch": 0.39, "grad_norm": 0.9909017986769085, "learning_rate": 6.985811305676515e-06, "loss": 0.6255, "step": 2414 }, { "epoch": 0.39, "grad_norm": 1.0267431188087748, "learning_rate": 6.98341595998478e-06, "loss": 0.5667, "step": 2415 }, { "epoch": 0.39, "grad_norm": 0.9549869812343352, "learning_rate": 6.981020073955866e-06, "loss": 0.5229, "step": 2416 }, { "epoch": 0.39, "grad_norm": 1.0405645009970432, "learning_rate": 6.978623648242474e-06, "loss": 0.6078, "step": 2417 }, { "epoch": 0.39, "grad_norm": 1.0266299686590368, "learning_rate": 6.9762266834974605e-06, "loss": 0.6601, "step": 2418 }, { "epoch": 0.39, "grad_norm": 1.0920923884683573, "learning_rate": 6.973829180373823e-06, "loss": 0.582, "step": 2419 }, { "epoch": 0.39, "grad_norm": 1.0210098897673845, "learning_rate": 6.971431139524709e-06, "loss": 0.6358, "step": 2420 }, { "epoch": 0.39, "grad_norm": 1.0695916584153915, "learning_rate": 6.96903256160341e-06, "loss": 0.5862, "step": 2421 }, { "epoch": 0.39, "grad_norm": 0.9852205104796034, "learning_rate": 6.966633447263362e-06, "loss": 0.6132, "step": 2422 }, { "epoch": 0.39, "grad_norm": 0.9778435179856887, "learning_rate": 6.964233797158155e-06, "loss": 0.5942, "step": 2423 }, { "epoch": 0.39, "grad_norm": 1.05774224895171, "learning_rate": 6.961833611941515e-06, "loss": 0.5974, "step": 2424 }, { "epoch": 0.39, "grad_norm": 1.1036777854551643, "learning_rate": 6.959432892267324e-06, "loss": 0.5907, "step": 2425 }, { "epoch": 0.39, "grad_norm": 1.0125481852489482, "learning_rate": 6.957031638789598e-06, "loss": 0.5499, "step": 2426 }, { "epoch": 0.39, "grad_norm": 0.6909967251649658, "learning_rate": 6.954629852162509e-06, "loss": 0.4617, "step": 2427 }, { "epoch": 0.39, "grad_norm": 1.0469551907163064, "learning_rate": 6.952227533040369e-06, "loss": 0.6015, "step": 2428 }, { "epoch": 0.39, "grad_norm": 1.0748351567085235, "learning_rate": 6.949824682077635e-06, "loss": 0.6474, "step": 2429 }, { "epoch": 0.39, "grad_norm": 0.9342974819635594, "learning_rate": 6.947421299928909e-06, "loss": 0.5626, "step": 2430 }, { "epoch": 0.39, "grad_norm": 1.0208567590937923, "learning_rate": 6.945017387248942e-06, "loss": 0.5843, "step": 2431 }, { "epoch": 0.39, "grad_norm": 0.9491646650236966, "learning_rate": 6.942612944692624e-06, "loss": 0.5344, "step": 2432 }, { "epoch": 0.39, "grad_norm": 1.0033886204305744, "learning_rate": 6.940207972914989e-06, "loss": 0.5614, "step": 2433 }, { "epoch": 0.39, "grad_norm": 1.0115612725130536, "learning_rate": 6.9378024725712225e-06, "loss": 0.5595, "step": 2434 }, { "epoch": 0.39, "grad_norm": 0.9533654936049077, "learning_rate": 6.935396444316646e-06, "loss": 0.5382, "step": 2435 }, { "epoch": 0.39, "grad_norm": 0.6829490251946648, "learning_rate": 6.9329898888067295e-06, "loss": 0.4871, "step": 2436 }, { "epoch": 0.39, "grad_norm": 0.7897422233734538, "learning_rate": 6.930582806697082e-06, "loss": 0.4901, "step": 2437 }, { "epoch": 0.39, "grad_norm": 1.0770851751663981, "learning_rate": 6.928175198643463e-06, "loss": 0.6282, "step": 2438 }, { "epoch": 0.39, "grad_norm": 0.9399303558611184, "learning_rate": 6.9257670653017674e-06, "loss": 0.5918, "step": 2439 }, { "epoch": 0.39, "grad_norm": 0.9824994897873479, "learning_rate": 6.92335840732804e-06, "loss": 0.5416, "step": 2440 }, { "epoch": 0.39, "grad_norm": 1.008968292114502, "learning_rate": 6.9209492253784664e-06, "loss": 0.6124, "step": 2441 }, { "epoch": 0.39, "grad_norm": 1.0217020662128589, "learning_rate": 6.91853952010937e-06, "loss": 0.6114, "step": 2442 }, { "epoch": 0.39, "grad_norm": 1.0667125506308615, "learning_rate": 6.916129292177225e-06, "loss": 0.7157, "step": 2443 }, { "epoch": 0.39, "grad_norm": 1.035584760366978, "learning_rate": 6.913718542238642e-06, "loss": 0.6149, "step": 2444 }, { "epoch": 0.39, "grad_norm": 1.073085884584207, "learning_rate": 6.911307270950376e-06, "loss": 0.5777, "step": 2445 }, { "epoch": 0.39, "grad_norm": 1.0086918945637111, "learning_rate": 6.908895478969324e-06, "loss": 0.6354, "step": 2446 }, { "epoch": 0.39, "grad_norm": 1.0823716969148602, "learning_rate": 6.906483166952526e-06, "loss": 0.6626, "step": 2447 }, { "epoch": 0.39, "grad_norm": 0.996536880399445, "learning_rate": 6.904070335557158e-06, "loss": 0.6027, "step": 2448 }, { "epoch": 0.39, "grad_norm": 0.9309828118928946, "learning_rate": 6.9016569854405466e-06, "loss": 0.5478, "step": 2449 }, { "epoch": 0.39, "grad_norm": 0.9216296862896817, "learning_rate": 6.899243117260153e-06, "loss": 0.568, "step": 2450 }, { "epoch": 0.39, "grad_norm": 1.003238865979167, "learning_rate": 6.896828731673579e-06, "loss": 0.6199, "step": 2451 }, { "epoch": 0.4, "grad_norm": 1.000618321119245, "learning_rate": 6.894413829338576e-06, "loss": 0.66, "step": 2452 }, { "epoch": 0.4, "grad_norm": 0.9693475378799835, "learning_rate": 6.891998410913021e-06, "loss": 0.6286, "step": 2453 }, { "epoch": 0.4, "grad_norm": 1.029799804230898, "learning_rate": 6.88958247705495e-06, "loss": 0.6492, "step": 2454 }, { "epoch": 0.4, "grad_norm": 1.106307507461913, "learning_rate": 6.887166028422524e-06, "loss": 0.5799, "step": 2455 }, { "epoch": 0.4, "grad_norm": 1.0998126940588364, "learning_rate": 6.884749065674051e-06, "loss": 0.6056, "step": 2456 }, { "epoch": 0.4, "grad_norm": 0.9931039313121502, "learning_rate": 6.88233158946798e-06, "loss": 0.5835, "step": 2457 }, { "epoch": 0.4, "grad_norm": 1.0717356054480864, "learning_rate": 6.879913600462898e-06, "loss": 0.6554, "step": 2458 }, { "epoch": 0.4, "grad_norm": 1.0016209251132813, "learning_rate": 6.87749509931753e-06, "loss": 0.5684, "step": 2459 }, { "epoch": 0.4, "grad_norm": 1.0665726340574677, "learning_rate": 6.875076086690744e-06, "loss": 0.6602, "step": 2460 }, { "epoch": 0.4, "grad_norm": 0.9987147061928905, "learning_rate": 6.8726565632415445e-06, "loss": 0.5686, "step": 2461 }, { "epoch": 0.4, "grad_norm": 0.9347817546836513, "learning_rate": 6.8702365296290775e-06, "loss": 0.6116, "step": 2462 }, { "epoch": 0.4, "grad_norm": 0.9682285847422266, "learning_rate": 6.867815986512627e-06, "loss": 0.5283, "step": 2463 }, { "epoch": 0.4, "grad_norm": 1.041555614335137, "learning_rate": 6.865394934551613e-06, "loss": 0.6581, "step": 2464 }, { "epoch": 0.4, "grad_norm": 0.9995520006877903, "learning_rate": 6.862973374405601e-06, "loss": 0.5565, "step": 2465 }, { "epoch": 0.4, "grad_norm": 0.9553043661881615, "learning_rate": 6.860551306734289e-06, "loss": 0.6583, "step": 2466 }, { "epoch": 0.4, "grad_norm": 1.0315210481722183, "learning_rate": 6.858128732197513e-06, "loss": 0.7132, "step": 2467 }, { "epoch": 0.4, "grad_norm": 0.9846072308797008, "learning_rate": 6.855705651455252e-06, "loss": 0.6106, "step": 2468 }, { "epoch": 0.4, "grad_norm": 1.0811711598343194, "learning_rate": 6.853282065167618e-06, "loss": 0.6422, "step": 2469 }, { "epoch": 0.4, "grad_norm": 1.0907309562695893, "learning_rate": 6.850857973994865e-06, "loss": 0.6878, "step": 2470 }, { "epoch": 0.4, "grad_norm": 0.952879298508714, "learning_rate": 6.8484333785973786e-06, "loss": 0.61, "step": 2471 }, { "epoch": 0.4, "grad_norm": 1.058760380792588, "learning_rate": 6.846008279635688e-06, "loss": 0.6212, "step": 2472 }, { "epoch": 0.4, "grad_norm": 1.0080977940179607, "learning_rate": 6.8435826777704575e-06, "loss": 0.5835, "step": 2473 }, { "epoch": 0.4, "grad_norm": 1.0899744275175063, "learning_rate": 6.841156573662486e-06, "loss": 0.6517, "step": 2474 }, { "epoch": 0.4, "grad_norm": 1.0085032895336206, "learning_rate": 6.8387299679727125e-06, "loss": 0.6277, "step": 2475 }, { "epoch": 0.4, "grad_norm": 1.1161474652349777, "learning_rate": 6.836302861362211e-06, "loss": 0.6935, "step": 2476 }, { "epoch": 0.4, "grad_norm": 0.9561434316333456, "learning_rate": 6.8338752544921915e-06, "loss": 0.584, "step": 2477 }, { "epoch": 0.4, "grad_norm": 1.0369621984465147, "learning_rate": 6.831447148024002e-06, "loss": 0.6416, "step": 2478 }, { "epoch": 0.4, "grad_norm": 0.9831594166896802, "learning_rate": 6.829018542619125e-06, "loss": 0.6787, "step": 2479 }, { "epoch": 0.4, "grad_norm": 0.8515645803453231, "learning_rate": 6.82658943893918e-06, "loss": 0.4987, "step": 2480 }, { "epoch": 0.4, "grad_norm": 1.0602170636627577, "learning_rate": 6.824159837645921e-06, "loss": 0.5948, "step": 2481 }, { "epoch": 0.4, "grad_norm": 1.1235667182021651, "learning_rate": 6.821729739401239e-06, "loss": 0.6898, "step": 2482 }, { "epoch": 0.4, "grad_norm": 0.9200206686011301, "learning_rate": 6.8192991448671605e-06, "loss": 0.538, "step": 2483 }, { "epoch": 0.4, "grad_norm": 1.0213212703751973, "learning_rate": 6.8168680547058455e-06, "loss": 0.5104, "step": 2484 }, { "epoch": 0.4, "grad_norm": 0.7255726756540283, "learning_rate": 6.81443646957959e-06, "loss": 0.4639, "step": 2485 }, { "epoch": 0.4, "grad_norm": 1.0265385465932637, "learning_rate": 6.812004390150825e-06, "loss": 0.6828, "step": 2486 }, { "epoch": 0.4, "grad_norm": 0.9685166294120701, "learning_rate": 6.809571817082117e-06, "loss": 0.6158, "step": 2487 }, { "epoch": 0.4, "grad_norm": 0.9651939791120863, "learning_rate": 6.807138751036163e-06, "loss": 0.6089, "step": 2488 }, { "epoch": 0.4, "grad_norm": 1.0473639433561777, "learning_rate": 6.804705192675799e-06, "loss": 0.6069, "step": 2489 }, { "epoch": 0.4, "grad_norm": 0.9550060941541216, "learning_rate": 6.802271142663994e-06, "loss": 0.5115, "step": 2490 }, { "epoch": 0.4, "grad_norm": 0.9774494673745749, "learning_rate": 6.799836601663851e-06, "loss": 0.6043, "step": 2491 }, { "epoch": 0.4, "grad_norm": 0.9417199926922714, "learning_rate": 6.797401570338604e-06, "loss": 0.5986, "step": 2492 }, { "epoch": 0.4, "grad_norm": 0.9403230727109817, "learning_rate": 6.794966049351625e-06, "loss": 0.5489, "step": 2493 }, { "epoch": 0.4, "grad_norm": 1.0420091789920436, "learning_rate": 6.792530039366414e-06, "loss": 0.7169, "step": 2494 }, { "epoch": 0.4, "grad_norm": 0.9880195015039127, "learning_rate": 6.790093541046609e-06, "loss": 0.5736, "step": 2495 }, { "epoch": 0.4, "grad_norm": 0.9606500870149525, "learning_rate": 6.787656555055979e-06, "loss": 0.5969, "step": 2496 }, { "epoch": 0.4, "grad_norm": 0.9948106035363221, "learning_rate": 6.785219082058426e-06, "loss": 0.6565, "step": 2497 }, { "epoch": 0.4, "grad_norm": 0.9872436683254563, "learning_rate": 6.782781122717987e-06, "loss": 0.5967, "step": 2498 }, { "epoch": 0.4, "grad_norm": 1.1118934956516229, "learning_rate": 6.780342677698826e-06, "loss": 0.6268, "step": 2499 }, { "epoch": 0.4, "grad_norm": 1.0499605169795165, "learning_rate": 6.777903747665245e-06, "loss": 0.5868, "step": 2500 }, { "epoch": 0.4, "grad_norm": 1.0579897809382857, "learning_rate": 6.775464333281674e-06, "loss": 0.6756, "step": 2501 }, { "epoch": 0.4, "grad_norm": 1.063498562823953, "learning_rate": 6.773024435212678e-06, "loss": 0.7748, "step": 2502 }, { "epoch": 0.4, "grad_norm": 0.9499005028851514, "learning_rate": 6.770584054122954e-06, "loss": 0.576, "step": 2503 }, { "epoch": 0.4, "grad_norm": 0.9903775626135637, "learning_rate": 6.7681431906773255e-06, "loss": 0.641, "step": 2504 }, { "epoch": 0.4, "grad_norm": 1.0776084816022675, "learning_rate": 6.765701845540753e-06, "loss": 0.6238, "step": 2505 }, { "epoch": 0.4, "grad_norm": 1.0119200822677583, "learning_rate": 6.763260019378325e-06, "loss": 0.6679, "step": 2506 }, { "epoch": 0.4, "grad_norm": 0.9799769655119922, "learning_rate": 6.760817712855266e-06, "loss": 0.5822, "step": 2507 }, { "epoch": 0.4, "grad_norm": 1.0051958202205935, "learning_rate": 6.75837492663692e-06, "loss": 0.5734, "step": 2508 }, { "epoch": 0.4, "grad_norm": 0.9384966957369636, "learning_rate": 6.755931661388778e-06, "loss": 0.4679, "step": 2509 }, { "epoch": 0.4, "grad_norm": 1.007386600122028, "learning_rate": 6.753487917776447e-06, "loss": 0.5655, "step": 2510 }, { "epoch": 0.4, "grad_norm": 1.0806513266957567, "learning_rate": 6.751043696465674e-06, "loss": 0.6519, "step": 2511 }, { "epoch": 0.4, "grad_norm": 1.0542621529396017, "learning_rate": 6.748598998122328e-06, "loss": 0.6419, "step": 2512 }, { "epoch": 0.4, "grad_norm": 0.6961910299745654, "learning_rate": 6.746153823412416e-06, "loss": 0.504, "step": 2513 }, { "epoch": 0.41, "grad_norm": 1.000283417248506, "learning_rate": 6.7437081730020695e-06, "loss": 0.592, "step": 2514 }, { "epoch": 0.41, "grad_norm": 1.1235580595426367, "learning_rate": 6.7412620475575495e-06, "loss": 0.6953, "step": 2515 }, { "epoch": 0.41, "grad_norm": 1.0048097947468948, "learning_rate": 6.7388154477452505e-06, "loss": 0.6361, "step": 2516 }, { "epoch": 0.41, "grad_norm": 1.066195123774335, "learning_rate": 6.736368374231693e-06, "loss": 0.5936, "step": 2517 }, { "epoch": 0.41, "grad_norm": 0.9998213160074564, "learning_rate": 6.733920827683529e-06, "loss": 0.5973, "step": 2518 }, { "epoch": 0.41, "grad_norm": 0.9506109324333402, "learning_rate": 6.731472808767532e-06, "loss": 0.6179, "step": 2519 }, { "epoch": 0.41, "grad_norm": 0.9535603780080965, "learning_rate": 6.729024318150617e-06, "loss": 0.6172, "step": 2520 }, { "epoch": 0.41, "grad_norm": 1.0103086100808496, "learning_rate": 6.726575356499814e-06, "loss": 0.6309, "step": 2521 }, { "epoch": 0.41, "grad_norm": 0.9317522355461826, "learning_rate": 6.724125924482292e-06, "loss": 0.4668, "step": 2522 }, { "epoch": 0.41, "grad_norm": 1.522798330174133, "learning_rate": 6.7216760227653426e-06, "loss": 0.5464, "step": 2523 }, { "epoch": 0.41, "grad_norm": 0.969246569606102, "learning_rate": 6.7192256520163844e-06, "loss": 0.5826, "step": 2524 }, { "epoch": 0.41, "grad_norm": 1.0052706170470391, "learning_rate": 6.7167748129029705e-06, "loss": 0.5466, "step": 2525 }, { "epoch": 0.41, "grad_norm": 1.0227874255285323, "learning_rate": 6.714323506092773e-06, "loss": 0.6505, "step": 2526 }, { "epoch": 0.41, "grad_norm": 0.9505392323519235, "learning_rate": 6.711871732253596e-06, "loss": 0.5293, "step": 2527 }, { "epoch": 0.41, "grad_norm": 0.9377052049095732, "learning_rate": 6.709419492053373e-06, "loss": 0.5919, "step": 2528 }, { "epoch": 0.41, "grad_norm": 0.9932410292132299, "learning_rate": 6.706966786160159e-06, "loss": 0.6502, "step": 2529 }, { "epoch": 0.41, "grad_norm": 1.0480315900430064, "learning_rate": 6.7045136152421395e-06, "loss": 0.6338, "step": 2530 }, { "epoch": 0.41, "grad_norm": 0.9254879553161067, "learning_rate": 6.702059979967627e-06, "loss": 0.5205, "step": 2531 }, { "epoch": 0.41, "grad_norm": 0.9588210869232551, "learning_rate": 6.699605881005058e-06, "loss": 0.5756, "step": 2532 }, { "epoch": 0.41, "grad_norm": 1.0655037342255094, "learning_rate": 6.697151319022996e-06, "loss": 0.6279, "step": 2533 }, { "epoch": 0.41, "grad_norm": 0.9598108868864681, "learning_rate": 6.694696294690133e-06, "loss": 0.6104, "step": 2534 }, { "epoch": 0.41, "grad_norm": 0.8996195883080861, "learning_rate": 6.692240808675286e-06, "loss": 0.6314, "step": 2535 }, { "epoch": 0.41, "grad_norm": 0.9669811778226647, "learning_rate": 6.689784861647395e-06, "loss": 0.6018, "step": 2536 }, { "epoch": 0.41, "grad_norm": 0.6518424321418883, "learning_rate": 6.6873284542755275e-06, "loss": 0.4843, "step": 2537 }, { "epoch": 0.41, "grad_norm": 0.9622863633926381, "learning_rate": 6.684871587228878e-06, "loss": 0.5248, "step": 2538 }, { "epoch": 0.41, "grad_norm": 1.0711655979713472, "learning_rate": 6.682414261176765e-06, "loss": 0.6233, "step": 2539 }, { "epoch": 0.41, "grad_norm": 1.012294791062189, "learning_rate": 6.6799564767886305e-06, "loss": 0.6004, "step": 2540 }, { "epoch": 0.41, "grad_norm": 1.0310829934042711, "learning_rate": 6.677498234734045e-06, "loss": 0.6348, "step": 2541 }, { "epoch": 0.41, "grad_norm": 0.9737609522783973, "learning_rate": 6.675039535682699e-06, "loss": 0.5559, "step": 2542 }, { "epoch": 0.41, "grad_norm": 0.954720512338294, "learning_rate": 6.67258038030441e-06, "loss": 0.5958, "step": 2543 }, { "epoch": 0.41, "grad_norm": 0.9531872991714292, "learning_rate": 6.67012076926912e-06, "loss": 0.5661, "step": 2544 }, { "epoch": 0.41, "grad_norm": 0.9941276543918149, "learning_rate": 6.667660703246897e-06, "loss": 0.6232, "step": 2545 }, { "epoch": 0.41, "grad_norm": 1.0130999863311994, "learning_rate": 6.665200182907928e-06, "loss": 0.5833, "step": 2546 }, { "epoch": 0.41, "grad_norm": 1.0434174066359805, "learning_rate": 6.662739208922529e-06, "loss": 0.6335, "step": 2547 }, { "epoch": 0.41, "grad_norm": 1.0024392468491032, "learning_rate": 6.660277781961135e-06, "loss": 0.6354, "step": 2548 }, { "epoch": 0.41, "grad_norm": 0.9948464686682033, "learning_rate": 6.6578159026943064e-06, "loss": 0.6188, "step": 2549 }, { "epoch": 0.41, "grad_norm": 0.9856599808866728, "learning_rate": 6.655353571792729e-06, "loss": 0.6017, "step": 2550 }, { "epoch": 0.41, "grad_norm": 1.0947400114544834, "learning_rate": 6.652890789927209e-06, "loss": 0.6769, "step": 2551 }, { "epoch": 0.41, "grad_norm": 0.9821868481112924, "learning_rate": 6.650427557768674e-06, "loss": 0.5932, "step": 2552 }, { "epoch": 0.41, "grad_norm": 1.0362479062746677, "learning_rate": 6.647963875988179e-06, "loss": 0.6618, "step": 2553 }, { "epoch": 0.41, "grad_norm": 1.0045773425075395, "learning_rate": 6.645499745256898e-06, "loss": 0.6368, "step": 2554 }, { "epoch": 0.41, "grad_norm": 1.0449898981481693, "learning_rate": 6.643035166246128e-06, "loss": 0.7077, "step": 2555 }, { "epoch": 0.41, "grad_norm": 0.9507740410875614, "learning_rate": 6.640570139627288e-06, "loss": 0.5956, "step": 2556 }, { "epoch": 0.41, "grad_norm": 0.9738974634356756, "learning_rate": 6.638104666071918e-06, "loss": 0.5509, "step": 2557 }, { "epoch": 0.41, "grad_norm": 1.0635617442122571, "learning_rate": 6.635638746251685e-06, "loss": 0.632, "step": 2558 }, { "epoch": 0.41, "grad_norm": 0.9885724221795573, "learning_rate": 6.6331723808383674e-06, "loss": 0.5276, "step": 2559 }, { "epoch": 0.41, "grad_norm": 1.0193579183091526, "learning_rate": 6.630705570503878e-06, "loss": 0.5558, "step": 2560 }, { "epoch": 0.41, "grad_norm": 0.9991712228106975, "learning_rate": 6.628238315920239e-06, "loss": 0.6076, "step": 2561 }, { "epoch": 0.41, "grad_norm": 1.1167738481899345, "learning_rate": 6.6257706177595994e-06, "loss": 0.6637, "step": 2562 }, { "epoch": 0.41, "grad_norm": 0.6284012407550957, "learning_rate": 6.62330247669423e-06, "loss": 0.4411, "step": 2563 }, { "epoch": 0.41, "grad_norm": 0.9905162474639524, "learning_rate": 6.62083389339652e-06, "loss": 0.5821, "step": 2564 }, { "epoch": 0.41, "grad_norm": 0.9685951226184077, "learning_rate": 6.618364868538978e-06, "loss": 0.5848, "step": 2565 }, { "epoch": 0.41, "grad_norm": 1.0103096425202092, "learning_rate": 6.6158954027942345e-06, "loss": 0.6609, "step": 2566 }, { "epoch": 0.41, "grad_norm": 0.994775628196965, "learning_rate": 6.6134254968350434e-06, "loss": 0.6037, "step": 2567 }, { "epoch": 0.41, "grad_norm": 1.023326086655465, "learning_rate": 6.610955151334269e-06, "loss": 0.6006, "step": 2568 }, { "epoch": 0.41, "grad_norm": 0.8936437066842363, "learning_rate": 6.608484366964908e-06, "loss": 0.4609, "step": 2569 }, { "epoch": 0.41, "grad_norm": 0.972262763337525, "learning_rate": 6.606013144400065e-06, "loss": 0.5336, "step": 2570 }, { "epoch": 0.41, "grad_norm": 0.9988413884974223, "learning_rate": 6.603541484312974e-06, "loss": 0.6078, "step": 2571 }, { "epoch": 0.41, "grad_norm": 1.0279655296212966, "learning_rate": 6.601069387376979e-06, "loss": 0.5857, "step": 2572 }, { "epoch": 0.41, "grad_norm": 0.9352353970546785, "learning_rate": 6.5985968542655495e-06, "loss": 0.5483, "step": 2573 }, { "epoch": 0.41, "grad_norm": 1.0647205123561074, "learning_rate": 6.596123885652272e-06, "loss": 0.615, "step": 2574 }, { "epoch": 0.41, "grad_norm": 0.9286199916916759, "learning_rate": 6.593650482210851e-06, "loss": 0.5893, "step": 2575 }, { "epoch": 0.42, "grad_norm": 1.022550424170138, "learning_rate": 6.591176644615108e-06, "loss": 0.5827, "step": 2576 }, { "epoch": 0.42, "grad_norm": 0.9020692570562443, "learning_rate": 6.588702373538987e-06, "loss": 0.5308, "step": 2577 }, { "epoch": 0.42, "grad_norm": 0.9567718875966726, "learning_rate": 6.5862276696565454e-06, "loss": 0.5948, "step": 2578 }, { "epoch": 0.42, "grad_norm": 0.8917139516992794, "learning_rate": 6.583752533641963e-06, "loss": 0.6136, "step": 2579 }, { "epoch": 0.42, "grad_norm": 0.9404116792845437, "learning_rate": 6.581276966169534e-06, "loss": 0.6126, "step": 2580 }, { "epoch": 0.42, "grad_norm": 0.9825803583085433, "learning_rate": 6.57880096791367e-06, "loss": 0.5271, "step": 2581 }, { "epoch": 0.42, "grad_norm": 1.064116122590369, "learning_rate": 6.576324539548904e-06, "loss": 0.6496, "step": 2582 }, { "epoch": 0.42, "grad_norm": 1.0116707884223723, "learning_rate": 6.573847681749881e-06, "loss": 0.5438, "step": 2583 }, { "epoch": 0.42, "grad_norm": 1.0705515854369636, "learning_rate": 6.5713703951913665e-06, "loss": 0.5716, "step": 2584 }, { "epoch": 0.42, "grad_norm": 0.9754768091583119, "learning_rate": 6.5688926805482425e-06, "loss": 0.5629, "step": 2585 }, { "epoch": 0.42, "grad_norm": 1.0264659596369923, "learning_rate": 6.566414538495504e-06, "loss": 0.6195, "step": 2586 }, { "epoch": 0.42, "grad_norm": 0.7355078677122344, "learning_rate": 6.563935969708266e-06, "loss": 0.484, "step": 2587 }, { "epoch": 0.42, "grad_norm": 1.0828541334774135, "learning_rate": 6.561456974861761e-06, "loss": 0.6887, "step": 2588 }, { "epoch": 0.42, "grad_norm": 1.0433305776996245, "learning_rate": 6.558977554631334e-06, "loss": 0.6548, "step": 2589 }, { "epoch": 0.42, "grad_norm": 0.959756042998032, "learning_rate": 6.556497709692447e-06, "loss": 0.6113, "step": 2590 }, { "epoch": 0.42, "grad_norm": 1.0452700204897056, "learning_rate": 6.554017440720679e-06, "loss": 0.6809, "step": 2591 }, { "epoch": 0.42, "grad_norm": 1.0586517651126965, "learning_rate": 6.551536748391724e-06, "loss": 0.5937, "step": 2592 }, { "epoch": 0.42, "grad_norm": 0.9699683269666205, "learning_rate": 6.54905563338139e-06, "loss": 0.596, "step": 2593 }, { "epoch": 0.42, "grad_norm": 1.0143080749930315, "learning_rate": 6.546574096365601e-06, "loss": 0.6051, "step": 2594 }, { "epoch": 0.42, "grad_norm": 1.041784056222085, "learning_rate": 6.544092138020397e-06, "loss": 0.6251, "step": 2595 }, { "epoch": 0.42, "grad_norm": 0.9288845561019567, "learning_rate": 6.541609759021933e-06, "loss": 0.6075, "step": 2596 }, { "epoch": 0.42, "grad_norm": 0.9118238654049805, "learning_rate": 6.539126960046474e-06, "loss": 0.5901, "step": 2597 }, { "epoch": 0.42, "grad_norm": 1.0166587787271026, "learning_rate": 6.536643741770406e-06, "loss": 0.6267, "step": 2598 }, { "epoch": 0.42, "grad_norm": 0.9926945114865702, "learning_rate": 6.534160104870224e-06, "loss": 0.6482, "step": 2599 }, { "epoch": 0.42, "grad_norm": 1.0759394956670143, "learning_rate": 6.531676050022539e-06, "loss": 0.6768, "step": 2600 }, { "epoch": 0.42, "grad_norm": 1.058683229891837, "learning_rate": 6.529191577904079e-06, "loss": 0.7123, "step": 2601 }, { "epoch": 0.42, "grad_norm": 1.0133936734319813, "learning_rate": 6.526706689191681e-06, "loss": 0.6449, "step": 2602 }, { "epoch": 0.42, "grad_norm": 1.0187962886244175, "learning_rate": 6.524221384562295e-06, "loss": 0.6246, "step": 2603 }, { "epoch": 0.42, "grad_norm": 0.6729077622455986, "learning_rate": 6.521735664692989e-06, "loss": 0.4895, "step": 2604 }, { "epoch": 0.42, "grad_norm": 0.9122792620685437, "learning_rate": 6.519249530260943e-06, "loss": 0.5501, "step": 2605 }, { "epoch": 0.42, "grad_norm": 1.111684736362179, "learning_rate": 6.516762981943444e-06, "loss": 0.7472, "step": 2606 }, { "epoch": 0.42, "grad_norm": 1.020036355096479, "learning_rate": 6.514276020417901e-06, "loss": 0.5665, "step": 2607 }, { "epoch": 0.42, "grad_norm": 1.0002754869507022, "learning_rate": 6.511788646361828e-06, "loss": 0.6063, "step": 2608 }, { "epoch": 0.42, "grad_norm": 1.1300316086703024, "learning_rate": 6.509300860452854e-06, "loss": 0.6924, "step": 2609 }, { "epoch": 0.42, "grad_norm": 1.030616281329045, "learning_rate": 6.506812663368722e-06, "loss": 0.6126, "step": 2610 }, { "epoch": 0.42, "grad_norm": 1.1052548555258808, "learning_rate": 6.504324055787285e-06, "loss": 0.6465, "step": 2611 }, { "epoch": 0.42, "grad_norm": 0.9662603377084739, "learning_rate": 6.501835038386509e-06, "loss": 0.6505, "step": 2612 }, { "epoch": 0.42, "grad_norm": 0.9047798069380502, "learning_rate": 6.499345611844471e-06, "loss": 0.6108, "step": 2613 }, { "epoch": 0.42, "grad_norm": 0.8802146366517508, "learning_rate": 6.496855776839357e-06, "loss": 0.5185, "step": 2614 }, { "epoch": 0.42, "grad_norm": 0.9208894234839756, "learning_rate": 6.494365534049469e-06, "loss": 0.5088, "step": 2615 }, { "epoch": 0.42, "grad_norm": 0.9267487781505049, "learning_rate": 6.491874884153217e-06, "loss": 0.5364, "step": 2616 }, { "epoch": 0.42, "grad_norm": 1.0066387163399229, "learning_rate": 6.489383827829122e-06, "loss": 0.7278, "step": 2617 }, { "epoch": 0.42, "grad_norm": 1.0451028431855591, "learning_rate": 6.486892365755819e-06, "loss": 0.6354, "step": 2618 }, { "epoch": 0.42, "grad_norm": 0.90627713695524, "learning_rate": 6.4844004986120465e-06, "loss": 0.5266, "step": 2619 }, { "epoch": 0.42, "grad_norm": 1.042139075169554, "learning_rate": 6.481908227076663e-06, "loss": 0.6029, "step": 2620 }, { "epoch": 0.42, "grad_norm": 0.9213864250898641, "learning_rate": 6.479415551828627e-06, "loss": 0.5656, "step": 2621 }, { "epoch": 0.42, "grad_norm": 1.0568248380324632, "learning_rate": 6.476922473547016e-06, "loss": 0.5514, "step": 2622 }, { "epoch": 0.42, "grad_norm": 1.0729407017376156, "learning_rate": 6.474428992911011e-06, "loss": 0.5717, "step": 2623 }, { "epoch": 0.42, "grad_norm": 1.051282653158837, "learning_rate": 6.471935110599907e-06, "loss": 0.5708, "step": 2624 }, { "epoch": 0.42, "grad_norm": 0.9679742272764066, "learning_rate": 6.469440827293103e-06, "loss": 0.5925, "step": 2625 }, { "epoch": 0.42, "grad_norm": 1.0209790275830017, "learning_rate": 6.466946143670113e-06, "loss": 0.6208, "step": 2626 }, { "epoch": 0.42, "grad_norm": 1.0493256163108942, "learning_rate": 6.464451060410556e-06, "loss": 0.5772, "step": 2627 }, { "epoch": 0.42, "grad_norm": 1.1355684616864483, "learning_rate": 6.461955578194163e-06, "loss": 0.6956, "step": 2628 }, { "epoch": 0.42, "grad_norm": 0.982779263033437, "learning_rate": 6.459459697700772e-06, "loss": 0.5499, "step": 2629 }, { "epoch": 0.42, "grad_norm": 0.9708244861643506, "learning_rate": 6.456963419610327e-06, "loss": 0.6408, "step": 2630 }, { "epoch": 0.42, "grad_norm": 0.9301758824880064, "learning_rate": 6.454466744602888e-06, "loss": 0.496, "step": 2631 }, { "epoch": 0.42, "grad_norm": 0.9881125578914289, "learning_rate": 6.451969673358613e-06, "loss": 0.595, "step": 2632 }, { "epoch": 0.42, "grad_norm": 1.019576817251507, "learning_rate": 6.449472206557776e-06, "loss": 0.5994, "step": 2633 }, { "epoch": 0.42, "grad_norm": 1.1079324193948537, "learning_rate": 6.4469743448807546e-06, "loss": 0.6594, "step": 2634 }, { "epoch": 0.42, "grad_norm": 1.019981864460688, "learning_rate": 6.444476089008037e-06, "loss": 0.615, "step": 2635 }, { "epoch": 0.42, "grad_norm": 1.0170709463386776, "learning_rate": 6.441977439620214e-06, "loss": 0.5923, "step": 2636 }, { "epoch": 0.42, "grad_norm": 1.0698401477799844, "learning_rate": 6.439478397397989e-06, "loss": 0.6591, "step": 2637 }, { "epoch": 0.43, "grad_norm": 1.0712602927839843, "learning_rate": 6.436978963022168e-06, "loss": 0.669, "step": 2638 }, { "epoch": 0.43, "grad_norm": 0.6810625557873233, "learning_rate": 6.434479137173667e-06, "loss": 0.5347, "step": 2639 }, { "epoch": 0.43, "grad_norm": 1.0406801595627877, "learning_rate": 6.431978920533509e-06, "loss": 0.6419, "step": 2640 }, { "epoch": 0.43, "grad_norm": 0.9644160598017195, "learning_rate": 6.429478313782817e-06, "loss": 0.6425, "step": 2641 }, { "epoch": 0.43, "grad_norm": 0.9899673251130092, "learning_rate": 6.426977317602833e-06, "loss": 0.5918, "step": 2642 }, { "epoch": 0.43, "grad_norm": 1.033403924011447, "learning_rate": 6.424475932674889e-06, "loss": 0.5858, "step": 2643 }, { "epoch": 0.43, "grad_norm": 1.0347320925473982, "learning_rate": 6.421974159680437e-06, "loss": 0.6935, "step": 2644 }, { "epoch": 0.43, "grad_norm": 0.9532589680480318, "learning_rate": 6.419471999301027e-06, "loss": 0.5837, "step": 2645 }, { "epoch": 0.43, "grad_norm": 0.9500626662746939, "learning_rate": 6.416969452218317e-06, "loss": 0.6478, "step": 2646 }, { "epoch": 0.43, "grad_norm": 0.9523691191776804, "learning_rate": 6.414466519114068e-06, "loss": 0.5708, "step": 2647 }, { "epoch": 0.43, "grad_norm": 0.9705145228209704, "learning_rate": 6.411963200670148e-06, "loss": 0.6089, "step": 2648 }, { "epoch": 0.43, "grad_norm": 1.0906113182763968, "learning_rate": 6.409459497568533e-06, "loss": 0.7288, "step": 2649 }, { "epoch": 0.43, "grad_norm": 0.9087301013461447, "learning_rate": 6.406955410491298e-06, "loss": 0.5679, "step": 2650 }, { "epoch": 0.43, "grad_norm": 0.9129516490165551, "learning_rate": 6.404450940120628e-06, "loss": 0.3973, "step": 2651 }, { "epoch": 0.43, "grad_norm": 0.9290740070857407, "learning_rate": 6.4019460871388055e-06, "loss": 0.5428, "step": 2652 }, { "epoch": 0.43, "grad_norm": 1.033949366708766, "learning_rate": 6.399440852228226e-06, "loss": 0.5746, "step": 2653 }, { "epoch": 0.43, "grad_norm": 0.8870683885883719, "learning_rate": 6.396935236071381e-06, "loss": 0.5561, "step": 2654 }, { "epoch": 0.43, "grad_norm": 0.9420004205588067, "learning_rate": 6.394429239350872e-06, "loss": 0.5819, "step": 2655 }, { "epoch": 0.43, "grad_norm": 0.9837763290162224, "learning_rate": 6.3919228627494e-06, "loss": 0.5508, "step": 2656 }, { "epoch": 0.43, "grad_norm": 1.0103917971952394, "learning_rate": 6.38941610694977e-06, "loss": 0.5911, "step": 2657 }, { "epoch": 0.43, "grad_norm": 0.9576469251702779, "learning_rate": 6.386908972634897e-06, "loss": 0.5567, "step": 2658 }, { "epoch": 0.43, "grad_norm": 1.1027578881629692, "learning_rate": 6.384401460487787e-06, "loss": 0.6401, "step": 2659 }, { "epoch": 0.43, "grad_norm": 1.1152135785755155, "learning_rate": 6.381893571191558e-06, "loss": 0.6281, "step": 2660 }, { "epoch": 0.43, "grad_norm": 0.9219426771119541, "learning_rate": 6.379385305429428e-06, "loss": 0.517, "step": 2661 }, { "epoch": 0.43, "grad_norm": 1.0318005231477991, "learning_rate": 6.376876663884719e-06, "loss": 0.6773, "step": 2662 }, { "epoch": 0.43, "grad_norm": 0.9814015852072634, "learning_rate": 6.374367647240851e-06, "loss": 0.5852, "step": 2663 }, { "epoch": 0.43, "grad_norm": 1.0946873461959152, "learning_rate": 6.371858256181352e-06, "loss": 0.7085, "step": 2664 }, { "epoch": 0.43, "grad_norm": 1.1257790798105276, "learning_rate": 6.3693484913898494e-06, "loss": 0.6383, "step": 2665 }, { "epoch": 0.43, "grad_norm": 1.0230386951645156, "learning_rate": 6.36683835355007e-06, "loss": 0.5802, "step": 2666 }, { "epoch": 0.43, "grad_norm": 0.9928835525742654, "learning_rate": 6.364327843345847e-06, "loss": 0.6074, "step": 2667 }, { "epoch": 0.43, "grad_norm": 1.0816457454041124, "learning_rate": 6.361816961461111e-06, "loss": 0.5939, "step": 2668 }, { "epoch": 0.43, "grad_norm": 1.0035418526957072, "learning_rate": 6.359305708579897e-06, "loss": 0.5019, "step": 2669 }, { "epoch": 0.43, "grad_norm": 0.9959592208618275, "learning_rate": 6.356794085386337e-06, "loss": 0.6783, "step": 2670 }, { "epoch": 0.43, "grad_norm": 0.9481317734574466, "learning_rate": 6.3542820925646696e-06, "loss": 0.484, "step": 2671 }, { "epoch": 0.43, "grad_norm": 1.0455324909362815, "learning_rate": 6.351769730799227e-06, "loss": 0.6176, "step": 2672 }, { "epoch": 0.43, "grad_norm": 0.9619067550739812, "learning_rate": 6.349257000774452e-06, "loss": 0.5764, "step": 2673 }, { "epoch": 0.43, "grad_norm": 1.0852462274541015, "learning_rate": 6.346743903174872e-06, "loss": 0.6207, "step": 2674 }, { "epoch": 0.43, "grad_norm": 0.9494570703108391, "learning_rate": 6.344230438685134e-06, "loss": 0.5964, "step": 2675 }, { "epoch": 0.43, "grad_norm": 0.9871075296923102, "learning_rate": 6.3417166079899685e-06, "loss": 0.6679, "step": 2676 }, { "epoch": 0.43, "grad_norm": 0.9918370749407301, "learning_rate": 6.339202411774215e-06, "loss": 0.641, "step": 2677 }, { "epoch": 0.43, "grad_norm": 0.9971048416139556, "learning_rate": 6.336687850722809e-06, "loss": 0.5954, "step": 2678 }, { "epoch": 0.43, "grad_norm": 0.9833572599683785, "learning_rate": 6.334172925520785e-06, "loss": 0.6538, "step": 2679 }, { "epoch": 0.43, "grad_norm": 1.10400773530343, "learning_rate": 6.3316576368532814e-06, "loss": 0.5831, "step": 2680 }, { "epoch": 0.43, "grad_norm": 0.9954560921615357, "learning_rate": 6.329141985405529e-06, "loss": 0.6026, "step": 2681 }, { "epoch": 0.43, "grad_norm": 0.9920694254942484, "learning_rate": 6.326625971862863e-06, "loss": 0.6456, "step": 2682 }, { "epoch": 0.43, "grad_norm": 1.1117445777285977, "learning_rate": 6.324109596910713e-06, "loss": 0.6861, "step": 2683 }, { "epoch": 0.43, "grad_norm": 1.0007172599680345, "learning_rate": 6.32159286123461e-06, "loss": 0.6027, "step": 2684 }, { "epoch": 0.43, "grad_norm": 1.0442335287996225, "learning_rate": 6.319075765520179e-06, "loss": 0.7014, "step": 2685 }, { "epoch": 0.43, "grad_norm": 1.024363800862652, "learning_rate": 6.316558310453153e-06, "loss": 0.6181, "step": 2686 }, { "epoch": 0.43, "grad_norm": 0.996700915993852, "learning_rate": 6.314040496719349e-06, "loss": 0.5465, "step": 2687 }, { "epoch": 0.43, "grad_norm": 1.04127223109327, "learning_rate": 6.3115223250046934e-06, "loss": 0.5966, "step": 2688 }, { "epoch": 0.43, "grad_norm": 1.0385564091664412, "learning_rate": 6.309003795995205e-06, "loss": 0.5778, "step": 2689 }, { "epoch": 0.43, "grad_norm": 0.9943036095306566, "learning_rate": 6.306484910376998e-06, "loss": 0.5587, "step": 2690 }, { "epoch": 0.43, "grad_norm": 0.9930851736935974, "learning_rate": 6.303965668836288e-06, "loss": 0.5281, "step": 2691 }, { "epoch": 0.43, "grad_norm": 1.014775318818345, "learning_rate": 6.301446072059386e-06, "loss": 0.5402, "step": 2692 }, { "epoch": 0.43, "grad_norm": 1.0194681029197552, "learning_rate": 6.2989261207327e-06, "loss": 0.6484, "step": 2693 }, { "epoch": 0.43, "grad_norm": 0.9388331733262057, "learning_rate": 6.296405815542732e-06, "loss": 0.635, "step": 2694 }, { "epoch": 0.43, "grad_norm": 0.9511933598242038, "learning_rate": 6.293885157176087e-06, "loss": 0.546, "step": 2695 }, { "epoch": 0.43, "grad_norm": 1.076864411551599, "learning_rate": 6.2913641463194566e-06, "loss": 0.6562, "step": 2696 }, { "epoch": 0.43, "grad_norm": 1.0022117574532048, "learning_rate": 6.288842783659638e-06, "loss": 0.6845, "step": 2697 }, { "epoch": 0.43, "grad_norm": 1.041833791627389, "learning_rate": 6.286321069883517e-06, "loss": 0.6595, "step": 2698 }, { "epoch": 0.43, "grad_norm": 1.0451460609433734, "learning_rate": 6.283799005678077e-06, "loss": 0.6186, "step": 2699 }, { "epoch": 0.44, "grad_norm": 1.0462101091994838, "learning_rate": 6.281276591730403e-06, "loss": 0.6887, "step": 2700 }, { "epoch": 0.44, "grad_norm": 1.0730079743254441, "learning_rate": 6.278753828727664e-06, "loss": 0.5979, "step": 2701 }, { "epoch": 0.44, "grad_norm": 0.9358364909451595, "learning_rate": 6.276230717357136e-06, "loss": 0.6139, "step": 2702 }, { "epoch": 0.44, "grad_norm": 1.043386788523056, "learning_rate": 6.273707258306178e-06, "loss": 0.6397, "step": 2703 }, { "epoch": 0.44, "grad_norm": 1.070217159876887, "learning_rate": 6.271183452262255e-06, "loss": 0.7397, "step": 2704 }, { "epoch": 0.44, "grad_norm": 1.0371873081340126, "learning_rate": 6.268659299912918e-06, "loss": 0.6027, "step": 2705 }, { "epoch": 0.44, "grad_norm": 1.0835037923073132, "learning_rate": 6.266134801945819e-06, "loss": 0.6581, "step": 2706 }, { "epoch": 0.44, "grad_norm": 1.0377717305959842, "learning_rate": 6.263609959048696e-06, "loss": 0.5935, "step": 2707 }, { "epoch": 0.44, "grad_norm": 1.0555480715060301, "learning_rate": 6.261084771909391e-06, "loss": 0.6672, "step": 2708 }, { "epoch": 0.44, "grad_norm": 1.0046978247829903, "learning_rate": 6.25855924121583e-06, "loss": 0.6083, "step": 2709 }, { "epoch": 0.44, "grad_norm": 1.1137995500897437, "learning_rate": 6.25603336765604e-06, "loss": 0.686, "step": 2710 }, { "epoch": 0.44, "grad_norm": 1.0755274642395372, "learning_rate": 6.2535071519181385e-06, "loss": 0.5973, "step": 2711 }, { "epoch": 0.44, "grad_norm": 0.9401479869826727, "learning_rate": 6.250980594690335e-06, "loss": 0.5906, "step": 2712 }, { "epoch": 0.44, "grad_norm": 0.951699673235168, "learning_rate": 6.248453696660934e-06, "loss": 0.5524, "step": 2713 }, { "epoch": 0.44, "grad_norm": 1.0949283102618816, "learning_rate": 6.245926458518333e-06, "loss": 0.6439, "step": 2714 }, { "epoch": 0.44, "grad_norm": 0.8792250544124379, "learning_rate": 6.243398880951019e-06, "loss": 0.508, "step": 2715 }, { "epoch": 0.44, "grad_norm": 1.028113940702658, "learning_rate": 6.240870964647577e-06, "loss": 0.6541, "step": 2716 }, { "epoch": 0.44, "grad_norm": 1.0099267913794, "learning_rate": 6.2383427102966786e-06, "loss": 0.6453, "step": 2717 }, { "epoch": 0.44, "grad_norm": 1.0204399579000307, "learning_rate": 6.2358141185870915e-06, "loss": 0.5835, "step": 2718 }, { "epoch": 0.44, "grad_norm": 0.9616924762999283, "learning_rate": 6.233285190207673e-06, "loss": 0.5441, "step": 2719 }, { "epoch": 0.44, "grad_norm": 1.0113952959848211, "learning_rate": 6.2307559258473716e-06, "loss": 0.639, "step": 2720 }, { "epoch": 0.44, "grad_norm": 1.054723157240936, "learning_rate": 6.228226326195232e-06, "loss": 0.7181, "step": 2721 }, { "epoch": 0.44, "grad_norm": 0.9157772165866018, "learning_rate": 6.225696391940383e-06, "loss": 0.5936, "step": 2722 }, { "epoch": 0.44, "grad_norm": 0.8993306648988618, "learning_rate": 6.223166123772051e-06, "loss": 0.573, "step": 2723 }, { "epoch": 0.44, "grad_norm": 1.0494719444982958, "learning_rate": 6.220635522379551e-06, "loss": 0.6398, "step": 2724 }, { "epoch": 0.44, "grad_norm": 0.721185944079204, "learning_rate": 6.2181045884522876e-06, "loss": 0.5122, "step": 2725 }, { "epoch": 0.44, "grad_norm": 1.022674034357875, "learning_rate": 6.215573322679756e-06, "loss": 0.6005, "step": 2726 }, { "epoch": 0.44, "grad_norm": 0.9943601408064036, "learning_rate": 6.213041725751543e-06, "loss": 0.5862, "step": 2727 }, { "epoch": 0.44, "grad_norm": 0.9925358434272786, "learning_rate": 6.210509798357328e-06, "loss": 0.6031, "step": 2728 }, { "epoch": 0.44, "grad_norm": 0.9451944177379101, "learning_rate": 6.207977541186876e-06, "loss": 0.59, "step": 2729 }, { "epoch": 0.44, "grad_norm": 1.108988356036505, "learning_rate": 6.205444954930043e-06, "loss": 0.5949, "step": 2730 }, { "epoch": 0.44, "grad_norm": 0.8295013596724732, "learning_rate": 6.2029120402767765e-06, "loss": 0.5223, "step": 2731 }, { "epoch": 0.44, "grad_norm": 1.0252469321977662, "learning_rate": 6.2003787979171105e-06, "loss": 0.5757, "step": 2732 }, { "epoch": 0.44, "grad_norm": 1.003224875173133, "learning_rate": 6.197845228541174e-06, "loss": 0.5775, "step": 2733 }, { "epoch": 0.44, "grad_norm": 0.9542687388224328, "learning_rate": 6.195311332839175e-06, "loss": 0.4823, "step": 2734 }, { "epoch": 0.44, "grad_norm": 1.0167995302973238, "learning_rate": 6.192777111501422e-06, "loss": 0.5735, "step": 2735 }, { "epoch": 0.44, "grad_norm": 1.0687491447292161, "learning_rate": 6.190242565218305e-06, "loss": 0.5961, "step": 2736 }, { "epoch": 0.44, "grad_norm": 0.9669557991716592, "learning_rate": 6.187707694680302e-06, "loss": 0.6276, "step": 2737 }, { "epoch": 0.44, "grad_norm": 1.009573939839138, "learning_rate": 6.185172500577986e-06, "loss": 0.6418, "step": 2738 }, { "epoch": 0.44, "grad_norm": 0.9263174770171911, "learning_rate": 6.182636983602009e-06, "loss": 0.5746, "step": 2739 }, { "epoch": 0.44, "grad_norm": 1.070686947758525, "learning_rate": 6.180101144443121e-06, "loss": 0.5811, "step": 2740 }, { "epoch": 0.44, "grad_norm": 1.0476264076354889, "learning_rate": 6.17756498379215e-06, "loss": 0.6015, "step": 2741 }, { "epoch": 0.44, "grad_norm": 1.091797889353649, "learning_rate": 6.175028502340018e-06, "loss": 0.6599, "step": 2742 }, { "epoch": 0.44, "grad_norm": 1.10336487253729, "learning_rate": 6.172491700777732e-06, "loss": 0.597, "step": 2743 }, { "epoch": 0.44, "grad_norm": 1.026731675902572, "learning_rate": 6.169954579796387e-06, "loss": 0.6537, "step": 2744 }, { "epoch": 0.44, "grad_norm": 0.9839566667990703, "learning_rate": 6.167417140087163e-06, "loss": 0.5771, "step": 2745 }, { "epoch": 0.44, "grad_norm": 1.0680208728310914, "learning_rate": 6.164879382341331e-06, "loss": 0.6432, "step": 2746 }, { "epoch": 0.44, "grad_norm": 0.943084573772298, "learning_rate": 6.162341307250246e-06, "loss": 0.6403, "step": 2747 }, { "epoch": 0.44, "grad_norm": 1.0021196809870656, "learning_rate": 6.159802915505347e-06, "loss": 0.602, "step": 2748 }, { "epoch": 0.44, "grad_norm": 1.0148224882586994, "learning_rate": 6.157264207798165e-06, "loss": 0.6468, "step": 2749 }, { "epoch": 0.44, "grad_norm": 0.9945659509598346, "learning_rate": 6.154725184820311e-06, "loss": 0.6677, "step": 2750 }, { "epoch": 0.44, "grad_norm": 1.02768686070851, "learning_rate": 6.15218584726349e-06, "loss": 0.6318, "step": 2751 }, { "epoch": 0.44, "grad_norm": 0.9387833832900156, "learning_rate": 6.149646195819481e-06, "loss": 0.5321, "step": 2752 }, { "epoch": 0.44, "grad_norm": 0.9546907945735594, "learning_rate": 6.147106231180159e-06, "loss": 0.5739, "step": 2753 }, { "epoch": 0.44, "grad_norm": 1.0263359002574455, "learning_rate": 6.144565954037479e-06, "loss": 0.6006, "step": 2754 }, { "epoch": 0.44, "grad_norm": 0.9286238737150959, "learning_rate": 6.142025365083484e-06, "loss": 0.5959, "step": 2755 }, { "epoch": 0.44, "grad_norm": 1.0075802546700396, "learning_rate": 6.139484465010298e-06, "loss": 0.6407, "step": 2756 }, { "epoch": 0.44, "grad_norm": 1.0086601027313675, "learning_rate": 6.136943254510135e-06, "loss": 0.616, "step": 2757 }, { "epoch": 0.44, "grad_norm": 1.0534146431243012, "learning_rate": 6.134401734275288e-06, "loss": 0.5996, "step": 2758 }, { "epoch": 0.44, "grad_norm": 0.9413599539829495, "learning_rate": 6.13185990499814e-06, "loss": 0.5713, "step": 2759 }, { "epoch": 0.44, "grad_norm": 0.9861118215920378, "learning_rate": 6.129317767371153e-06, "loss": 0.633, "step": 2760 }, { "epoch": 0.44, "grad_norm": 1.0876144804774115, "learning_rate": 6.126775322086876e-06, "loss": 0.6214, "step": 2761 }, { "epoch": 0.45, "grad_norm": 0.9469400815668929, "learning_rate": 6.124232569837943e-06, "loss": 0.5566, "step": 2762 }, { "epoch": 0.45, "grad_norm": 1.095637070381988, "learning_rate": 6.121689511317068e-06, "loss": 0.6115, "step": 2763 }, { "epoch": 0.45, "grad_norm": 0.9268069941241458, "learning_rate": 6.11914614721705e-06, "loss": 0.5637, "step": 2764 }, { "epoch": 0.45, "grad_norm": 1.0278119700210662, "learning_rate": 6.116602478230772e-06, "loss": 0.5636, "step": 2765 }, { "epoch": 0.45, "grad_norm": 0.9288487496563996, "learning_rate": 6.1140585050512e-06, "loss": 0.591, "step": 2766 }, { "epoch": 0.45, "grad_norm": 0.9332189838564758, "learning_rate": 6.111514228371381e-06, "loss": 0.5692, "step": 2767 }, { "epoch": 0.45, "grad_norm": 0.8064784567144704, "learning_rate": 6.108969648884449e-06, "loss": 0.4666, "step": 2768 }, { "epoch": 0.45, "grad_norm": 0.9990096279018127, "learning_rate": 6.106424767283615e-06, "loss": 0.5489, "step": 2769 }, { "epoch": 0.45, "grad_norm": 0.6825400765201701, "learning_rate": 6.103879584262176e-06, "loss": 0.4957, "step": 2770 }, { "epoch": 0.45, "grad_norm": 1.034556765734561, "learning_rate": 6.101334100513508e-06, "loss": 0.5957, "step": 2771 }, { "epoch": 0.45, "grad_norm": 0.9979857980217465, "learning_rate": 6.098788316731074e-06, "loss": 0.6099, "step": 2772 }, { "epoch": 0.45, "grad_norm": 0.9479816871057386, "learning_rate": 6.096242233608414e-06, "loss": 0.6554, "step": 2773 }, { "epoch": 0.45, "grad_norm": 1.0210469732623622, "learning_rate": 6.0936958518391505e-06, "loss": 0.621, "step": 2774 }, { "epoch": 0.45, "grad_norm": 0.6353273894022693, "learning_rate": 6.0911491721169906e-06, "loss": 0.5044, "step": 2775 }, { "epoch": 0.45, "grad_norm": 0.9369604918482168, "learning_rate": 6.088602195135717e-06, "loss": 0.5492, "step": 2776 }, { "epoch": 0.45, "grad_norm": 1.0151224502365879, "learning_rate": 6.086054921589198e-06, "loss": 0.6041, "step": 2777 }, { "epoch": 0.45, "grad_norm": 0.9946196746334682, "learning_rate": 6.083507352171382e-06, "loss": 0.5566, "step": 2778 }, { "epoch": 0.45, "grad_norm": 0.9408918036691608, "learning_rate": 6.080959487576297e-06, "loss": 0.5011, "step": 2779 }, { "epoch": 0.45, "grad_norm": 0.8987212710381357, "learning_rate": 6.078411328498049e-06, "loss": 0.5316, "step": 2780 }, { "epoch": 0.45, "grad_norm": 0.9843685472932292, "learning_rate": 6.075862875630829e-06, "loss": 0.5432, "step": 2781 }, { "epoch": 0.45, "grad_norm": 1.10341551933072, "learning_rate": 6.073314129668907e-06, "loss": 0.6315, "step": 2782 }, { "epoch": 0.45, "grad_norm": 0.9665471873690706, "learning_rate": 6.070765091306628e-06, "loss": 0.5118, "step": 2783 }, { "epoch": 0.45, "grad_norm": 0.9694075445181521, "learning_rate": 6.068215761238423e-06, "loss": 0.6138, "step": 2784 }, { "epoch": 0.45, "grad_norm": 0.9658980548077123, "learning_rate": 6.065666140158799e-06, "loss": 0.6251, "step": 2785 }, { "epoch": 0.45, "grad_norm": 1.0337417258632708, "learning_rate": 6.063116228762343e-06, "loss": 0.6551, "step": 2786 }, { "epoch": 0.45, "grad_norm": 0.6889929695094947, "learning_rate": 6.060566027743721e-06, "loss": 0.528, "step": 2787 }, { "epoch": 0.45, "grad_norm": 0.9962554815461884, "learning_rate": 6.058015537797678e-06, "loss": 0.6426, "step": 2788 }, { "epoch": 0.45, "grad_norm": 0.9411791735795947, "learning_rate": 6.055464759619038e-06, "loss": 0.5736, "step": 2789 }, { "epoch": 0.45, "grad_norm": 0.9327620072832649, "learning_rate": 6.052913693902706e-06, "loss": 0.5934, "step": 2790 }, { "epoch": 0.45, "grad_norm": 0.6534439121209137, "learning_rate": 6.050362341343658e-06, "loss": 0.4981, "step": 2791 }, { "epoch": 0.45, "grad_norm": 1.041894309473969, "learning_rate": 6.047810702636956e-06, "loss": 0.5498, "step": 2792 }, { "epoch": 0.45, "grad_norm": 1.067873760685095, "learning_rate": 6.045258778477735e-06, "loss": 0.6422, "step": 2793 }, { "epoch": 0.45, "grad_norm": 0.9260814996905765, "learning_rate": 6.042706569561209e-06, "loss": 0.596, "step": 2794 }, { "epoch": 0.45, "grad_norm": 1.1061981823310687, "learning_rate": 6.040154076582672e-06, "loss": 0.6022, "step": 2795 }, { "epoch": 0.45, "grad_norm": 1.0867869512914343, "learning_rate": 6.0376013002374924e-06, "loss": 0.5846, "step": 2796 }, { "epoch": 0.45, "grad_norm": 1.1323472762612898, "learning_rate": 6.035048241221118e-06, "loss": 0.6493, "step": 2797 }, { "epoch": 0.45, "grad_norm": 0.9956746162157875, "learning_rate": 6.032494900229069e-06, "loss": 0.6626, "step": 2798 }, { "epoch": 0.45, "grad_norm": 1.0220848694246965, "learning_rate": 6.02994127795695e-06, "loss": 0.657, "step": 2799 }, { "epoch": 0.45, "grad_norm": 0.7519369907581069, "learning_rate": 6.027387375100435e-06, "loss": 0.4794, "step": 2800 }, { "epoch": 0.45, "grad_norm": 1.2510138519067262, "learning_rate": 6.024833192355282e-06, "loss": 0.6645, "step": 2801 }, { "epoch": 0.45, "grad_norm": 0.9978137367219454, "learning_rate": 6.022278730417315e-06, "loss": 0.6178, "step": 2802 }, { "epoch": 0.45, "grad_norm": 0.879076805279253, "learning_rate": 6.019723989982444e-06, "loss": 0.6214, "step": 2803 }, { "epoch": 0.45, "grad_norm": 0.9526879217424523, "learning_rate": 6.0171689717466485e-06, "loss": 0.5267, "step": 2804 }, { "epoch": 0.45, "grad_norm": 1.090810141366759, "learning_rate": 6.014613676405985e-06, "loss": 0.6342, "step": 2805 }, { "epoch": 0.45, "grad_norm": 0.9534369894209447, "learning_rate": 6.01205810465659e-06, "loss": 0.616, "step": 2806 }, { "epoch": 0.45, "grad_norm": 1.1192545139697414, "learning_rate": 6.009502257194669e-06, "loss": 0.7187, "step": 2807 }, { "epoch": 0.45, "grad_norm": 0.9557433902679736, "learning_rate": 6.0069461347165084e-06, "loss": 0.5943, "step": 2808 }, { "epoch": 0.45, "grad_norm": 1.0061525776555953, "learning_rate": 6.0043897379184615e-06, "loss": 0.662, "step": 2809 }, { "epoch": 0.45, "grad_norm": 0.7241871984498629, "learning_rate": 6.001833067496964e-06, "loss": 0.5247, "step": 2810 }, { "epoch": 0.45, "grad_norm": 0.9974530270710181, "learning_rate": 5.999276124148525e-06, "loss": 0.5649, "step": 2811 }, { "epoch": 0.45, "grad_norm": 0.9335744127886252, "learning_rate": 5.996718908569723e-06, "loss": 0.6428, "step": 2812 }, { "epoch": 0.45, "grad_norm": 0.6433130535567881, "learning_rate": 5.9941614214572155e-06, "loss": 0.4537, "step": 2813 }, { "epoch": 0.45, "grad_norm": 1.0485140470847456, "learning_rate": 5.9916036635077325e-06, "loss": 0.5831, "step": 2814 }, { "epoch": 0.45, "grad_norm": 0.6681916084973161, "learning_rate": 5.98904563541808e-06, "loss": 0.4931, "step": 2815 }, { "epoch": 0.45, "grad_norm": 0.945187241558846, "learning_rate": 5.986487337885129e-06, "loss": 0.5558, "step": 2816 }, { "epoch": 0.45, "grad_norm": 1.0005775416640104, "learning_rate": 5.983928771605839e-06, "loss": 0.5851, "step": 2817 }, { "epoch": 0.45, "grad_norm": 0.9864974883270706, "learning_rate": 5.981369937277226e-06, "loss": 0.5818, "step": 2818 }, { "epoch": 0.45, "grad_norm": 0.9434374588602612, "learning_rate": 5.978810835596392e-06, "loss": 0.47, "step": 2819 }, { "epoch": 0.45, "grad_norm": 0.9362136348598245, "learning_rate": 5.976251467260505e-06, "loss": 0.588, "step": 2820 }, { "epoch": 0.45, "grad_norm": 0.9543007793584634, "learning_rate": 5.9736918329668094e-06, "loss": 0.6151, "step": 2821 }, { "epoch": 0.45, "grad_norm": 0.9769119770876596, "learning_rate": 5.971131933412617e-06, "loss": 0.5705, "step": 2822 }, { "epoch": 0.45, "grad_norm": 1.065116302963071, "learning_rate": 5.9685717692953175e-06, "loss": 0.7212, "step": 2823 }, { "epoch": 0.46, "grad_norm": 0.9654754066893405, "learning_rate": 5.966011341312368e-06, "loss": 0.5285, "step": 2824 }, { "epoch": 0.46, "grad_norm": 1.0447922236417653, "learning_rate": 5.9634506501613006e-06, "loss": 0.6868, "step": 2825 }, { "epoch": 0.46, "grad_norm": 1.0907522488480732, "learning_rate": 5.960889696539719e-06, "loss": 0.5415, "step": 2826 }, { "epoch": 0.46, "grad_norm": 0.9861072523253314, "learning_rate": 5.958328481145297e-06, "loss": 0.6213, "step": 2827 }, { "epoch": 0.46, "grad_norm": 0.9826569307630312, "learning_rate": 5.955767004675781e-06, "loss": 0.5936, "step": 2828 }, { "epoch": 0.46, "grad_norm": 0.8880778126550937, "learning_rate": 5.953205267828985e-06, "loss": 0.5613, "step": 2829 }, { "epoch": 0.46, "grad_norm": 1.061051830151999, "learning_rate": 5.950643271302802e-06, "loss": 0.666, "step": 2830 }, { "epoch": 0.46, "grad_norm": 0.9862833098359478, "learning_rate": 5.948081015795184e-06, "loss": 0.6226, "step": 2831 }, { "epoch": 0.46, "grad_norm": 1.0115595722546844, "learning_rate": 5.945518502004165e-06, "loss": 0.6191, "step": 2832 }, { "epoch": 0.46, "grad_norm": 0.9693306457139176, "learning_rate": 5.942955730627843e-06, "loss": 0.6121, "step": 2833 }, { "epoch": 0.46, "grad_norm": 0.6950079262184486, "learning_rate": 5.940392702364387e-06, "loss": 0.4948, "step": 2834 }, { "epoch": 0.46, "grad_norm": 1.0067215949947752, "learning_rate": 5.9378294179120355e-06, "loss": 0.6243, "step": 2835 }, { "epoch": 0.46, "grad_norm": 1.0260249904961083, "learning_rate": 5.935265877969101e-06, "loss": 0.6728, "step": 2836 }, { "epoch": 0.46, "grad_norm": 0.6741729777060154, "learning_rate": 5.932702083233959e-06, "loss": 0.4638, "step": 2837 }, { "epoch": 0.46, "grad_norm": 1.0443448028452096, "learning_rate": 5.9301380344050595e-06, "loss": 0.5679, "step": 2838 }, { "epoch": 0.46, "grad_norm": 1.0515865803300575, "learning_rate": 5.927573732180923e-06, "loss": 0.593, "step": 2839 }, { "epoch": 0.46, "grad_norm": 1.0009193903112188, "learning_rate": 5.925009177260128e-06, "loss": 0.5751, "step": 2840 }, { "epoch": 0.46, "grad_norm": 1.040483844679302, "learning_rate": 5.922444370341339e-06, "loss": 0.6243, "step": 2841 }, { "epoch": 0.46, "grad_norm": 1.0243246247365925, "learning_rate": 5.919879312123276e-06, "loss": 0.679, "step": 2842 }, { "epoch": 0.46, "grad_norm": 1.0714410709058184, "learning_rate": 5.91731400330473e-06, "loss": 0.6655, "step": 2843 }, { "epoch": 0.46, "grad_norm": 0.6576500400723551, "learning_rate": 5.914748444584565e-06, "loss": 0.4861, "step": 2844 }, { "epoch": 0.46, "grad_norm": 0.9951407951673544, "learning_rate": 5.912182636661707e-06, "loss": 0.6328, "step": 2845 }, { "epoch": 0.46, "grad_norm": 0.96199168652253, "learning_rate": 5.9096165802351555e-06, "loss": 0.673, "step": 2846 }, { "epoch": 0.46, "grad_norm": 1.0435814384747515, "learning_rate": 5.907050276003974e-06, "loss": 0.6391, "step": 2847 }, { "epoch": 0.46, "grad_norm": 0.9814886288719546, "learning_rate": 5.904483724667294e-06, "loss": 0.6067, "step": 2848 }, { "epoch": 0.46, "grad_norm": 0.9533904715207262, "learning_rate": 5.901916926924314e-06, "loss": 0.6376, "step": 2849 }, { "epoch": 0.46, "grad_norm": 1.14037421516871, "learning_rate": 5.899349883474303e-06, "loss": 0.6104, "step": 2850 }, { "epoch": 0.46, "grad_norm": 1.1356659729894598, "learning_rate": 5.896782595016594e-06, "loss": 0.6797, "step": 2851 }, { "epoch": 0.46, "grad_norm": 0.9571935450125075, "learning_rate": 5.894215062250584e-06, "loss": 0.5546, "step": 2852 }, { "epoch": 0.46, "grad_norm": 1.0448126872165153, "learning_rate": 5.891647285875742e-06, "loss": 0.6283, "step": 2853 }, { "epoch": 0.46, "grad_norm": 1.0202340053770433, "learning_rate": 5.889079266591602e-06, "loss": 0.632, "step": 2854 }, { "epoch": 0.46, "grad_norm": 0.7245235718193215, "learning_rate": 5.886511005097762e-06, "loss": 0.5008, "step": 2855 }, { "epoch": 0.46, "grad_norm": 1.037303779554695, "learning_rate": 5.883942502093887e-06, "loss": 0.7077, "step": 2856 }, { "epoch": 0.46, "grad_norm": 0.9626651622238032, "learning_rate": 5.881373758279709e-06, "loss": 0.5808, "step": 2857 }, { "epoch": 0.46, "grad_norm": 1.0799903077324495, "learning_rate": 5.878804774355024e-06, "loss": 0.5611, "step": 2858 }, { "epoch": 0.46, "grad_norm": 0.9119368640953579, "learning_rate": 5.876235551019692e-06, "loss": 0.5494, "step": 2859 }, { "epoch": 0.46, "grad_norm": 0.9325781799811966, "learning_rate": 5.873666088973644e-06, "loss": 0.6375, "step": 2860 }, { "epoch": 0.46, "grad_norm": 0.9580425266679156, "learning_rate": 5.871096388916873e-06, "loss": 0.5708, "step": 2861 }, { "epoch": 0.46, "grad_norm": 0.9949791635089573, "learning_rate": 5.86852645154943e-06, "loss": 0.6741, "step": 2862 }, { "epoch": 0.46, "grad_norm": 0.8429102684011858, "learning_rate": 5.865956277571441e-06, "loss": 0.5482, "step": 2863 }, { "epoch": 0.46, "grad_norm": 0.7153777408979878, "learning_rate": 5.863385867683093e-06, "loss": 0.4925, "step": 2864 }, { "epoch": 0.46, "grad_norm": 1.021691894737005, "learning_rate": 5.860815222584635e-06, "loss": 0.5671, "step": 2865 }, { "epoch": 0.46, "grad_norm": 1.04003435400023, "learning_rate": 5.858244342976383e-06, "loss": 0.5376, "step": 2866 }, { "epoch": 0.46, "grad_norm": 1.0153299438045589, "learning_rate": 5.855673229558711e-06, "loss": 0.6282, "step": 2867 }, { "epoch": 0.46, "grad_norm": 0.9760824607151967, "learning_rate": 5.853101883032069e-06, "loss": 0.5719, "step": 2868 }, { "epoch": 0.46, "grad_norm": 0.9229037560765895, "learning_rate": 5.850530304096956e-06, "loss": 0.5132, "step": 2869 }, { "epoch": 0.46, "grad_norm": 1.134357325170705, "learning_rate": 5.847958493453941e-06, "loss": 0.6539, "step": 2870 }, { "epoch": 0.46, "grad_norm": 1.0145327777779447, "learning_rate": 5.845386451803661e-06, "loss": 0.6205, "step": 2871 }, { "epoch": 0.46, "grad_norm": 0.9325220494417922, "learning_rate": 5.8428141798468086e-06, "loss": 0.5998, "step": 2872 }, { "epoch": 0.46, "grad_norm": 0.9665798595348647, "learning_rate": 5.84024167828414e-06, "loss": 0.5656, "step": 2873 }, { "epoch": 0.46, "grad_norm": 0.9660355541230701, "learning_rate": 5.837668947816476e-06, "loss": 0.6228, "step": 2874 }, { "epoch": 0.46, "grad_norm": 0.9758271768920974, "learning_rate": 5.835095989144701e-06, "loss": 0.4941, "step": 2875 }, { "epoch": 0.46, "grad_norm": 0.9952800449274192, "learning_rate": 5.83252280296976e-06, "loss": 0.5789, "step": 2876 }, { "epoch": 0.46, "grad_norm": 1.0071803209791454, "learning_rate": 5.8299493899926584e-06, "loss": 0.6329, "step": 2877 }, { "epoch": 0.46, "grad_norm": 1.0330822679323406, "learning_rate": 5.827375750914463e-06, "loss": 0.5953, "step": 2878 }, { "epoch": 0.46, "grad_norm": 0.9920242991794651, "learning_rate": 5.82480188643631e-06, "loss": 0.5618, "step": 2879 }, { "epoch": 0.46, "grad_norm": 0.908463964742955, "learning_rate": 5.822227797259385e-06, "loss": 0.5591, "step": 2880 }, { "epoch": 0.46, "grad_norm": 0.9734249425447155, "learning_rate": 5.819653484084944e-06, "loss": 0.5582, "step": 2881 }, { "epoch": 0.46, "grad_norm": 1.0482688264227953, "learning_rate": 5.817078947614299e-06, "loss": 0.6518, "step": 2882 }, { "epoch": 0.46, "grad_norm": 0.9982005040609158, "learning_rate": 5.814504188548828e-06, "loss": 0.5804, "step": 2883 }, { "epoch": 0.46, "grad_norm": 1.0406878893692733, "learning_rate": 5.811929207589963e-06, "loss": 0.6547, "step": 2884 }, { "epoch": 0.46, "grad_norm": 0.9929599008526521, "learning_rate": 5.8093540054392e-06, "loss": 0.6201, "step": 2885 }, { "epoch": 0.46, "grad_norm": 1.1267583672529835, "learning_rate": 5.806778582798096e-06, "loss": 0.6606, "step": 2886 }, { "epoch": 0.47, "grad_norm": 1.0224621836707042, "learning_rate": 5.804202940368267e-06, "loss": 0.624, "step": 2887 }, { "epoch": 0.47, "grad_norm": 0.9707155779879098, "learning_rate": 5.801627078851391e-06, "loss": 0.4935, "step": 2888 }, { "epoch": 0.47, "grad_norm": 0.6748279549493201, "learning_rate": 5.799050998949199e-06, "loss": 0.4731, "step": 2889 }, { "epoch": 0.47, "grad_norm": 1.05949852412725, "learning_rate": 5.796474701363491e-06, "loss": 0.6366, "step": 2890 }, { "epoch": 0.47, "grad_norm": 0.9535495776999585, "learning_rate": 5.793898186796118e-06, "loss": 0.5655, "step": 2891 }, { "epoch": 0.47, "grad_norm": 0.9198737751283886, "learning_rate": 5.791321455948996e-06, "loss": 0.5518, "step": 2892 }, { "epoch": 0.47, "grad_norm": 1.0418289728507253, "learning_rate": 5.7887445095240954e-06, "loss": 0.6887, "step": 2893 }, { "epoch": 0.47, "grad_norm": 0.9784598122698184, "learning_rate": 5.78616734822345e-06, "loss": 0.5411, "step": 2894 }, { "epoch": 0.47, "grad_norm": 0.9686931736203592, "learning_rate": 5.783589972749149e-06, "loss": 0.6016, "step": 2895 }, { "epoch": 0.47, "grad_norm": 0.7065970217542885, "learning_rate": 5.781012383803337e-06, "loss": 0.4934, "step": 2896 }, { "epoch": 0.47, "grad_norm": 0.9767568755009245, "learning_rate": 5.7784345820882235e-06, "loss": 0.6501, "step": 2897 }, { "epoch": 0.47, "grad_norm": 0.9842211547307382, "learning_rate": 5.775856568306073e-06, "loss": 0.6504, "step": 2898 }, { "epoch": 0.47, "grad_norm": 0.9187687725756604, "learning_rate": 5.773278343159207e-06, "loss": 0.6305, "step": 2899 }, { "epoch": 0.47, "grad_norm": 0.9478112891604635, "learning_rate": 5.770699907350003e-06, "loss": 0.606, "step": 2900 }, { "epoch": 0.47, "grad_norm": 0.9970778068140749, "learning_rate": 5.768121261580901e-06, "loss": 0.6595, "step": 2901 }, { "epoch": 0.47, "grad_norm": 1.0027512925106365, "learning_rate": 5.765542406554394e-06, "loss": 0.611, "step": 2902 }, { "epoch": 0.47, "grad_norm": 0.6473420049760968, "learning_rate": 5.762963342973033e-06, "loss": 0.4431, "step": 2903 }, { "epoch": 0.47, "grad_norm": 0.9995884499091411, "learning_rate": 5.7603840715394264e-06, "loss": 0.616, "step": 2904 }, { "epoch": 0.47, "grad_norm": 0.9601077924899245, "learning_rate": 5.757804592956237e-06, "loss": 0.5939, "step": 2905 }, { "epoch": 0.47, "grad_norm": 0.9412940288685271, "learning_rate": 5.75522490792619e-06, "loss": 0.5359, "step": 2906 }, { "epoch": 0.47, "grad_norm": 0.9398322951543423, "learning_rate": 5.752645017152058e-06, "loss": 0.5587, "step": 2907 }, { "epoch": 0.47, "grad_norm": 1.0515066278753094, "learning_rate": 5.750064921336679e-06, "loss": 0.6334, "step": 2908 }, { "epoch": 0.47, "grad_norm": 0.9814663356076286, "learning_rate": 5.7474846211829376e-06, "loss": 0.5837, "step": 2909 }, { "epoch": 0.47, "grad_norm": 0.975444376733813, "learning_rate": 5.7449041173937825e-06, "loss": 0.5639, "step": 2910 }, { "epoch": 0.47, "grad_norm": 0.9819154176096044, "learning_rate": 5.7423234106722105e-06, "loss": 0.58, "step": 2911 }, { "epoch": 0.47, "grad_norm": 0.9355672498184845, "learning_rate": 5.739742501721282e-06, "loss": 0.6102, "step": 2912 }, { "epoch": 0.47, "grad_norm": 1.042837747463878, "learning_rate": 5.7371613912441015e-06, "loss": 0.6252, "step": 2913 }, { "epoch": 0.47, "grad_norm": 0.9489000653581348, "learning_rate": 5.73458007994384e-06, "loss": 0.5359, "step": 2914 }, { "epoch": 0.47, "grad_norm": 1.0667492205073674, "learning_rate": 5.731998568523716e-06, "loss": 0.5718, "step": 2915 }, { "epoch": 0.47, "grad_norm": 1.0658525638087435, "learning_rate": 5.729416857687002e-06, "loss": 0.572, "step": 2916 }, { "epoch": 0.47, "grad_norm": 0.906314885453991, "learning_rate": 5.726834948137033e-06, "loss": 0.5979, "step": 2917 }, { "epoch": 0.47, "grad_norm": 0.6527965997067648, "learning_rate": 5.7242528405771865e-06, "loss": 0.5006, "step": 2918 }, { "epoch": 0.47, "grad_norm": 0.9745269175018161, "learning_rate": 5.721670535710902e-06, "loss": 0.59, "step": 2919 }, { "epoch": 0.47, "grad_norm": 0.9606615458988463, "learning_rate": 5.7190880342416725e-06, "loss": 0.5138, "step": 2920 }, { "epoch": 0.47, "grad_norm": 0.9215391451222117, "learning_rate": 5.7165053368730395e-06, "loss": 0.5162, "step": 2921 }, { "epoch": 0.47, "grad_norm": 0.9784007233180673, "learning_rate": 5.713922444308601e-06, "loss": 0.6312, "step": 2922 }, { "epoch": 0.47, "grad_norm": 1.0467927211741697, "learning_rate": 5.711339357252011e-06, "loss": 0.6162, "step": 2923 }, { "epoch": 0.47, "grad_norm": 1.0149131340285158, "learning_rate": 5.708756076406972e-06, "loss": 0.6831, "step": 2924 }, { "epoch": 0.47, "grad_norm": 0.9229247982096688, "learning_rate": 5.706172602477239e-06, "loss": 0.565, "step": 2925 }, { "epoch": 0.47, "grad_norm": 0.9466669804342748, "learning_rate": 5.703588936166625e-06, "loss": 0.5746, "step": 2926 }, { "epoch": 0.47, "grad_norm": 1.0500888447560603, "learning_rate": 5.701005078178989e-06, "loss": 0.6598, "step": 2927 }, { "epoch": 0.47, "grad_norm": 0.971042840476184, "learning_rate": 5.6984210292182485e-06, "loss": 0.6464, "step": 2928 }, { "epoch": 0.47, "grad_norm": 0.6460691474961241, "learning_rate": 5.695836789988365e-06, "loss": 0.4842, "step": 2929 }, { "epoch": 0.47, "grad_norm": 0.9339738886626757, "learning_rate": 5.6932523611933595e-06, "loss": 0.5478, "step": 2930 }, { "epoch": 0.47, "grad_norm": 1.0590730115011948, "learning_rate": 5.690667743537303e-06, "loss": 0.6744, "step": 2931 }, { "epoch": 0.47, "grad_norm": 1.0031172910016795, "learning_rate": 5.688082937724314e-06, "loss": 0.5452, "step": 2932 }, { "epoch": 0.47, "grad_norm": 0.9859394480193693, "learning_rate": 5.685497944458564e-06, "loss": 0.6006, "step": 2933 }, { "epoch": 0.47, "grad_norm": 1.0720091468741184, "learning_rate": 5.68291276444428e-06, "loss": 0.686, "step": 2934 }, { "epoch": 0.47, "grad_norm": 1.0638350428115457, "learning_rate": 5.680327398385735e-06, "loss": 0.6147, "step": 2935 }, { "epoch": 0.47, "grad_norm": 1.0443623935295188, "learning_rate": 5.677741846987251e-06, "loss": 0.5785, "step": 2936 }, { "epoch": 0.47, "grad_norm": 0.9460562663119576, "learning_rate": 5.675156110953208e-06, "loss": 0.591, "step": 2937 }, { "epoch": 0.47, "grad_norm": 0.9840086478433832, "learning_rate": 5.672570190988027e-06, "loss": 0.5703, "step": 2938 }, { "epoch": 0.47, "grad_norm": 0.9655825815455247, "learning_rate": 5.6699840877961884e-06, "loss": 0.6394, "step": 2939 }, { "epoch": 0.47, "grad_norm": 0.9383340524609113, "learning_rate": 5.667397802082216e-06, "loss": 0.5584, "step": 2940 }, { "epoch": 0.47, "grad_norm": 0.6824580848495375, "learning_rate": 5.664811334550685e-06, "loss": 0.4696, "step": 2941 }, { "epoch": 0.47, "grad_norm": 0.9734839878046732, "learning_rate": 5.6622246859062205e-06, "loss": 0.6223, "step": 2942 }, { "epoch": 0.47, "grad_norm": 0.9976258656055123, "learning_rate": 5.6596378568534984e-06, "loss": 0.5596, "step": 2943 }, { "epoch": 0.47, "grad_norm": 1.1616181664783893, "learning_rate": 5.657050848097239e-06, "loss": 0.6638, "step": 2944 }, { "epoch": 0.47, "grad_norm": 1.054097453906456, "learning_rate": 5.654463660342219e-06, "loss": 0.5254, "step": 2945 }, { "epoch": 0.47, "grad_norm": 0.9798027750585353, "learning_rate": 5.651876294293256e-06, "loss": 0.5791, "step": 2946 }, { "epoch": 0.47, "grad_norm": 1.034999689480227, "learning_rate": 5.649288750655222e-06, "loss": 0.61, "step": 2947 }, { "epoch": 0.47, "grad_norm": 0.9716036140434269, "learning_rate": 5.646701030133036e-06, "loss": 0.6397, "step": 2948 }, { "epoch": 0.48, "grad_norm": 1.023438271244348, "learning_rate": 5.64411313343166e-06, "loss": 0.6162, "step": 2949 }, { "epoch": 0.48, "grad_norm": 1.011932914375525, "learning_rate": 5.641525061256113e-06, "loss": 0.5884, "step": 2950 }, { "epoch": 0.48, "grad_norm": 1.0566773371453246, "learning_rate": 5.638936814311455e-06, "loss": 0.5718, "step": 2951 }, { "epoch": 0.48, "grad_norm": 0.9660460607165937, "learning_rate": 5.636348393302798e-06, "loss": 0.6475, "step": 2952 }, { "epoch": 0.48, "grad_norm": 0.9415786387167717, "learning_rate": 5.633759798935296e-06, "loss": 0.628, "step": 2953 }, { "epoch": 0.48, "grad_norm": 1.0725500159297616, "learning_rate": 5.631171031914155e-06, "loss": 0.6822, "step": 2954 }, { "epoch": 0.48, "grad_norm": 1.046320208223975, "learning_rate": 5.628582092944628e-06, "loss": 0.5688, "step": 2955 }, { "epoch": 0.48, "grad_norm": 0.984616930248111, "learning_rate": 5.625992982732011e-06, "loss": 0.6191, "step": 2956 }, { "epoch": 0.48, "grad_norm": 1.014814606010456, "learning_rate": 5.62340370198165e-06, "loss": 0.5978, "step": 2957 }, { "epoch": 0.48, "grad_norm": 1.0526415672892597, "learning_rate": 5.620814251398936e-06, "loss": 0.6085, "step": 2958 }, { "epoch": 0.48, "grad_norm": 0.9986443605901345, "learning_rate": 5.6182246316893086e-06, "loss": 0.622, "step": 2959 }, { "epoch": 0.48, "grad_norm": 0.9748661628529793, "learning_rate": 5.615634843558248e-06, "loss": 0.5738, "step": 2960 }, { "epoch": 0.48, "grad_norm": 1.0362174043701942, "learning_rate": 5.613044887711288e-06, "loss": 0.6715, "step": 2961 }, { "epoch": 0.48, "grad_norm": 1.0854737732401636, "learning_rate": 5.6104547648539995e-06, "loss": 0.6683, "step": 2962 }, { "epoch": 0.48, "grad_norm": 0.9787029480476527, "learning_rate": 5.6078644756920066e-06, "loss": 0.5822, "step": 2963 }, { "epoch": 0.48, "grad_norm": 1.0308883299220237, "learning_rate": 5.605274020930975e-06, "loss": 0.6706, "step": 2964 }, { "epoch": 0.48, "grad_norm": 1.0433236334143845, "learning_rate": 5.6026834012766155e-06, "loss": 0.5991, "step": 2965 }, { "epoch": 0.48, "grad_norm": 1.1390599260896506, "learning_rate": 5.600092617434686e-06, "loss": 0.6231, "step": 2966 }, { "epoch": 0.48, "grad_norm": 0.9848717212754493, "learning_rate": 5.597501670110984e-06, "loss": 0.5963, "step": 2967 }, { "epoch": 0.48, "grad_norm": 1.0317528875631188, "learning_rate": 5.594910560011356e-06, "loss": 0.6093, "step": 2968 }, { "epoch": 0.48, "grad_norm": 0.9590709450128417, "learning_rate": 5.592319287841694e-06, "loss": 0.6131, "step": 2969 }, { "epoch": 0.48, "grad_norm": 0.9969229727161726, "learning_rate": 5.589727854307932e-06, "loss": 0.5557, "step": 2970 }, { "epoch": 0.48, "grad_norm": 1.0186536378162427, "learning_rate": 5.587136260116044e-06, "loss": 0.591, "step": 2971 }, { "epoch": 0.48, "grad_norm": 0.9823264422474592, "learning_rate": 5.584544505972056e-06, "loss": 0.6239, "step": 2972 }, { "epoch": 0.48, "grad_norm": 0.9911094580054917, "learning_rate": 5.58195259258203e-06, "loss": 0.5878, "step": 2973 }, { "epoch": 0.48, "grad_norm": 0.9698722558679959, "learning_rate": 5.5793605206520776e-06, "loss": 0.6192, "step": 2974 }, { "epoch": 0.48, "grad_norm": 1.027272684064404, "learning_rate": 5.576768290888348e-06, "loss": 0.6053, "step": 2975 }, { "epoch": 0.48, "grad_norm": 0.9149003480513932, "learning_rate": 5.574175903997038e-06, "loss": 0.5614, "step": 2976 }, { "epoch": 0.48, "grad_norm": 1.011394312007025, "learning_rate": 5.571583360684385e-06, "loss": 0.604, "step": 2977 }, { "epoch": 0.48, "grad_norm": 0.9940550443182878, "learning_rate": 5.568990661656668e-06, "loss": 0.6088, "step": 2978 }, { "epoch": 0.48, "grad_norm": 0.9971869309873015, "learning_rate": 5.566397807620209e-06, "loss": 0.6147, "step": 2979 }, { "epoch": 0.48, "grad_norm": 1.1043937946642919, "learning_rate": 5.563804799281377e-06, "loss": 0.6082, "step": 2980 }, { "epoch": 0.48, "grad_norm": 0.9330190083213578, "learning_rate": 5.561211637346576e-06, "loss": 0.5153, "step": 2981 }, { "epoch": 0.48, "grad_norm": 0.973954067031446, "learning_rate": 5.558618322522253e-06, "loss": 0.6328, "step": 2982 }, { "epoch": 0.48, "grad_norm": 1.0117180263642327, "learning_rate": 5.556024855514904e-06, "loss": 0.6376, "step": 2983 }, { "epoch": 0.48, "grad_norm": 1.0677488480021098, "learning_rate": 5.553431237031054e-06, "loss": 0.6833, "step": 2984 }, { "epoch": 0.48, "grad_norm": 0.6389923541318332, "learning_rate": 5.5508374677772806e-06, "loss": 0.4692, "step": 2985 }, { "epoch": 0.48, "grad_norm": 0.9729807122934176, "learning_rate": 5.548243548460197e-06, "loss": 0.5252, "step": 2986 }, { "epoch": 0.48, "grad_norm": 0.9365279704550078, "learning_rate": 5.545649479786459e-06, "loss": 0.5309, "step": 2987 }, { "epoch": 0.48, "grad_norm": 1.0561609247980046, "learning_rate": 5.543055262462762e-06, "loss": 0.6651, "step": 2988 }, { "epoch": 0.48, "grad_norm": 0.9665930783657178, "learning_rate": 5.540460897195842e-06, "loss": 0.5441, "step": 2989 }, { "epoch": 0.48, "grad_norm": 1.0446924921264358, "learning_rate": 5.537866384692474e-06, "loss": 0.6535, "step": 2990 }, { "epoch": 0.48, "grad_norm": 0.9863250907770889, "learning_rate": 5.535271725659478e-06, "loss": 0.5865, "step": 2991 }, { "epoch": 0.48, "grad_norm": 0.9839658211112265, "learning_rate": 5.53267692080371e-06, "loss": 0.5171, "step": 2992 }, { "epoch": 0.48, "grad_norm": 1.007925281747917, "learning_rate": 5.530081970832063e-06, "loss": 0.6195, "step": 2993 }, { "epoch": 0.48, "grad_norm": 0.609024433343191, "learning_rate": 5.527486876451478e-06, "loss": 0.4639, "step": 2994 }, { "epoch": 0.48, "grad_norm": 0.9271836706238477, "learning_rate": 5.524891638368926e-06, "loss": 0.517, "step": 2995 }, { "epoch": 0.48, "grad_norm": 0.994170977424247, "learning_rate": 5.522296257291422e-06, "loss": 0.6015, "step": 2996 }, { "epoch": 0.48, "grad_norm": 1.1118335253691134, "learning_rate": 5.519700733926022e-06, "loss": 0.6087, "step": 2997 }, { "epoch": 0.48, "grad_norm": 0.6571332502038161, "learning_rate": 5.517105068979816e-06, "loss": 0.4622, "step": 2998 }, { "epoch": 0.48, "grad_norm": 0.9272507738478353, "learning_rate": 5.514509263159935e-06, "loss": 0.6516, "step": 2999 }, { "epoch": 0.48, "grad_norm": 1.0462342525215655, "learning_rate": 5.511913317173548e-06, "loss": 0.6291, "step": 3000 }, { "epoch": 0.48, "grad_norm": 0.936869178129046, "learning_rate": 5.509317231727863e-06, "loss": 0.5081, "step": 3001 }, { "epoch": 0.48, "grad_norm": 1.029494923829399, "learning_rate": 5.506721007530125e-06, "loss": 0.5566, "step": 3002 }, { "epoch": 0.48, "grad_norm": 0.9725678518237109, "learning_rate": 5.504124645287616e-06, "loss": 0.569, "step": 3003 }, { "epoch": 0.48, "grad_norm": 1.0306481944792303, "learning_rate": 5.501528145707657e-06, "loss": 0.5281, "step": 3004 }, { "epoch": 0.48, "grad_norm": 0.9443282740213054, "learning_rate": 5.498931509497607e-06, "loss": 0.6326, "step": 3005 }, { "epoch": 0.48, "grad_norm": 0.9038622756152291, "learning_rate": 5.4963347373648615e-06, "loss": 0.5238, "step": 3006 }, { "epoch": 0.48, "grad_norm": 0.9778305294949731, "learning_rate": 5.49373783001685e-06, "loss": 0.5942, "step": 3007 }, { "epoch": 0.48, "grad_norm": 0.9653584012944149, "learning_rate": 5.4911407881610455e-06, "loss": 0.613, "step": 3008 }, { "epoch": 0.48, "grad_norm": 1.0003753597047107, "learning_rate": 5.48854361250495e-06, "loss": 0.6187, "step": 3009 }, { "epoch": 0.48, "grad_norm": 1.070530346537155, "learning_rate": 5.48594630375611e-06, "loss": 0.6225, "step": 3010 }, { "epoch": 0.49, "grad_norm": 0.9367117191076113, "learning_rate": 5.4833488626221e-06, "loss": 0.5321, "step": 3011 }, { "epoch": 0.49, "grad_norm": 0.7029591564399963, "learning_rate": 5.480751289810537e-06, "loss": 0.5008, "step": 3012 }, { "epoch": 0.49, "grad_norm": 1.0371518262102353, "learning_rate": 5.478153586029069e-06, "loss": 0.5597, "step": 3013 }, { "epoch": 0.49, "grad_norm": 1.070595877657481, "learning_rate": 5.4755557519853854e-06, "loss": 0.6365, "step": 3014 }, { "epoch": 0.49, "grad_norm": 1.0666471393524088, "learning_rate": 5.472957788387206e-06, "loss": 0.6621, "step": 3015 }, { "epoch": 0.49, "grad_norm": 1.013622463994857, "learning_rate": 5.470359695942289e-06, "loss": 0.6112, "step": 3016 }, { "epoch": 0.49, "grad_norm": 1.1751072458337048, "learning_rate": 5.467761475358423e-06, "loss": 0.6891, "step": 3017 }, { "epoch": 0.49, "grad_norm": 0.9358695068154921, "learning_rate": 5.465163127343438e-06, "loss": 0.5813, "step": 3018 }, { "epoch": 0.49, "grad_norm": 1.047044540762912, "learning_rate": 5.462564652605197e-06, "loss": 0.6275, "step": 3019 }, { "epoch": 0.49, "grad_norm": 0.9099149676891721, "learning_rate": 5.459966051851589e-06, "loss": 0.5174, "step": 3020 }, { "epoch": 0.49, "grad_norm": 0.9671128862280134, "learning_rate": 5.457367325790555e-06, "loss": 0.6282, "step": 3021 }, { "epoch": 0.49, "grad_norm": 1.113845901514489, "learning_rate": 5.454768475130051e-06, "loss": 0.646, "step": 3022 }, { "epoch": 0.49, "grad_norm": 1.0459357585850415, "learning_rate": 5.452169500578079e-06, "loss": 0.6456, "step": 3023 }, { "epoch": 0.49, "grad_norm": 0.922231377513865, "learning_rate": 5.449570402842671e-06, "loss": 0.5915, "step": 3024 }, { "epoch": 0.49, "grad_norm": 0.9392697318301192, "learning_rate": 5.446971182631893e-06, "loss": 0.6032, "step": 3025 }, { "epoch": 0.49, "grad_norm": 1.1390953849131236, "learning_rate": 5.444371840653843e-06, "loss": 0.6191, "step": 3026 }, { "epoch": 0.49, "grad_norm": 0.9539435396481226, "learning_rate": 5.441772377616656e-06, "loss": 0.5335, "step": 3027 }, { "epoch": 0.49, "grad_norm": 1.0552984394827207, "learning_rate": 5.439172794228495e-06, "loss": 0.5907, "step": 3028 }, { "epoch": 0.49, "grad_norm": 0.9659820268226376, "learning_rate": 5.436573091197559e-06, "loss": 0.6172, "step": 3029 }, { "epoch": 0.49, "grad_norm": 0.8864818214482333, "learning_rate": 5.43397326923208e-06, "loss": 0.4983, "step": 3030 }, { "epoch": 0.49, "grad_norm": 1.0274360810404572, "learning_rate": 5.431373329040317e-06, "loss": 0.5689, "step": 3031 }, { "epoch": 0.49, "grad_norm": 0.9950136640603137, "learning_rate": 5.42877327133057e-06, "loss": 0.5067, "step": 3032 }, { "epoch": 0.49, "grad_norm": 0.9591756514245183, "learning_rate": 5.426173096811166e-06, "loss": 0.5689, "step": 3033 }, { "epoch": 0.49, "grad_norm": 1.0299236156333356, "learning_rate": 5.423572806190461e-06, "loss": 0.6553, "step": 3034 }, { "epoch": 0.49, "grad_norm": 1.0771270259381647, "learning_rate": 5.420972400176848e-06, "loss": 0.6876, "step": 3035 }, { "epoch": 0.49, "grad_norm": 0.9524806173014413, "learning_rate": 5.41837187947875e-06, "loss": 0.5057, "step": 3036 }, { "epoch": 0.49, "grad_norm": 0.647660965685755, "learning_rate": 5.41577124480462e-06, "loss": 0.4907, "step": 3037 }, { "epoch": 0.49, "grad_norm": 1.0137313883631436, "learning_rate": 5.413170496862941e-06, "loss": 0.6225, "step": 3038 }, { "epoch": 0.49, "grad_norm": 0.986362827004136, "learning_rate": 5.4105696363622284e-06, "loss": 0.6063, "step": 3039 }, { "epoch": 0.49, "grad_norm": 1.1103650922467134, "learning_rate": 5.4079686640110305e-06, "loss": 0.687, "step": 3040 }, { "epoch": 0.49, "grad_norm": 0.9551687305777193, "learning_rate": 5.405367580517924e-06, "loss": 0.6027, "step": 3041 }, { "epoch": 0.49, "grad_norm": 0.9952775875541707, "learning_rate": 5.402766386591511e-06, "loss": 0.5803, "step": 3042 }, { "epoch": 0.49, "grad_norm": 0.966615474914266, "learning_rate": 5.400165082940436e-06, "loss": 0.6194, "step": 3043 }, { "epoch": 0.49, "grad_norm": 1.0046841789499001, "learning_rate": 5.397563670273359e-06, "loss": 0.6401, "step": 3044 }, { "epoch": 0.49, "grad_norm": 0.9366289362668158, "learning_rate": 5.394962149298981e-06, "loss": 0.5246, "step": 3045 }, { "epoch": 0.49, "grad_norm": 0.9783160899744681, "learning_rate": 5.392360520726027e-06, "loss": 0.6505, "step": 3046 }, { "epoch": 0.49, "grad_norm": 0.9770758549753037, "learning_rate": 5.38975878526325e-06, "loss": 0.6113, "step": 3047 }, { "epoch": 0.49, "grad_norm": 1.0213627205930274, "learning_rate": 5.387156943619437e-06, "loss": 0.5857, "step": 3048 }, { "epoch": 0.49, "grad_norm": 1.0750538410657855, "learning_rate": 5.384554996503401e-06, "loss": 0.4824, "step": 3049 }, { "epoch": 0.49, "grad_norm": 0.9825977248911135, "learning_rate": 5.381952944623982e-06, "loss": 0.6208, "step": 3050 }, { "epoch": 0.49, "grad_norm": 0.9226608540394012, "learning_rate": 5.3793507886900535e-06, "loss": 0.604, "step": 3051 }, { "epoch": 0.49, "grad_norm": 0.9753211072132864, "learning_rate": 5.3767485294105135e-06, "loss": 0.596, "step": 3052 }, { "epoch": 0.49, "grad_norm": 1.0082598425233422, "learning_rate": 5.374146167494286e-06, "loss": 0.5621, "step": 3053 }, { "epoch": 0.49, "grad_norm": 0.976669264124965, "learning_rate": 5.371543703650332e-06, "loss": 0.5898, "step": 3054 }, { "epoch": 0.49, "grad_norm": 0.972239347324036, "learning_rate": 5.368941138587631e-06, "loss": 0.5775, "step": 3055 }, { "epoch": 0.49, "grad_norm": 0.9672805681186081, "learning_rate": 5.366338473015195e-06, "loss": 0.6467, "step": 3056 }, { "epoch": 0.49, "grad_norm": 0.9966797134327225, "learning_rate": 5.3637357076420595e-06, "loss": 0.618, "step": 3057 }, { "epoch": 0.49, "grad_norm": 1.02542895058069, "learning_rate": 5.36113284317729e-06, "loss": 0.5879, "step": 3058 }, { "epoch": 0.49, "grad_norm": 1.0151860729866955, "learning_rate": 5.358529880329982e-06, "loss": 0.5825, "step": 3059 }, { "epoch": 0.49, "grad_norm": 1.0601162942182552, "learning_rate": 5.355926819809248e-06, "loss": 0.5881, "step": 3060 }, { "epoch": 0.49, "grad_norm": 0.9645723235164121, "learning_rate": 5.3533236623242405e-06, "loss": 0.5126, "step": 3061 }, { "epoch": 0.49, "grad_norm": 1.0200621614675258, "learning_rate": 5.3507204085841256e-06, "loss": 0.5828, "step": 3062 }, { "epoch": 0.49, "grad_norm": 0.9255197244764944, "learning_rate": 5.348117059298105e-06, "loss": 0.6006, "step": 3063 }, { "epoch": 0.49, "grad_norm": 1.0290562933389957, "learning_rate": 5.345513615175401e-06, "loss": 0.6803, "step": 3064 }, { "epoch": 0.49, "grad_norm": 0.9543669687354582, "learning_rate": 5.342910076925264e-06, "loss": 0.636, "step": 3065 }, { "epoch": 0.49, "grad_norm": 1.1155655322458797, "learning_rate": 5.3403064452569685e-06, "loss": 0.6432, "step": 3066 }, { "epoch": 0.49, "grad_norm": 1.046370806515194, "learning_rate": 5.337702720879819e-06, "loss": 0.6251, "step": 3067 }, { "epoch": 0.49, "grad_norm": 0.6187576687078945, "learning_rate": 5.335098904503139e-06, "loss": 0.4402, "step": 3068 }, { "epoch": 0.49, "grad_norm": 1.0319952196748736, "learning_rate": 5.332494996836279e-06, "loss": 0.5977, "step": 3069 }, { "epoch": 0.49, "grad_norm": 1.0407564670659808, "learning_rate": 5.3298909985886195e-06, "loss": 0.5804, "step": 3070 }, { "epoch": 0.49, "grad_norm": 1.0196830922707836, "learning_rate": 5.327286910469556e-06, "loss": 0.6094, "step": 3071 }, { "epoch": 0.49, "grad_norm": 1.0014782755851042, "learning_rate": 5.3246827331885165e-06, "loss": 0.603, "step": 3072 }, { "epoch": 0.5, "grad_norm": 1.1232367538615327, "learning_rate": 5.322078467454951e-06, "loss": 0.6418, "step": 3073 }, { "epoch": 0.5, "grad_norm": 1.0375479765497255, "learning_rate": 5.3194741139783324e-06, "loss": 0.6545, "step": 3074 }, { "epoch": 0.5, "grad_norm": 0.9727802927555881, "learning_rate": 5.316869673468159e-06, "loss": 0.5534, "step": 3075 }, { "epoch": 0.5, "grad_norm": 1.0580887244480295, "learning_rate": 5.314265146633954e-06, "loss": 0.5961, "step": 3076 }, { "epoch": 0.5, "grad_norm": 1.0156370684938056, "learning_rate": 5.311660534185258e-06, "loss": 0.5958, "step": 3077 }, { "epoch": 0.5, "grad_norm": 1.0269353595009565, "learning_rate": 5.3090558368316415e-06, "loss": 0.6334, "step": 3078 }, { "epoch": 0.5, "grad_norm": 1.009717712809675, "learning_rate": 5.306451055282696e-06, "loss": 0.6815, "step": 3079 }, { "epoch": 0.5, "grad_norm": 1.0712421098110942, "learning_rate": 5.303846190248035e-06, "loss": 0.6059, "step": 3080 }, { "epoch": 0.5, "grad_norm": 0.9813017799082245, "learning_rate": 5.301241242437299e-06, "loss": 0.6531, "step": 3081 }, { "epoch": 0.5, "grad_norm": 1.0460458862001938, "learning_rate": 5.298636212560143e-06, "loss": 0.7166, "step": 3082 }, { "epoch": 0.5, "grad_norm": 1.1095162091104769, "learning_rate": 5.29603110132625e-06, "loss": 0.6719, "step": 3083 }, { "epoch": 0.5, "grad_norm": 1.0901207350495221, "learning_rate": 5.293425909445326e-06, "loss": 0.6551, "step": 3084 }, { "epoch": 0.5, "grad_norm": 1.0677799275461581, "learning_rate": 5.290820637627095e-06, "loss": 0.6109, "step": 3085 }, { "epoch": 0.5, "grad_norm": 1.1125464838436239, "learning_rate": 5.288215286581306e-06, "loss": 0.6873, "step": 3086 }, { "epoch": 0.5, "grad_norm": 1.006909826725494, "learning_rate": 5.28560985701773e-06, "loss": 0.5868, "step": 3087 }, { "epoch": 0.5, "grad_norm": 0.969084402435138, "learning_rate": 5.283004349646154e-06, "loss": 0.6709, "step": 3088 }, { "epoch": 0.5, "grad_norm": 0.8987830500424991, "learning_rate": 5.280398765176392e-06, "loss": 0.6198, "step": 3089 }, { "epoch": 0.5, "grad_norm": 0.9752011274024958, "learning_rate": 5.277793104318279e-06, "loss": 0.641, "step": 3090 }, { "epoch": 0.5, "grad_norm": 0.957811544843052, "learning_rate": 5.275187367781664e-06, "loss": 0.5438, "step": 3091 }, { "epoch": 0.5, "grad_norm": 0.9993246830984682, "learning_rate": 5.272581556276428e-06, "loss": 0.6294, "step": 3092 }, { "epoch": 0.5, "grad_norm": 0.9265692398970193, "learning_rate": 5.26997567051246e-06, "loss": 0.5151, "step": 3093 }, { "epoch": 0.5, "grad_norm": 1.0231563731950963, "learning_rate": 5.267369711199678e-06, "loss": 0.6316, "step": 3094 }, { "epoch": 0.5, "grad_norm": 0.9871853345824139, "learning_rate": 5.2647636790480175e-06, "loss": 0.5517, "step": 3095 }, { "epoch": 0.5, "grad_norm": 1.0448852004407465, "learning_rate": 5.26215757476743e-06, "loss": 0.6879, "step": 3096 }, { "epoch": 0.5, "grad_norm": 1.1488055347980102, "learning_rate": 5.259551399067895e-06, "loss": 0.6267, "step": 3097 }, { "epoch": 0.5, "grad_norm": 1.0150334138255017, "learning_rate": 5.256945152659404e-06, "loss": 0.58, "step": 3098 }, { "epoch": 0.5, "grad_norm": 0.9554687164052483, "learning_rate": 5.254338836251971e-06, "loss": 0.5876, "step": 3099 }, { "epoch": 0.5, "grad_norm": 0.912083311863043, "learning_rate": 5.251732450555626e-06, "loss": 0.6198, "step": 3100 }, { "epoch": 0.5, "grad_norm": 0.9516639827121232, "learning_rate": 5.249125996280426e-06, "loss": 0.5529, "step": 3101 }, { "epoch": 0.5, "grad_norm": 1.0612828208102298, "learning_rate": 5.246519474136433e-06, "loss": 0.6349, "step": 3102 }, { "epoch": 0.5, "grad_norm": 1.0379666184225487, "learning_rate": 5.243912884833744e-06, "loss": 0.6012, "step": 3103 }, { "epoch": 0.5, "grad_norm": 0.9509187204776852, "learning_rate": 5.241306229082459e-06, "loss": 0.558, "step": 3104 }, { "epoch": 0.5, "grad_norm": 1.0056121610890216, "learning_rate": 5.2386995075927074e-06, "loss": 0.5798, "step": 3105 }, { "epoch": 0.5, "grad_norm": 0.7191349098112629, "learning_rate": 5.236092721074629e-06, "loss": 0.4819, "step": 3106 }, { "epoch": 0.5, "grad_norm": 1.0226666027622446, "learning_rate": 5.233485870238385e-06, "loss": 0.571, "step": 3107 }, { "epoch": 0.5, "grad_norm": 1.1289147239160917, "learning_rate": 5.2308789557941546e-06, "loss": 0.5952, "step": 3108 }, { "epoch": 0.5, "grad_norm": 1.010917397290751, "learning_rate": 5.228271978452133e-06, "loss": 0.6295, "step": 3109 }, { "epoch": 0.5, "grad_norm": 1.0888974285976984, "learning_rate": 5.225664938922531e-06, "loss": 0.5459, "step": 3110 }, { "epoch": 0.5, "grad_norm": 0.9572816224649667, "learning_rate": 5.223057837915579e-06, "loss": 0.5315, "step": 3111 }, { "epoch": 0.5, "grad_norm": 0.9918979931515884, "learning_rate": 5.220450676141525e-06, "loss": 0.5214, "step": 3112 }, { "epoch": 0.5, "grad_norm": 0.9856326420778148, "learning_rate": 5.217843454310628e-06, "loss": 0.6189, "step": 3113 }, { "epoch": 0.5, "grad_norm": 0.9542165125860546, "learning_rate": 5.215236173133172e-06, "loss": 0.5794, "step": 3114 }, { "epoch": 0.5, "grad_norm": 0.9316873099032466, "learning_rate": 5.2126288333194455e-06, "loss": 0.551, "step": 3115 }, { "epoch": 0.5, "grad_norm": 0.8982918487614081, "learning_rate": 5.210021435579766e-06, "loss": 0.5019, "step": 3116 }, { "epoch": 0.5, "grad_norm": 1.0252266053892645, "learning_rate": 5.207413980624459e-06, "loss": 0.6235, "step": 3117 }, { "epoch": 0.5, "grad_norm": 1.0880284171151882, "learning_rate": 5.204806469163865e-06, "loss": 0.6904, "step": 3118 }, { "epoch": 0.5, "grad_norm": 1.0381679697602924, "learning_rate": 5.202198901908343e-06, "loss": 0.6294, "step": 3119 }, { "epoch": 0.5, "grad_norm": 0.9200688863420144, "learning_rate": 5.199591279568268e-06, "loss": 0.5163, "step": 3120 }, { "epoch": 0.5, "grad_norm": 0.9293583855976372, "learning_rate": 5.196983602854024e-06, "loss": 0.5243, "step": 3121 }, { "epoch": 0.5, "grad_norm": 1.0254220433600487, "learning_rate": 5.1943758724760185e-06, "loss": 0.6222, "step": 3122 }, { "epoch": 0.5, "grad_norm": 1.047226987845554, "learning_rate": 5.191768089144668e-06, "loss": 0.5761, "step": 3123 }, { "epoch": 0.5, "grad_norm": 0.9147856117445466, "learning_rate": 5.189160253570402e-06, "loss": 0.4939, "step": 3124 }, { "epoch": 0.5, "grad_norm": 0.7415240357698905, "learning_rate": 5.186552366463671e-06, "loss": 0.4881, "step": 3125 }, { "epoch": 0.5, "grad_norm": 0.6322239423134934, "learning_rate": 5.18394442853493e-06, "loss": 0.4703, "step": 3126 }, { "epoch": 0.5, "grad_norm": 0.9832375534920925, "learning_rate": 5.181336440494659e-06, "loss": 0.5202, "step": 3127 }, { "epoch": 0.5, "grad_norm": 1.0929385540764123, "learning_rate": 5.178728403053342e-06, "loss": 0.7039, "step": 3128 }, { "epoch": 0.5, "grad_norm": 1.0110602711965202, "learning_rate": 5.176120316921481e-06, "loss": 0.6393, "step": 3129 }, { "epoch": 0.5, "grad_norm": 1.0634345474252345, "learning_rate": 5.173512182809592e-06, "loss": 0.6479, "step": 3130 }, { "epoch": 0.5, "grad_norm": 1.0545830695103025, "learning_rate": 5.170904001428201e-06, "loss": 0.6091, "step": 3131 }, { "epoch": 0.5, "grad_norm": 0.9651603731949634, "learning_rate": 5.168295773487848e-06, "loss": 0.551, "step": 3132 }, { "epoch": 0.5, "grad_norm": 0.910173616825386, "learning_rate": 5.165687499699088e-06, "loss": 0.5797, "step": 3133 }, { "epoch": 0.5, "grad_norm": 1.0465306525543627, "learning_rate": 5.163079180772486e-06, "loss": 0.6389, "step": 3134 }, { "epoch": 0.51, "grad_norm": 1.0140264239980943, "learning_rate": 5.160470817418619e-06, "loss": 0.5651, "step": 3135 }, { "epoch": 0.51, "grad_norm": 1.0865930450129375, "learning_rate": 5.15786241034808e-06, "loss": 0.7361, "step": 3136 }, { "epoch": 0.51, "grad_norm": 1.073680740320688, "learning_rate": 5.155253960271466e-06, "loss": 0.6918, "step": 3137 }, { "epoch": 0.51, "grad_norm": 0.8618014706965529, "learning_rate": 5.152645467899397e-06, "loss": 0.5424, "step": 3138 }, { "epoch": 0.51, "grad_norm": 1.1115292451014795, "learning_rate": 5.150036933942492e-06, "loss": 0.7406, "step": 3139 }, { "epoch": 0.51, "grad_norm": 1.0586927228750298, "learning_rate": 5.147428359111391e-06, "loss": 0.6194, "step": 3140 }, { "epoch": 0.51, "grad_norm": 0.9786524039263109, "learning_rate": 5.144819744116742e-06, "loss": 0.5958, "step": 3141 }, { "epoch": 0.51, "grad_norm": 0.9043824250121534, "learning_rate": 5.142211089669202e-06, "loss": 0.5477, "step": 3142 }, { "epoch": 0.51, "grad_norm": 1.0340517383668435, "learning_rate": 5.1396023964794415e-06, "loss": 0.5947, "step": 3143 }, { "epoch": 0.51, "grad_norm": 0.9702461993920194, "learning_rate": 5.1369936652581395e-06, "loss": 0.5259, "step": 3144 }, { "epoch": 0.51, "grad_norm": 0.8553168746533659, "learning_rate": 5.134384896715987e-06, "loss": 0.4923, "step": 3145 }, { "epoch": 0.51, "grad_norm": 0.9816506245386065, "learning_rate": 5.131776091563685e-06, "loss": 0.5675, "step": 3146 }, { "epoch": 0.51, "grad_norm": 0.9669388424297567, "learning_rate": 5.129167250511943e-06, "loss": 0.5951, "step": 3147 }, { "epoch": 0.51, "grad_norm": 0.9871660852011059, "learning_rate": 5.12655837427148e-06, "loss": 0.6006, "step": 3148 }, { "epoch": 0.51, "grad_norm": 0.9990625613082773, "learning_rate": 5.123949463553029e-06, "loss": 0.5699, "step": 3149 }, { "epoch": 0.51, "grad_norm": 1.0329120251833712, "learning_rate": 5.121340519067327e-06, "loss": 0.6798, "step": 3150 }, { "epoch": 0.51, "grad_norm": 1.0425169803595384, "learning_rate": 5.118731541525123e-06, "loss": 0.5663, "step": 3151 }, { "epoch": 0.51, "grad_norm": 1.0871819823524007, "learning_rate": 5.116122531637174e-06, "loss": 0.6908, "step": 3152 }, { "epoch": 0.51, "grad_norm": 1.078285683265701, "learning_rate": 5.113513490114246e-06, "loss": 0.676, "step": 3153 }, { "epoch": 0.51, "grad_norm": 1.0036849719504581, "learning_rate": 5.110904417667115e-06, "loss": 0.6757, "step": 3154 }, { "epoch": 0.51, "grad_norm": 1.0388476133534623, "learning_rate": 5.108295315006563e-06, "loss": 0.6373, "step": 3155 }, { "epoch": 0.51, "grad_norm": 1.0630591065915274, "learning_rate": 5.1056861828433815e-06, "loss": 0.5922, "step": 3156 }, { "epoch": 0.51, "grad_norm": 0.9346870041670945, "learning_rate": 5.1030770218883706e-06, "loss": 0.6295, "step": 3157 }, { "epoch": 0.51, "grad_norm": 1.0441141778922003, "learning_rate": 5.1004678328523395e-06, "loss": 0.6233, "step": 3158 }, { "epoch": 0.51, "grad_norm": 1.034541901945305, "learning_rate": 5.097858616446099e-06, "loss": 0.6038, "step": 3159 }, { "epoch": 0.51, "grad_norm": 1.0204589788283998, "learning_rate": 5.0952493733804755e-06, "loss": 0.615, "step": 3160 }, { "epoch": 0.51, "grad_norm": 0.931207029440427, "learning_rate": 5.092640104366297e-06, "loss": 0.5718, "step": 3161 }, { "epoch": 0.51, "grad_norm": 0.9649478944400361, "learning_rate": 5.0900308101143996e-06, "loss": 0.5562, "step": 3162 }, { "epoch": 0.51, "grad_norm": 1.002578387231302, "learning_rate": 5.087421491335629e-06, "loss": 0.6287, "step": 3163 }, { "epoch": 0.51, "grad_norm": 1.0391535216531402, "learning_rate": 5.0848121487408316e-06, "loss": 0.6283, "step": 3164 }, { "epoch": 0.51, "grad_norm": 1.0140145805553982, "learning_rate": 5.08220278304087e-06, "loss": 0.584, "step": 3165 }, { "epoch": 0.51, "grad_norm": 1.015182019752588, "learning_rate": 5.079593394946603e-06, "loss": 0.5474, "step": 3166 }, { "epoch": 0.51, "grad_norm": 0.982440242242882, "learning_rate": 5.076983985168901e-06, "loss": 0.5816, "step": 3167 }, { "epoch": 0.51, "grad_norm": 1.1400493975062624, "learning_rate": 5.074374554418641e-06, "loss": 0.646, "step": 3168 }, { "epoch": 0.51, "grad_norm": 0.9698206550883761, "learning_rate": 5.071765103406702e-06, "loss": 0.5911, "step": 3169 }, { "epoch": 0.51, "grad_norm": 0.9663288582076922, "learning_rate": 5.06915563284397e-06, "loss": 0.6061, "step": 3170 }, { "epoch": 0.51, "grad_norm": 1.073261526336657, "learning_rate": 5.066546143441336e-06, "loss": 0.6141, "step": 3171 }, { "epoch": 0.51, "grad_norm": 1.0030036543102403, "learning_rate": 5.0639366359097e-06, "loss": 0.6036, "step": 3172 }, { "epoch": 0.51, "grad_norm": 1.0723402760222287, "learning_rate": 5.061327110959961e-06, "loss": 0.5882, "step": 3173 }, { "epoch": 0.51, "grad_norm": 0.9133739858984177, "learning_rate": 5.058717569303027e-06, "loss": 0.5806, "step": 3174 }, { "epoch": 0.51, "grad_norm": 1.0029731177779446, "learning_rate": 5.056108011649807e-06, "loss": 0.6411, "step": 3175 }, { "epoch": 0.51, "grad_norm": 1.0338771199589651, "learning_rate": 5.053498438711221e-06, "loss": 0.6601, "step": 3176 }, { "epoch": 0.51, "grad_norm": 0.9925496123672746, "learning_rate": 5.050888851198183e-06, "loss": 0.5516, "step": 3177 }, { "epoch": 0.51, "grad_norm": 1.022091983083876, "learning_rate": 5.0482792498216194e-06, "loss": 0.6339, "step": 3178 }, { "epoch": 0.51, "grad_norm": 0.9656249352266356, "learning_rate": 5.045669635292458e-06, "loss": 0.6412, "step": 3179 }, { "epoch": 0.51, "grad_norm": 1.0099292343527497, "learning_rate": 5.04306000832163e-06, "loss": 0.5973, "step": 3180 }, { "epoch": 0.51, "grad_norm": 0.9672537550692064, "learning_rate": 5.040450369620068e-06, "loss": 0.5604, "step": 3181 }, { "epoch": 0.51, "grad_norm": 1.038917159440407, "learning_rate": 5.03784071989871e-06, "loss": 0.5997, "step": 3182 }, { "epoch": 0.51, "grad_norm": 0.9619137737291765, "learning_rate": 5.0352310598684965e-06, "loss": 0.5323, "step": 3183 }, { "epoch": 0.51, "grad_norm": 1.0424061215150913, "learning_rate": 5.032621390240371e-06, "loss": 0.6757, "step": 3184 }, { "epoch": 0.51, "grad_norm": 0.9815577959975248, "learning_rate": 5.030011711725281e-06, "loss": 0.5911, "step": 3185 }, { "epoch": 0.51, "grad_norm": 1.0146655243825957, "learning_rate": 5.027402025034171e-06, "loss": 0.6315, "step": 3186 }, { "epoch": 0.51, "grad_norm": 1.1577283696456933, "learning_rate": 5.024792330877997e-06, "loss": 0.6354, "step": 3187 }, { "epoch": 0.51, "grad_norm": 0.8999827968225897, "learning_rate": 5.022182629967707e-06, "loss": 0.5922, "step": 3188 }, { "epoch": 0.51, "grad_norm": 0.9843898738010718, "learning_rate": 5.019572923014258e-06, "loss": 0.6193, "step": 3189 }, { "epoch": 0.51, "grad_norm": 0.9105745404761089, "learning_rate": 5.016963210728604e-06, "loss": 0.5151, "step": 3190 }, { "epoch": 0.51, "grad_norm": 0.937101883668275, "learning_rate": 5.014353493821705e-06, "loss": 0.4985, "step": 3191 }, { "epoch": 0.51, "grad_norm": 1.0613820986548346, "learning_rate": 5.011743773004518e-06, "loss": 0.573, "step": 3192 }, { "epoch": 0.51, "grad_norm": 0.9101921625096693, "learning_rate": 5.009134048988004e-06, "loss": 0.5697, "step": 3193 }, { "epoch": 0.51, "grad_norm": 0.9344871409151593, "learning_rate": 5.006524322483123e-06, "loss": 0.5767, "step": 3194 }, { "epoch": 0.51, "grad_norm": 0.9516814451468132, "learning_rate": 5.0039145942008364e-06, "loss": 0.5703, "step": 3195 }, { "epoch": 0.51, "grad_norm": 1.0388140777675396, "learning_rate": 5.001304864852106e-06, "loss": 0.665, "step": 3196 }, { "epoch": 0.52, "grad_norm": 0.9952440897527149, "learning_rate": 4.998695135147896e-06, "loss": 0.6188, "step": 3197 }, { "epoch": 0.52, "grad_norm": 0.9119715043850809, "learning_rate": 4.996085405799166e-06, "loss": 0.5529, "step": 3198 }, { "epoch": 0.52, "grad_norm": 1.050781916803946, "learning_rate": 4.993475677516878e-06, "loss": 0.62, "step": 3199 }, { "epoch": 0.52, "grad_norm": 1.0843701732318667, "learning_rate": 4.990865951011999e-06, "loss": 0.5864, "step": 3200 }, { "epoch": 0.52, "grad_norm": 0.9863101523621242, "learning_rate": 4.9882562269954835e-06, "loss": 0.5736, "step": 3201 }, { "epoch": 0.52, "grad_norm": 0.9186119294044961, "learning_rate": 4.985646506178296e-06, "loss": 0.5541, "step": 3202 }, { "epoch": 0.52, "grad_norm": 0.9620687350222229, "learning_rate": 4.983036789271398e-06, "loss": 0.5768, "step": 3203 }, { "epoch": 0.52, "grad_norm": 1.2517755544620321, "learning_rate": 4.980427076985744e-06, "loss": 0.6364, "step": 3204 }, { "epoch": 0.52, "grad_norm": 0.9517456926756165, "learning_rate": 4.977817370032294e-06, "loss": 0.5365, "step": 3205 }, { "epoch": 0.52, "grad_norm": 1.1757747256515527, "learning_rate": 4.975207669122005e-06, "loss": 0.6424, "step": 3206 }, { "epoch": 0.52, "grad_norm": 0.9722261868761642, "learning_rate": 4.97259797496583e-06, "loss": 0.5859, "step": 3207 }, { "epoch": 0.52, "grad_norm": 0.9288341838015219, "learning_rate": 4.9699882882747205e-06, "loss": 0.6219, "step": 3208 }, { "epoch": 0.52, "grad_norm": 0.993202144225267, "learning_rate": 4.967378609759629e-06, "loss": 0.6632, "step": 3209 }, { "epoch": 0.52, "grad_norm": 1.0292518335109053, "learning_rate": 4.964768940131505e-06, "loss": 0.6361, "step": 3210 }, { "epoch": 0.52, "grad_norm": 0.7057615342850861, "learning_rate": 4.962159280101292e-06, "loss": 0.4751, "step": 3211 }, { "epoch": 0.52, "grad_norm": 1.0525475881626118, "learning_rate": 4.959549630379934e-06, "loss": 0.5953, "step": 3212 }, { "epoch": 0.52, "grad_norm": 1.018774125172252, "learning_rate": 4.9569399916783724e-06, "loss": 0.5984, "step": 3213 }, { "epoch": 0.52, "grad_norm": 0.6713126892689764, "learning_rate": 4.954330364707543e-06, "loss": 0.4722, "step": 3214 }, { "epoch": 0.52, "grad_norm": 1.0152952164386781, "learning_rate": 4.9517207501783805e-06, "loss": 0.6113, "step": 3215 }, { "epoch": 0.52, "grad_norm": 1.0100901898664556, "learning_rate": 4.949111148801819e-06, "loss": 0.6073, "step": 3216 }, { "epoch": 0.52, "grad_norm": 1.1189124080852684, "learning_rate": 4.9465015612887815e-06, "loss": 0.6122, "step": 3217 }, { "epoch": 0.52, "grad_norm": 1.0416949190818834, "learning_rate": 4.9438919883501934e-06, "loss": 0.6916, "step": 3218 }, { "epoch": 0.52, "grad_norm": 1.0052468492721593, "learning_rate": 4.941282430696976e-06, "loss": 0.6138, "step": 3219 }, { "epoch": 0.52, "grad_norm": 1.0211165296109515, "learning_rate": 4.938672889040041e-06, "loss": 0.5694, "step": 3220 }, { "epoch": 0.52, "grad_norm": 1.0033456810706858, "learning_rate": 4.936063364090302e-06, "loss": 0.5724, "step": 3221 }, { "epoch": 0.52, "grad_norm": 1.0531567301778528, "learning_rate": 4.933453856558666e-06, "loss": 0.6856, "step": 3222 }, { "epoch": 0.52, "grad_norm": 1.0257883738659475, "learning_rate": 4.930844367156032e-06, "loss": 0.6476, "step": 3223 }, { "epoch": 0.52, "grad_norm": 0.6845580322843383, "learning_rate": 4.9282348965933e-06, "loss": 0.4974, "step": 3224 }, { "epoch": 0.52, "grad_norm": 0.7260622160382391, "learning_rate": 4.925625445581361e-06, "loss": 0.4977, "step": 3225 }, { "epoch": 0.52, "grad_norm": 1.1068457428246035, "learning_rate": 4.923016014831099e-06, "loss": 0.5365, "step": 3226 }, { "epoch": 0.52, "grad_norm": 0.9134538705991346, "learning_rate": 4.9204066050533975e-06, "loss": 0.5367, "step": 3227 }, { "epoch": 0.52, "grad_norm": 0.9905446525890542, "learning_rate": 4.91779721695913e-06, "loss": 0.5526, "step": 3228 }, { "epoch": 0.52, "grad_norm": 1.0281560905594054, "learning_rate": 4.915187851259169e-06, "loss": 0.4344, "step": 3229 }, { "epoch": 0.52, "grad_norm": 0.9233087237956519, "learning_rate": 4.9125785086643726e-06, "loss": 0.5204, "step": 3230 }, { "epoch": 0.52, "grad_norm": 1.1068483085479515, "learning_rate": 4.9099691898856e-06, "loss": 0.659, "step": 3231 }, { "epoch": 0.52, "grad_norm": 0.6528755477179455, "learning_rate": 4.907359895633705e-06, "loss": 0.4504, "step": 3232 }, { "epoch": 0.52, "grad_norm": 1.0446952793453832, "learning_rate": 4.904750626619525e-06, "loss": 0.6392, "step": 3233 }, { "epoch": 0.52, "grad_norm": 1.065630310872365, "learning_rate": 4.902141383553901e-06, "loss": 0.6076, "step": 3234 }, { "epoch": 0.52, "grad_norm": 0.9767672369534619, "learning_rate": 4.899532167147662e-06, "loss": 0.5521, "step": 3235 }, { "epoch": 0.52, "grad_norm": 0.948386066730927, "learning_rate": 4.89692297811163e-06, "loss": 0.5934, "step": 3236 }, { "epoch": 0.52, "grad_norm": 1.0257924378445473, "learning_rate": 4.8943138171566184e-06, "loss": 0.5762, "step": 3237 }, { "epoch": 0.52, "grad_norm": 1.0828713787209612, "learning_rate": 4.89170468499344e-06, "loss": 0.5946, "step": 3238 }, { "epoch": 0.52, "grad_norm": 1.0160062577190154, "learning_rate": 4.889095582332887e-06, "loss": 0.6492, "step": 3239 }, { "epoch": 0.52, "grad_norm": 0.9624144541492015, "learning_rate": 4.886486509885755e-06, "loss": 0.5235, "step": 3240 }, { "epoch": 0.52, "grad_norm": 1.0337714842579497, "learning_rate": 4.883877468362828e-06, "loss": 0.6651, "step": 3241 }, { "epoch": 0.52, "grad_norm": 0.8944878521173915, "learning_rate": 4.88126845847488e-06, "loss": 0.5663, "step": 3242 }, { "epoch": 0.52, "grad_norm": 0.8898746825233013, "learning_rate": 4.878659480932674e-06, "loss": 0.5219, "step": 3243 }, { "epoch": 0.52, "grad_norm": 1.0036830134717925, "learning_rate": 4.876050536446973e-06, "loss": 0.5806, "step": 3244 }, { "epoch": 0.52, "grad_norm": 1.0040041799254726, "learning_rate": 4.8734416257285215e-06, "loss": 0.6191, "step": 3245 }, { "epoch": 0.52, "grad_norm": 0.941530140974121, "learning_rate": 4.870832749488058e-06, "loss": 0.5844, "step": 3246 }, { "epoch": 0.52, "grad_norm": 1.0787821864249088, "learning_rate": 4.868223908436316e-06, "loss": 0.6222, "step": 3247 }, { "epoch": 0.52, "grad_norm": 1.0279489181115704, "learning_rate": 4.865615103284014e-06, "loss": 0.6378, "step": 3248 }, { "epoch": 0.52, "grad_norm": 0.9474272284976109, "learning_rate": 4.863006334741861e-06, "loss": 0.5683, "step": 3249 }, { "epoch": 0.52, "grad_norm": 1.0845749118054895, "learning_rate": 4.860397603520559e-06, "loss": 0.7109, "step": 3250 }, { "epoch": 0.52, "grad_norm": 1.0129500644018108, "learning_rate": 4.8577889103308e-06, "loss": 0.5639, "step": 3251 }, { "epoch": 0.52, "grad_norm": 0.8838735599091417, "learning_rate": 4.855180255883259e-06, "loss": 0.5763, "step": 3252 }, { "epoch": 0.52, "grad_norm": 0.9813482493430584, "learning_rate": 4.852571640888609e-06, "loss": 0.5639, "step": 3253 }, { "epoch": 0.52, "grad_norm": 1.0430583684115946, "learning_rate": 4.84996306605751e-06, "loss": 0.6719, "step": 3254 }, { "epoch": 0.52, "grad_norm": 1.0370844068598817, "learning_rate": 4.847354532100606e-06, "loss": 0.668, "step": 3255 }, { "epoch": 0.52, "grad_norm": 0.8569399735079533, "learning_rate": 4.844746039728535e-06, "loss": 0.5862, "step": 3256 }, { "epoch": 0.52, "grad_norm": 0.9243387475529347, "learning_rate": 4.842137589651923e-06, "loss": 0.5553, "step": 3257 }, { "epoch": 0.52, "grad_norm": 0.9799758505005088, "learning_rate": 4.8395291825813824e-06, "loss": 0.5284, "step": 3258 }, { "epoch": 0.53, "grad_norm": 1.0533121346361678, "learning_rate": 4.836920819227514e-06, "loss": 0.6536, "step": 3259 }, { "epoch": 0.53, "grad_norm": 0.9686528667314844, "learning_rate": 4.834312500300915e-06, "loss": 0.5889, "step": 3260 }, { "epoch": 0.53, "grad_norm": 0.9546470682833343, "learning_rate": 4.831704226512153e-06, "loss": 0.589, "step": 3261 }, { "epoch": 0.53, "grad_norm": 1.0321028834380768, "learning_rate": 4.829095998571801e-06, "loss": 0.6534, "step": 3262 }, { "epoch": 0.53, "grad_norm": 1.035261629521912, "learning_rate": 4.826487817190411e-06, "loss": 0.6646, "step": 3263 }, { "epoch": 0.53, "grad_norm": 0.9813499141933322, "learning_rate": 4.8238796830785206e-06, "loss": 0.5839, "step": 3264 }, { "epoch": 0.53, "grad_norm": 1.0050005536683733, "learning_rate": 4.821271596946659e-06, "loss": 0.5929, "step": 3265 }, { "epoch": 0.53, "grad_norm": 0.9756024965647901, "learning_rate": 4.818663559505341e-06, "loss": 0.571, "step": 3266 }, { "epoch": 0.53, "grad_norm": 0.9410676858082435, "learning_rate": 4.816055571465071e-06, "loss": 0.5833, "step": 3267 }, { "epoch": 0.53, "grad_norm": 1.092260096522639, "learning_rate": 4.813447633536331e-06, "loss": 0.5887, "step": 3268 }, { "epoch": 0.53, "grad_norm": 0.6489076504273431, "learning_rate": 4.810839746429598e-06, "loss": 0.4856, "step": 3269 }, { "epoch": 0.53, "grad_norm": 0.986971256592421, "learning_rate": 4.808231910855335e-06, "loss": 0.5939, "step": 3270 }, { "epoch": 0.53, "grad_norm": 1.0511774516352927, "learning_rate": 4.805624127523982e-06, "loss": 0.6158, "step": 3271 }, { "epoch": 0.53, "grad_norm": 1.0591539859023469, "learning_rate": 4.8030163971459765e-06, "loss": 0.5564, "step": 3272 }, { "epoch": 0.53, "grad_norm": 0.9158695754390288, "learning_rate": 4.800408720431735e-06, "loss": 0.4901, "step": 3273 }, { "epoch": 0.53, "grad_norm": 0.9316861937665096, "learning_rate": 4.797801098091659e-06, "loss": 0.6633, "step": 3274 }, { "epoch": 0.53, "grad_norm": 0.9837307286668362, "learning_rate": 4.795193530836136e-06, "loss": 0.6788, "step": 3275 }, { "epoch": 0.53, "grad_norm": 0.8716925796675218, "learning_rate": 4.792586019375543e-06, "loss": 0.5431, "step": 3276 }, { "epoch": 0.53, "grad_norm": 0.98272962390744, "learning_rate": 4.789978564420235e-06, "loss": 0.5959, "step": 3277 }, { "epoch": 0.53, "grad_norm": 1.096442303039265, "learning_rate": 4.787371166680555e-06, "loss": 0.5049, "step": 3278 }, { "epoch": 0.53, "grad_norm": 0.9383788517359899, "learning_rate": 4.784763826866831e-06, "loss": 0.5299, "step": 3279 }, { "epoch": 0.53, "grad_norm": 0.9420626470428047, "learning_rate": 4.782156545689373e-06, "loss": 0.5454, "step": 3280 }, { "epoch": 0.53, "grad_norm": 0.9999786944956942, "learning_rate": 4.779549323858476e-06, "loss": 0.6702, "step": 3281 }, { "epoch": 0.53, "grad_norm": 0.9382697957196202, "learning_rate": 4.776942162084423e-06, "loss": 0.5119, "step": 3282 }, { "epoch": 0.53, "grad_norm": 1.0501426760544719, "learning_rate": 4.77433506107747e-06, "loss": 0.6212, "step": 3283 }, { "epoch": 0.53, "grad_norm": 1.0119859481314675, "learning_rate": 4.771728021547868e-06, "loss": 0.534, "step": 3284 }, { "epoch": 0.53, "grad_norm": 0.9955955700051046, "learning_rate": 4.769121044205847e-06, "loss": 0.5964, "step": 3285 }, { "epoch": 0.53, "grad_norm": 1.0270516391757718, "learning_rate": 4.766514129761616e-06, "loss": 0.5367, "step": 3286 }, { "epoch": 0.53, "grad_norm": 0.9794585512204484, "learning_rate": 4.763907278925372e-06, "loss": 0.6205, "step": 3287 }, { "epoch": 0.53, "grad_norm": 0.9887812462142733, "learning_rate": 4.761300492407293e-06, "loss": 0.6344, "step": 3288 }, { "epoch": 0.53, "grad_norm": 0.9956573898457196, "learning_rate": 4.758693770917543e-06, "loss": 0.6225, "step": 3289 }, { "epoch": 0.53, "grad_norm": 1.0156228494740815, "learning_rate": 4.756087115166257e-06, "loss": 0.5827, "step": 3290 }, { "epoch": 0.53, "grad_norm": 0.9688455199353587, "learning_rate": 4.753480525863566e-06, "loss": 0.5658, "step": 3291 }, { "epoch": 0.53, "grad_norm": 1.0028057696677601, "learning_rate": 4.750874003719577e-06, "loss": 0.6364, "step": 3292 }, { "epoch": 0.53, "grad_norm": 1.0241697916084316, "learning_rate": 4.7482675494443745e-06, "loss": 0.6023, "step": 3293 }, { "epoch": 0.53, "grad_norm": 1.0284079232661567, "learning_rate": 4.745661163748031e-06, "loss": 0.6009, "step": 3294 }, { "epoch": 0.53, "grad_norm": 0.97762384613863, "learning_rate": 4.743054847340598e-06, "loss": 0.5339, "step": 3295 }, { "epoch": 0.53, "grad_norm": 0.8942975996140001, "learning_rate": 4.740448600932107e-06, "loss": 0.5167, "step": 3296 }, { "epoch": 0.53, "grad_norm": 1.02946323026704, "learning_rate": 4.73784242523257e-06, "loss": 0.6356, "step": 3297 }, { "epoch": 0.53, "grad_norm": 0.9673391064227175, "learning_rate": 4.735236320951986e-06, "loss": 0.6504, "step": 3298 }, { "epoch": 0.53, "grad_norm": 1.0053082070466397, "learning_rate": 4.7326302888003224e-06, "loss": 0.6361, "step": 3299 }, { "epoch": 0.53, "grad_norm": 1.01238243880198, "learning_rate": 4.730024329487541e-06, "loss": 0.6365, "step": 3300 }, { "epoch": 0.53, "grad_norm": 1.079870481415195, "learning_rate": 4.7274184437235744e-06, "loss": 0.6607, "step": 3301 }, { "epoch": 0.53, "grad_norm": 1.0158539646895635, "learning_rate": 4.7248126322183364e-06, "loss": 0.5392, "step": 3302 }, { "epoch": 0.53, "grad_norm": 1.0954856586988109, "learning_rate": 4.722206895681723e-06, "loss": 0.5435, "step": 3303 }, { "epoch": 0.53, "grad_norm": 0.9715745977540837, "learning_rate": 4.71960123482361e-06, "loss": 0.6219, "step": 3304 }, { "epoch": 0.53, "grad_norm": 1.0108381823401609, "learning_rate": 4.716995650353847e-06, "loss": 0.6217, "step": 3305 }, { "epoch": 0.53, "grad_norm": 1.0106127951154473, "learning_rate": 4.714390142982272e-06, "loss": 0.6135, "step": 3306 }, { "epoch": 0.53, "grad_norm": 1.0648348077552454, "learning_rate": 4.711784713418694e-06, "loss": 0.7526, "step": 3307 }, { "epoch": 0.53, "grad_norm": 0.9697685467554417, "learning_rate": 4.709179362372906e-06, "loss": 0.5674, "step": 3308 }, { "epoch": 0.53, "grad_norm": 1.030153011804173, "learning_rate": 4.706574090554676e-06, "loss": 0.648, "step": 3309 }, { "epoch": 0.53, "grad_norm": 0.9772541393119392, "learning_rate": 4.70396889867375e-06, "loss": 0.5966, "step": 3310 }, { "epoch": 0.53, "grad_norm": 0.9598646896327911, "learning_rate": 4.70136378743986e-06, "loss": 0.6555, "step": 3311 }, { "epoch": 0.53, "grad_norm": 1.0508878543051865, "learning_rate": 4.698758757562703e-06, "loss": 0.5364, "step": 3312 }, { "epoch": 0.53, "grad_norm": 0.8655864938193625, "learning_rate": 4.696153809751964e-06, "loss": 0.5215, "step": 3313 }, { "epoch": 0.53, "grad_norm": 0.7203948335746837, "learning_rate": 4.693548944717305e-06, "loss": 0.4929, "step": 3314 }, { "epoch": 0.53, "grad_norm": 1.0472455215044212, "learning_rate": 4.69094416316836e-06, "loss": 0.6037, "step": 3315 }, { "epoch": 0.53, "grad_norm": 1.057798255155583, "learning_rate": 4.688339465814744e-06, "loss": 0.6236, "step": 3316 }, { "epoch": 0.53, "grad_norm": 1.0698489687274464, "learning_rate": 4.685734853366049e-06, "loss": 0.6243, "step": 3317 }, { "epoch": 0.53, "grad_norm": 1.0206028978032256, "learning_rate": 4.683130326531842e-06, "loss": 0.6048, "step": 3318 }, { "epoch": 0.53, "grad_norm": 1.0458362084130033, "learning_rate": 4.6805258860216675e-06, "loss": 0.6091, "step": 3319 }, { "epoch": 0.53, "grad_norm": 0.9851619905752268, "learning_rate": 4.677921532545052e-06, "loss": 0.6214, "step": 3320 }, { "epoch": 0.54, "grad_norm": 1.0055450197061575, "learning_rate": 4.675317266811485e-06, "loss": 0.6123, "step": 3321 }, { "epoch": 0.54, "grad_norm": 1.0690971487930612, "learning_rate": 4.672713089530445e-06, "loss": 0.5572, "step": 3322 }, { "epoch": 0.54, "grad_norm": 1.06309775329184, "learning_rate": 4.670109001411383e-06, "loss": 0.6638, "step": 3323 }, { "epoch": 0.54, "grad_norm": 0.9820673733588248, "learning_rate": 4.6675050031637216e-06, "loss": 0.635, "step": 3324 }, { "epoch": 0.54, "grad_norm": 0.9899523766928819, "learning_rate": 4.664901095496863e-06, "loss": 0.608, "step": 3325 }, { "epoch": 0.54, "grad_norm": 1.0711515637688442, "learning_rate": 4.662297279120181e-06, "loss": 0.6321, "step": 3326 }, { "epoch": 0.54, "grad_norm": 1.0245246481248924, "learning_rate": 4.659693554743032e-06, "loss": 0.6343, "step": 3327 }, { "epoch": 0.54, "grad_norm": 1.0193057985995089, "learning_rate": 4.657089923074737e-06, "loss": 0.5832, "step": 3328 }, { "epoch": 0.54, "grad_norm": 0.849949264677271, "learning_rate": 4.6544863848246e-06, "loss": 0.4311, "step": 3329 }, { "epoch": 0.54, "grad_norm": 1.0023897712017473, "learning_rate": 4.651882940701897e-06, "loss": 0.5576, "step": 3330 }, { "epoch": 0.54, "grad_norm": 1.0759814866015502, "learning_rate": 4.649279591415876e-06, "loss": 0.6501, "step": 3331 }, { "epoch": 0.54, "grad_norm": 1.0042620186316635, "learning_rate": 4.646676337675762e-06, "loss": 0.677, "step": 3332 }, { "epoch": 0.54, "grad_norm": 0.728400960748559, "learning_rate": 4.644073180190753e-06, "loss": 0.4642, "step": 3333 }, { "epoch": 0.54, "grad_norm": 0.9771090554812161, "learning_rate": 4.64147011967002e-06, "loss": 0.532, "step": 3334 }, { "epoch": 0.54, "grad_norm": 0.9699961074665734, "learning_rate": 4.638867156822711e-06, "loss": 0.5055, "step": 3335 }, { "epoch": 0.54, "grad_norm": 1.0131552898961167, "learning_rate": 4.636264292357943e-06, "loss": 0.5773, "step": 3336 }, { "epoch": 0.54, "grad_norm": 0.992264366617067, "learning_rate": 4.633661526984808e-06, "loss": 0.6353, "step": 3337 }, { "epoch": 0.54, "grad_norm": 0.9507375563520974, "learning_rate": 4.63105886141237e-06, "loss": 0.5232, "step": 3338 }, { "epoch": 0.54, "grad_norm": 0.9884793775634488, "learning_rate": 4.628456296349669e-06, "loss": 0.6421, "step": 3339 }, { "epoch": 0.54, "grad_norm": 0.9765297203754258, "learning_rate": 4.6258538325057145e-06, "loss": 0.6018, "step": 3340 }, { "epoch": 0.54, "grad_norm": 0.917021338555848, "learning_rate": 4.623251470589488e-06, "loss": 0.6092, "step": 3341 }, { "epoch": 0.54, "grad_norm": 1.1175379450255176, "learning_rate": 4.62064921130995e-06, "loss": 0.66, "step": 3342 }, { "epoch": 0.54, "grad_norm": 1.0230437865126476, "learning_rate": 4.618047055376019e-06, "loss": 0.5707, "step": 3343 }, { "epoch": 0.54, "grad_norm": 0.8731650199015155, "learning_rate": 4.615445003496602e-06, "loss": 0.504, "step": 3344 }, { "epoch": 0.54, "grad_norm": 1.0984480208303067, "learning_rate": 4.612843056380564e-06, "loss": 0.5766, "step": 3345 }, { "epoch": 0.54, "grad_norm": 1.0019576403667154, "learning_rate": 4.610241214736751e-06, "loss": 0.4803, "step": 3346 }, { "epoch": 0.54, "grad_norm": 1.0358686086219877, "learning_rate": 4.607639479273976e-06, "loss": 0.5832, "step": 3347 }, { "epoch": 0.54, "grad_norm": 0.9844074066270181, "learning_rate": 4.605037850701019e-06, "loss": 0.6311, "step": 3348 }, { "epoch": 0.54, "grad_norm": 1.0050693618593929, "learning_rate": 4.602436329726643e-06, "loss": 0.6498, "step": 3349 }, { "epoch": 0.54, "grad_norm": 0.9800617155975043, "learning_rate": 4.599834917059565e-06, "loss": 0.6901, "step": 3350 }, { "epoch": 0.54, "grad_norm": 0.9202540112595412, "learning_rate": 4.597233613408488e-06, "loss": 0.5405, "step": 3351 }, { "epoch": 0.54, "grad_norm": 1.0028899741437136, "learning_rate": 4.5946324194820795e-06, "loss": 0.6265, "step": 3352 }, { "epoch": 0.54, "grad_norm": 0.9885265067391772, "learning_rate": 4.592031335988971e-06, "loss": 0.6625, "step": 3353 }, { "epoch": 0.54, "grad_norm": 0.9184750724152221, "learning_rate": 4.589430363637773e-06, "loss": 0.5673, "step": 3354 }, { "epoch": 0.54, "grad_norm": 0.8810295814819818, "learning_rate": 4.5868295031370625e-06, "loss": 0.5849, "step": 3355 }, { "epoch": 0.54, "grad_norm": 0.9551424365884923, "learning_rate": 4.584228755195382e-06, "loss": 0.5919, "step": 3356 }, { "epoch": 0.54, "grad_norm": 0.939564277898375, "learning_rate": 4.581628120521251e-06, "loss": 0.5476, "step": 3357 }, { "epoch": 0.54, "grad_norm": 1.0297112285903718, "learning_rate": 4.579027599823153e-06, "loss": 0.5699, "step": 3358 }, { "epoch": 0.54, "grad_norm": 0.9664894153938252, "learning_rate": 4.5764271938095405e-06, "loss": 0.5522, "step": 3359 }, { "epoch": 0.54, "grad_norm": 1.0272219839356316, "learning_rate": 4.573826903188836e-06, "loss": 0.6462, "step": 3360 }, { "epoch": 0.54, "grad_norm": 0.6298293614313001, "learning_rate": 4.571226728669432e-06, "loss": 0.4497, "step": 3361 }, { "epoch": 0.54, "grad_norm": 0.9555385825041479, "learning_rate": 4.568626670959684e-06, "loss": 0.5651, "step": 3362 }, { "epoch": 0.54, "grad_norm": 1.0653822428767306, "learning_rate": 4.566026730767922e-06, "loss": 0.6795, "step": 3363 }, { "epoch": 0.54, "grad_norm": 1.0785622232610894, "learning_rate": 4.5634269088024434e-06, "loss": 0.6548, "step": 3364 }, { "epoch": 0.54, "grad_norm": 1.0189051865487468, "learning_rate": 4.560827205771506e-06, "loss": 0.5992, "step": 3365 }, { "epoch": 0.54, "grad_norm": 0.9748788212494538, "learning_rate": 4.558227622383345e-06, "loss": 0.5657, "step": 3366 }, { "epoch": 0.54, "grad_norm": 0.9933371883944114, "learning_rate": 4.555628159346158e-06, "loss": 0.6283, "step": 3367 }, { "epoch": 0.54, "grad_norm": 1.0467909845670023, "learning_rate": 4.55302881736811e-06, "loss": 0.6138, "step": 3368 }, { "epoch": 0.54, "grad_norm": 1.0160078135696642, "learning_rate": 4.550429597157331e-06, "loss": 0.5951, "step": 3369 }, { "epoch": 0.54, "grad_norm": 1.0118974586137033, "learning_rate": 4.547830499421921e-06, "loss": 0.5559, "step": 3370 }, { "epoch": 0.54, "grad_norm": 0.9811066188380705, "learning_rate": 4.545231524869952e-06, "loss": 0.5351, "step": 3371 }, { "epoch": 0.54, "grad_norm": 0.9789421129622694, "learning_rate": 4.542632674209447e-06, "loss": 0.5498, "step": 3372 }, { "epoch": 0.54, "grad_norm": 0.9327448785732281, "learning_rate": 4.54003394814841e-06, "loss": 0.4642, "step": 3373 }, { "epoch": 0.54, "grad_norm": 1.0037809812317164, "learning_rate": 4.537435347394807e-06, "loss": 0.5621, "step": 3374 }, { "epoch": 0.54, "grad_norm": 0.9440147702420087, "learning_rate": 4.534836872656563e-06, "loss": 0.5742, "step": 3375 }, { "epoch": 0.54, "grad_norm": 1.0015228739286448, "learning_rate": 4.532238524641578e-06, "loss": 0.5516, "step": 3376 }, { "epoch": 0.54, "grad_norm": 0.94497275046947, "learning_rate": 4.529640304057714e-06, "loss": 0.5333, "step": 3377 }, { "epoch": 0.54, "grad_norm": 1.0944652085870414, "learning_rate": 4.527042211612796e-06, "loss": 0.6344, "step": 3378 }, { "epoch": 0.54, "grad_norm": 0.9708824750761053, "learning_rate": 4.524444248014615e-06, "loss": 0.6294, "step": 3379 }, { "epoch": 0.54, "grad_norm": 1.1014475061735598, "learning_rate": 4.521846413970932e-06, "loss": 0.6365, "step": 3380 }, { "epoch": 0.54, "grad_norm": 1.0909982689524242, "learning_rate": 4.519248710189465e-06, "loss": 0.6249, "step": 3381 }, { "epoch": 0.54, "grad_norm": 0.6756509597258938, "learning_rate": 4.516651137377902e-06, "loss": 0.523, "step": 3382 }, { "epoch": 0.55, "grad_norm": 1.0012806898756457, "learning_rate": 4.514053696243893e-06, "loss": 0.6284, "step": 3383 }, { "epoch": 0.55, "grad_norm": 1.054176104004274, "learning_rate": 4.511456387495052e-06, "loss": 0.6339, "step": 3384 }, { "epoch": 0.55, "grad_norm": 0.9832878833525789, "learning_rate": 4.508859211838955e-06, "loss": 0.5306, "step": 3385 }, { "epoch": 0.55, "grad_norm": 1.0438038956253828, "learning_rate": 4.50626216998315e-06, "loss": 0.6075, "step": 3386 }, { "epoch": 0.55, "grad_norm": 0.9999624363758526, "learning_rate": 4.503665262635141e-06, "loss": 0.5194, "step": 3387 }, { "epoch": 0.55, "grad_norm": 1.005021878357175, "learning_rate": 4.501068490502394e-06, "loss": 0.5998, "step": 3388 }, { "epoch": 0.55, "grad_norm": 0.981212671047851, "learning_rate": 4.498471854292344e-06, "loss": 0.5643, "step": 3389 }, { "epoch": 0.55, "grad_norm": 0.9639825568043952, "learning_rate": 4.495875354712386e-06, "loss": 0.5536, "step": 3390 }, { "epoch": 0.55, "grad_norm": 1.0418486837601173, "learning_rate": 4.493278992469877e-06, "loss": 0.7085, "step": 3391 }, { "epoch": 0.55, "grad_norm": 1.0306037469831133, "learning_rate": 4.4906827682721375e-06, "loss": 0.5731, "step": 3392 }, { "epoch": 0.55, "grad_norm": 1.0268770565798349, "learning_rate": 4.488086682826454e-06, "loss": 0.6459, "step": 3393 }, { "epoch": 0.55, "grad_norm": 0.9843481887490956, "learning_rate": 4.485490736840066e-06, "loss": 0.6283, "step": 3394 }, { "epoch": 0.55, "grad_norm": 0.7154780086435534, "learning_rate": 4.482894931020185e-06, "loss": 0.4953, "step": 3395 }, { "epoch": 0.55, "grad_norm": 1.0165835042789737, "learning_rate": 4.48029926607398e-06, "loss": 0.633, "step": 3396 }, { "epoch": 0.55, "grad_norm": 1.0775379195150174, "learning_rate": 4.477703742708579e-06, "loss": 0.6045, "step": 3397 }, { "epoch": 0.55, "grad_norm": 1.0117171399178724, "learning_rate": 4.475108361631076e-06, "loss": 0.5863, "step": 3398 }, { "epoch": 0.55, "grad_norm": 1.0064772000644282, "learning_rate": 4.472513123548525e-06, "loss": 0.6036, "step": 3399 }, { "epoch": 0.55, "grad_norm": 1.0637247101489924, "learning_rate": 4.469918029167939e-06, "loss": 0.6411, "step": 3400 }, { "epoch": 0.55, "grad_norm": 1.0126899482249887, "learning_rate": 4.467323079196292e-06, "loss": 0.6374, "step": 3401 }, { "epoch": 0.55, "grad_norm": 0.9913440953164321, "learning_rate": 4.464728274340525e-06, "loss": 0.5539, "step": 3402 }, { "epoch": 0.55, "grad_norm": 0.9362319292359428, "learning_rate": 4.462133615307527e-06, "loss": 0.5683, "step": 3403 }, { "epoch": 0.55, "grad_norm": 1.0612359807843708, "learning_rate": 4.45953910280416e-06, "loss": 0.6249, "step": 3404 }, { "epoch": 0.55, "grad_norm": 0.9448788436125708, "learning_rate": 4.4569447375372396e-06, "loss": 0.5398, "step": 3405 }, { "epoch": 0.55, "grad_norm": 0.9947467746395797, "learning_rate": 4.454350520213543e-06, "loss": 0.5844, "step": 3406 }, { "epoch": 0.55, "grad_norm": 0.6441468058866067, "learning_rate": 4.451756451539804e-06, "loss": 0.4942, "step": 3407 }, { "epoch": 0.55, "grad_norm": 1.0237361743752325, "learning_rate": 4.44916253222272e-06, "loss": 0.5599, "step": 3408 }, { "epoch": 0.55, "grad_norm": 0.9830253723537834, "learning_rate": 4.446568762968947e-06, "loss": 0.6138, "step": 3409 }, { "epoch": 0.55, "grad_norm": 1.011656096356316, "learning_rate": 4.443975144485099e-06, "loss": 0.6024, "step": 3410 }, { "epoch": 0.55, "grad_norm": 1.007068221894919, "learning_rate": 4.441381677477748e-06, "loss": 0.6564, "step": 3411 }, { "epoch": 0.55, "grad_norm": 1.0135195242171868, "learning_rate": 4.438788362653426e-06, "loss": 0.6029, "step": 3412 }, { "epoch": 0.55, "grad_norm": 1.028571937740489, "learning_rate": 4.436195200718625e-06, "loss": 0.5722, "step": 3413 }, { "epoch": 0.55, "grad_norm": 0.979875824782129, "learning_rate": 4.43360219237979e-06, "loss": 0.5387, "step": 3414 }, { "epoch": 0.55, "grad_norm": 1.0245338249646139, "learning_rate": 4.431009338343335e-06, "loss": 0.5966, "step": 3415 }, { "epoch": 0.55, "grad_norm": 0.9860851904399448, "learning_rate": 4.428416639315616e-06, "loss": 0.6053, "step": 3416 }, { "epoch": 0.55, "grad_norm": 0.992850656210538, "learning_rate": 4.425824096002962e-06, "loss": 0.575, "step": 3417 }, { "epoch": 0.55, "grad_norm": 1.026673132900477, "learning_rate": 4.423231709111653e-06, "loss": 0.6488, "step": 3418 }, { "epoch": 0.55, "grad_norm": 0.9146066478984853, "learning_rate": 4.420639479347924e-06, "loss": 0.5271, "step": 3419 }, { "epoch": 0.55, "grad_norm": 1.0383789795319225, "learning_rate": 4.4180474074179705e-06, "loss": 0.6633, "step": 3420 }, { "epoch": 0.55, "grad_norm": 1.0144456681292144, "learning_rate": 4.415455494027946e-06, "loss": 0.5719, "step": 3421 }, { "epoch": 0.55, "grad_norm": 1.0103578156144246, "learning_rate": 4.412863739883958e-06, "loss": 0.6137, "step": 3422 }, { "epoch": 0.55, "grad_norm": 1.0063630950354066, "learning_rate": 4.410272145692069e-06, "loss": 0.6186, "step": 3423 }, { "epoch": 0.55, "grad_norm": 1.0293821623419879, "learning_rate": 4.407680712158308e-06, "loss": 0.7032, "step": 3424 }, { "epoch": 0.55, "grad_norm": 0.6757480844729872, "learning_rate": 4.405089439988645e-06, "loss": 0.4973, "step": 3425 }, { "epoch": 0.55, "grad_norm": 1.061662291940576, "learning_rate": 4.402498329889018e-06, "loss": 0.5593, "step": 3426 }, { "epoch": 0.55, "grad_norm": 0.9970134233280021, "learning_rate": 4.399907382565316e-06, "loss": 0.6097, "step": 3427 }, { "epoch": 0.55, "grad_norm": 0.892037453657559, "learning_rate": 4.397316598723385e-06, "loss": 0.5661, "step": 3428 }, { "epoch": 0.55, "grad_norm": 1.1205638085893797, "learning_rate": 4.394725979069026e-06, "loss": 0.5886, "step": 3429 }, { "epoch": 0.55, "grad_norm": 1.021584968969128, "learning_rate": 4.392135524307993e-06, "loss": 0.5904, "step": 3430 }, { "epoch": 0.55, "grad_norm": 1.0056127530205334, "learning_rate": 4.389545235146003e-06, "loss": 0.5391, "step": 3431 }, { "epoch": 0.55, "grad_norm": 0.9808933888149748, "learning_rate": 4.386955112288714e-06, "loss": 0.6426, "step": 3432 }, { "epoch": 0.55, "grad_norm": 1.0552660627224566, "learning_rate": 4.384365156441752e-06, "loss": 0.5795, "step": 3433 }, { "epoch": 0.55, "grad_norm": 1.3802332678351499, "learning_rate": 4.381775368310694e-06, "loss": 0.543, "step": 3434 }, { "epoch": 0.55, "grad_norm": 1.0804355686806488, "learning_rate": 4.3791857486010655e-06, "loss": 0.6103, "step": 3435 }, { "epoch": 0.55, "grad_norm": 1.0482504271482156, "learning_rate": 4.3765962980183515e-06, "loss": 0.6564, "step": 3436 }, { "epoch": 0.55, "grad_norm": 1.0255054973674034, "learning_rate": 4.374007017267991e-06, "loss": 0.5455, "step": 3437 }, { "epoch": 0.55, "grad_norm": 0.9489932088476616, "learning_rate": 4.371417907055373e-06, "loss": 0.5522, "step": 3438 }, { "epoch": 0.55, "grad_norm": 1.0175646448103681, "learning_rate": 4.3688289680858456e-06, "loss": 0.6356, "step": 3439 }, { "epoch": 0.55, "grad_norm": 1.0588952872130237, "learning_rate": 4.366240201064705e-06, "loss": 0.6179, "step": 3440 }, { "epoch": 0.55, "grad_norm": 0.973997577847627, "learning_rate": 4.363651606697204e-06, "loss": 0.5551, "step": 3441 }, { "epoch": 0.55, "grad_norm": 0.9578925516715439, "learning_rate": 4.361063185688546e-06, "loss": 0.5291, "step": 3442 }, { "epoch": 0.55, "grad_norm": 1.0363903348962193, "learning_rate": 4.358474938743889e-06, "loss": 0.6254, "step": 3443 }, { "epoch": 0.55, "grad_norm": 1.0615438370162738, "learning_rate": 4.355886866568342e-06, "loss": 0.6722, "step": 3444 }, { "epoch": 0.56, "grad_norm": 0.9227947161993313, "learning_rate": 4.353298969866966e-06, "loss": 0.5703, "step": 3445 }, { "epoch": 0.56, "grad_norm": 0.9539663780722605, "learning_rate": 4.350711249344778e-06, "loss": 0.5724, "step": 3446 }, { "epoch": 0.56, "grad_norm": 1.0945919909583495, "learning_rate": 4.348123705706745e-06, "loss": 0.6078, "step": 3447 }, { "epoch": 0.56, "grad_norm": 0.9609037824822799, "learning_rate": 4.345536339657783e-06, "loss": 0.5971, "step": 3448 }, { "epoch": 0.56, "grad_norm": 0.9621630552697684, "learning_rate": 4.342949151902761e-06, "loss": 0.5836, "step": 3449 }, { "epoch": 0.56, "grad_norm": 1.0440108468976308, "learning_rate": 4.340362143146504e-06, "loss": 0.5329, "step": 3450 }, { "epoch": 0.56, "grad_norm": 1.0398718137167298, "learning_rate": 4.337775314093781e-06, "loss": 0.6393, "step": 3451 }, { "epoch": 0.56, "grad_norm": 1.1203469021416326, "learning_rate": 4.335188665449316e-06, "loss": 0.6242, "step": 3452 }, { "epoch": 0.56, "grad_norm": 0.9763641048524768, "learning_rate": 4.3326021979177865e-06, "loss": 0.5806, "step": 3453 }, { "epoch": 0.56, "grad_norm": 1.0863615471001702, "learning_rate": 4.330015912203812e-06, "loss": 0.5869, "step": 3454 }, { "epoch": 0.56, "grad_norm": 1.11805496575195, "learning_rate": 4.327429809011973e-06, "loss": 0.6434, "step": 3455 }, { "epoch": 0.56, "grad_norm": 0.9528676039345545, "learning_rate": 4.324843889046795e-06, "loss": 0.6018, "step": 3456 }, { "epoch": 0.56, "grad_norm": 1.0746723173661208, "learning_rate": 4.322258153012751e-06, "loss": 0.6162, "step": 3457 }, { "epoch": 0.56, "grad_norm": 1.0703732010328004, "learning_rate": 4.319672601614268e-06, "loss": 0.6293, "step": 3458 }, { "epoch": 0.56, "grad_norm": 1.0053156864252262, "learning_rate": 4.317087235555722e-06, "loss": 0.6019, "step": 3459 }, { "epoch": 0.56, "grad_norm": 0.9783536035908239, "learning_rate": 4.314502055541437e-06, "loss": 0.5834, "step": 3460 }, { "epoch": 0.56, "grad_norm": 1.0588505318675427, "learning_rate": 4.311917062275688e-06, "loss": 0.6204, "step": 3461 }, { "epoch": 0.56, "grad_norm": 1.083726156048756, "learning_rate": 4.309332256462699e-06, "loss": 0.5946, "step": 3462 }, { "epoch": 0.56, "grad_norm": 1.0111729871369115, "learning_rate": 4.306747638806641e-06, "loss": 0.6374, "step": 3463 }, { "epoch": 0.56, "grad_norm": 0.991857460491777, "learning_rate": 4.304163210011636e-06, "loss": 0.6378, "step": 3464 }, { "epoch": 0.56, "grad_norm": 0.9798702516168581, "learning_rate": 4.301578970781753e-06, "loss": 0.6079, "step": 3465 }, { "epoch": 0.56, "grad_norm": 0.9378730855677958, "learning_rate": 4.2989949218210125e-06, "loss": 0.5304, "step": 3466 }, { "epoch": 0.56, "grad_norm": 1.029181924622109, "learning_rate": 4.2964110638333755e-06, "loss": 0.6053, "step": 3467 }, { "epoch": 0.56, "grad_norm": 1.1268163214369247, "learning_rate": 4.29382739752276e-06, "loss": 0.6988, "step": 3468 }, { "epoch": 0.56, "grad_norm": 1.0241383694819495, "learning_rate": 4.2912439235930305e-06, "loss": 0.5318, "step": 3469 }, { "epoch": 0.56, "grad_norm": 0.9936435664348292, "learning_rate": 4.2886606427479905e-06, "loss": 0.5999, "step": 3470 }, { "epoch": 0.56, "grad_norm": 1.0208262019693501, "learning_rate": 4.2860775556913995e-06, "loss": 0.6244, "step": 3471 }, { "epoch": 0.56, "grad_norm": 0.984765494006994, "learning_rate": 4.283494663126962e-06, "loss": 0.576, "step": 3472 }, { "epoch": 0.56, "grad_norm": 1.046474820262225, "learning_rate": 4.28091196575833e-06, "loss": 0.6252, "step": 3473 }, { "epoch": 0.56, "grad_norm": 1.010594877905954, "learning_rate": 4.278329464289098e-06, "loss": 0.6172, "step": 3474 }, { "epoch": 0.56, "grad_norm": 0.9429245023346525, "learning_rate": 4.275747159422815e-06, "loss": 0.5658, "step": 3475 }, { "epoch": 0.56, "grad_norm": 0.9897712266118776, "learning_rate": 4.273165051862969e-06, "loss": 0.5238, "step": 3476 }, { "epoch": 0.56, "grad_norm": 1.031600282400674, "learning_rate": 4.270583142312998e-06, "loss": 0.6536, "step": 3477 }, { "epoch": 0.56, "grad_norm": 1.0006061102746264, "learning_rate": 4.268001431476286e-06, "loss": 0.6083, "step": 3478 }, { "epoch": 0.56, "grad_norm": 0.9932404654108857, "learning_rate": 4.265419920056162e-06, "loss": 0.6448, "step": 3479 }, { "epoch": 0.56, "grad_norm": 1.0086057263584498, "learning_rate": 4.262838608755899e-06, "loss": 0.6089, "step": 3480 }, { "epoch": 0.56, "grad_norm": 1.0471477291438624, "learning_rate": 4.2602574982787216e-06, "loss": 0.6173, "step": 3481 }, { "epoch": 0.56, "grad_norm": 1.017407652146029, "learning_rate": 4.257676589327791e-06, "loss": 0.5096, "step": 3482 }, { "epoch": 0.56, "grad_norm": 1.0065396804957187, "learning_rate": 4.255095882606219e-06, "loss": 0.5754, "step": 3483 }, { "epoch": 0.56, "grad_norm": 1.0247080178098593, "learning_rate": 4.252515378817062e-06, "loss": 0.6433, "step": 3484 }, { "epoch": 0.56, "grad_norm": 1.0642729450424664, "learning_rate": 4.2499350786633235e-06, "loss": 0.6071, "step": 3485 }, { "epoch": 0.56, "grad_norm": 0.997166962929344, "learning_rate": 4.247354982847942e-06, "loss": 0.6168, "step": 3486 }, { "epoch": 0.56, "grad_norm": 0.9611128289541894, "learning_rate": 4.244775092073811e-06, "loss": 0.445, "step": 3487 }, { "epoch": 0.56, "grad_norm": 1.0471474507389487, "learning_rate": 4.242195407043763e-06, "loss": 0.6741, "step": 3488 }, { "epoch": 0.56, "grad_norm": 0.9978503201383152, "learning_rate": 4.239615928460574e-06, "loss": 0.5656, "step": 3489 }, { "epoch": 0.56, "grad_norm": 0.66782530124989, "learning_rate": 4.2370366570269676e-06, "loss": 0.4941, "step": 3490 }, { "epoch": 0.56, "grad_norm": 1.000474006680463, "learning_rate": 4.234457593445608e-06, "loss": 0.5469, "step": 3491 }, { "epoch": 0.56, "grad_norm": 1.0182215005965956, "learning_rate": 4.2318787384191e-06, "loss": 0.5881, "step": 3492 }, { "epoch": 0.56, "grad_norm": 0.9944937184679367, "learning_rate": 4.229300092649997e-06, "loss": 0.6067, "step": 3493 }, { "epoch": 0.56, "grad_norm": 0.9841926805046696, "learning_rate": 4.226721656840796e-06, "loss": 0.6258, "step": 3494 }, { "epoch": 0.56, "grad_norm": 0.6554381127531465, "learning_rate": 4.224143431693929e-06, "loss": 0.5057, "step": 3495 }, { "epoch": 0.56, "grad_norm": 0.9548726496469082, "learning_rate": 4.2215654179117765e-06, "loss": 0.6388, "step": 3496 }, { "epoch": 0.56, "grad_norm": 1.0564436382185023, "learning_rate": 4.2189876161966655e-06, "loss": 0.6052, "step": 3497 }, { "epoch": 0.56, "grad_norm": 0.9518821541021305, "learning_rate": 4.216410027250853e-06, "loss": 0.5519, "step": 3498 }, { "epoch": 0.56, "grad_norm": 1.0375068284240994, "learning_rate": 4.213832651776551e-06, "loss": 0.6657, "step": 3499 }, { "epoch": 0.56, "grad_norm": 0.9552643862231862, "learning_rate": 4.211255490475905e-06, "loss": 0.5794, "step": 3500 }, { "epoch": 0.56, "grad_norm": 1.1619851083376305, "learning_rate": 4.208678544051005e-06, "loss": 0.7086, "step": 3501 }, { "epoch": 0.56, "grad_norm": 0.9559149368495451, "learning_rate": 4.2061018132038825e-06, "loss": 0.6336, "step": 3502 }, { "epoch": 0.56, "grad_norm": 1.0075580160388367, "learning_rate": 4.203525298636512e-06, "loss": 0.6515, "step": 3503 }, { "epoch": 0.56, "grad_norm": 0.9957626187323152, "learning_rate": 4.2009490010508026e-06, "loss": 0.6039, "step": 3504 }, { "epoch": 0.56, "grad_norm": 1.003979400032938, "learning_rate": 4.19837292114861e-06, "loss": 0.5286, "step": 3505 }, { "epoch": 0.56, "grad_norm": 0.9512930436167252, "learning_rate": 4.195797059631733e-06, "loss": 0.501, "step": 3506 }, { "epoch": 0.57, "grad_norm": 0.9785113594830841, "learning_rate": 4.1932214172019056e-06, "loss": 0.6031, "step": 3507 }, { "epoch": 0.57, "grad_norm": 1.0802192359246439, "learning_rate": 4.190645994560802e-06, "loss": 0.6857, "step": 3508 }, { "epoch": 0.57, "grad_norm": 1.0677456288820848, "learning_rate": 4.188070792410039e-06, "loss": 0.5758, "step": 3509 }, { "epoch": 0.57, "grad_norm": 1.0527727098660917, "learning_rate": 4.185495811451175e-06, "loss": 0.6547, "step": 3510 }, { "epoch": 0.57, "grad_norm": 1.0358121332402606, "learning_rate": 4.182921052385702e-06, "loss": 0.4871, "step": 3511 }, { "epoch": 0.57, "grad_norm": 0.9389598835819879, "learning_rate": 4.180346515915057e-06, "loss": 0.5542, "step": 3512 }, { "epoch": 0.57, "grad_norm": 0.9216510573199962, "learning_rate": 4.177772202740617e-06, "loss": 0.5612, "step": 3513 }, { "epoch": 0.57, "grad_norm": 0.91114928686773, "learning_rate": 4.175198113563692e-06, "loss": 0.5907, "step": 3514 }, { "epoch": 0.57, "grad_norm": 1.0660798702695307, "learning_rate": 4.172624249085537e-06, "loss": 0.5298, "step": 3515 }, { "epoch": 0.57, "grad_norm": 1.0037855041451813, "learning_rate": 4.170050610007344e-06, "loss": 0.4824, "step": 3516 }, { "epoch": 0.57, "grad_norm": 0.9942996574008681, "learning_rate": 4.167477197030242e-06, "loss": 0.559, "step": 3517 }, { "epoch": 0.57, "grad_norm": 1.0305142592115522, "learning_rate": 4.164904010855299e-06, "loss": 0.6005, "step": 3518 }, { "epoch": 0.57, "grad_norm": 1.039342522561636, "learning_rate": 4.162331052183526e-06, "loss": 0.589, "step": 3519 }, { "epoch": 0.57, "grad_norm": 1.022367113488299, "learning_rate": 4.159758321715862e-06, "loss": 0.5999, "step": 3520 }, { "epoch": 0.57, "grad_norm": 1.1344523916684348, "learning_rate": 4.157185820153193e-06, "loss": 0.6175, "step": 3521 }, { "epoch": 0.57, "grad_norm": 0.9002174403567248, "learning_rate": 4.154613548196341e-06, "loss": 0.5595, "step": 3522 }, { "epoch": 0.57, "grad_norm": 0.9545828627968804, "learning_rate": 4.15204150654606e-06, "loss": 0.5816, "step": 3523 }, { "epoch": 0.57, "grad_norm": 0.969562756038056, "learning_rate": 4.149469695903047e-06, "loss": 0.5626, "step": 3524 }, { "epoch": 0.57, "grad_norm": 0.989309040393606, "learning_rate": 4.146898116967932e-06, "loss": 0.5724, "step": 3525 }, { "epoch": 0.57, "grad_norm": 1.0436471992420429, "learning_rate": 4.1443267704412895e-06, "loss": 0.5546, "step": 3526 }, { "epoch": 0.57, "grad_norm": 0.9799223718088593, "learning_rate": 4.141755657023618e-06, "loss": 0.5413, "step": 3527 }, { "epoch": 0.57, "grad_norm": 0.955671423575773, "learning_rate": 4.139184777415365e-06, "loss": 0.4824, "step": 3528 }, { "epoch": 0.57, "grad_norm": 1.0084400676403826, "learning_rate": 4.1366141323169076e-06, "loss": 0.6352, "step": 3529 }, { "epoch": 0.57, "grad_norm": 0.991495550938125, "learning_rate": 4.1340437224285594e-06, "loss": 0.5215, "step": 3530 }, { "epoch": 0.57, "grad_norm": 1.0321700138078431, "learning_rate": 4.131473548450571e-06, "loss": 0.6117, "step": 3531 }, { "epoch": 0.57, "grad_norm": 1.0529163700653292, "learning_rate": 4.12890361108313e-06, "loss": 0.6181, "step": 3532 }, { "epoch": 0.57, "grad_norm": 1.0788572242787289, "learning_rate": 4.126333911026357e-06, "loss": 0.638, "step": 3533 }, { "epoch": 0.57, "grad_norm": 1.0270532378003343, "learning_rate": 4.123764448980308e-06, "loss": 0.5967, "step": 3534 }, { "epoch": 0.57, "grad_norm": 1.1040814437559843, "learning_rate": 4.1211952256449796e-06, "loss": 0.6446, "step": 3535 }, { "epoch": 0.57, "grad_norm": 0.9512877802554228, "learning_rate": 4.118626241720293e-06, "loss": 0.5045, "step": 3536 }, { "epoch": 0.57, "grad_norm": 1.0681199635328404, "learning_rate": 4.116057497906114e-06, "loss": 0.594, "step": 3537 }, { "epoch": 0.57, "grad_norm": 0.9467210690602159, "learning_rate": 4.11348899490224e-06, "loss": 0.5965, "step": 3538 }, { "epoch": 0.57, "grad_norm": 1.0219927264139612, "learning_rate": 4.1109207334084e-06, "loss": 0.5924, "step": 3539 }, { "epoch": 0.57, "grad_norm": 0.9286100046742463, "learning_rate": 4.1083527141242594e-06, "loss": 0.4836, "step": 3540 }, { "epoch": 0.57, "grad_norm": 0.930663264630551, "learning_rate": 4.105784937749419e-06, "loss": 0.5266, "step": 3541 }, { "epoch": 0.57, "grad_norm": 0.6851119629114353, "learning_rate": 4.103217404983409e-06, "loss": 0.476, "step": 3542 }, { "epoch": 0.57, "grad_norm": 1.0943651099493412, "learning_rate": 4.100650116525698e-06, "loss": 0.5977, "step": 3543 }, { "epoch": 0.57, "grad_norm": 1.0136223504813284, "learning_rate": 4.098083073075686e-06, "loss": 0.6293, "step": 3544 }, { "epoch": 0.57, "grad_norm": 1.0694835676136176, "learning_rate": 4.0955162753327085e-06, "loss": 0.5317, "step": 3545 }, { "epoch": 0.57, "grad_norm": 1.034493704087418, "learning_rate": 4.092949723996028e-06, "loss": 0.6597, "step": 3546 }, { "epoch": 0.57, "grad_norm": 0.9557363981776348, "learning_rate": 4.0903834197648444e-06, "loss": 0.5837, "step": 3547 }, { "epoch": 0.57, "grad_norm": 1.0476567015524871, "learning_rate": 4.087817363338294e-06, "loss": 0.5776, "step": 3548 }, { "epoch": 0.57, "grad_norm": 1.0357392109925678, "learning_rate": 4.085251555415437e-06, "loss": 0.5624, "step": 3549 }, { "epoch": 0.57, "grad_norm": 1.1162700070216063, "learning_rate": 4.08268599669527e-06, "loss": 0.6595, "step": 3550 }, { "epoch": 0.57, "grad_norm": 1.0642952795890377, "learning_rate": 4.080120687876726e-06, "loss": 0.5921, "step": 3551 }, { "epoch": 0.57, "grad_norm": 1.0513458247298086, "learning_rate": 4.077555629658662e-06, "loss": 0.584, "step": 3552 }, { "epoch": 0.57, "grad_norm": 1.0129740644197829, "learning_rate": 4.074990822739871e-06, "loss": 0.5914, "step": 3553 }, { "epoch": 0.57, "grad_norm": 0.920181825425386, "learning_rate": 4.072426267819081e-06, "loss": 0.5212, "step": 3554 }, { "epoch": 0.57, "grad_norm": 0.9690598513532722, "learning_rate": 4.069861965594941e-06, "loss": 0.589, "step": 3555 }, { "epoch": 0.57, "grad_norm": 0.9973525279064102, "learning_rate": 4.067297916766042e-06, "loss": 0.5975, "step": 3556 }, { "epoch": 0.57, "grad_norm": 1.0645226484452224, "learning_rate": 4.0647341220309024e-06, "loss": 0.618, "step": 3557 }, { "epoch": 0.57, "grad_norm": 0.9400911540186323, "learning_rate": 4.062170582087965e-06, "loss": 0.5783, "step": 3558 }, { "epoch": 0.57, "grad_norm": 1.0386451100228673, "learning_rate": 4.059607297635615e-06, "loss": 0.592, "step": 3559 }, { "epoch": 0.57, "grad_norm": 1.0033600419007418, "learning_rate": 4.057044269372159e-06, "loss": 0.638, "step": 3560 }, { "epoch": 0.57, "grad_norm": 1.0538372979874435, "learning_rate": 4.054481497995836e-06, "loss": 0.6638, "step": 3561 }, { "epoch": 0.57, "grad_norm": 0.9525545187657446, "learning_rate": 4.051918984204817e-06, "loss": 0.5553, "step": 3562 }, { "epoch": 0.57, "grad_norm": 0.9285094776304778, "learning_rate": 4.049356728697199e-06, "loss": 0.6766, "step": 3563 }, { "epoch": 0.57, "grad_norm": 0.6687189343103905, "learning_rate": 4.046794732171016e-06, "loss": 0.4598, "step": 3564 }, { "epoch": 0.57, "grad_norm": 0.926237272006665, "learning_rate": 4.04423299532422e-06, "loss": 0.5816, "step": 3565 }, { "epoch": 0.57, "grad_norm": 0.9623821162953281, "learning_rate": 4.041671518854704e-06, "loss": 0.6149, "step": 3566 }, { "epoch": 0.57, "grad_norm": 0.870138482166875, "learning_rate": 4.0391103034602826e-06, "loss": 0.4738, "step": 3567 }, { "epoch": 0.57, "grad_norm": 1.0343250936188184, "learning_rate": 4.036549349838701e-06, "loss": 0.6726, "step": 3568 }, { "epoch": 0.58, "grad_norm": 0.9308460059310422, "learning_rate": 4.033988658687634e-06, "loss": 0.5156, "step": 3569 }, { "epoch": 0.58, "grad_norm": 0.9493513943874937, "learning_rate": 4.031428230704686e-06, "loss": 0.5419, "step": 3570 }, { "epoch": 0.58, "grad_norm": 1.0127058052191331, "learning_rate": 4.028868066587384e-06, "loss": 0.5775, "step": 3571 }, { "epoch": 0.58, "grad_norm": 1.0794241081298137, "learning_rate": 4.026308167033191e-06, "loss": 0.6077, "step": 3572 }, { "epoch": 0.58, "grad_norm": 0.6378646439566517, "learning_rate": 4.023748532739496e-06, "loss": 0.4758, "step": 3573 }, { "epoch": 0.58, "grad_norm": 1.0945332890865889, "learning_rate": 4.0211891644036085e-06, "loss": 0.6466, "step": 3574 }, { "epoch": 0.58, "grad_norm": 0.669016074431678, "learning_rate": 4.018630062722774e-06, "loss": 0.5188, "step": 3575 }, { "epoch": 0.58, "grad_norm": 1.082421646202419, "learning_rate": 4.016071228394164e-06, "loss": 0.6084, "step": 3576 }, { "epoch": 0.58, "grad_norm": 0.6510619577249291, "learning_rate": 4.013512662114871e-06, "loss": 0.5087, "step": 3577 }, { "epoch": 0.58, "grad_norm": 0.8774765745410158, "learning_rate": 4.010954364581921e-06, "loss": 0.6094, "step": 3578 }, { "epoch": 0.58, "grad_norm": 1.0372123766379873, "learning_rate": 4.008396336492269e-06, "loss": 0.6835, "step": 3579 }, { "epoch": 0.58, "grad_norm": 0.9911761701962242, "learning_rate": 4.005838578542785e-06, "loss": 0.5903, "step": 3580 }, { "epoch": 0.58, "grad_norm": 1.029811107875484, "learning_rate": 4.003281091430279e-06, "loss": 0.6103, "step": 3581 }, { "epoch": 0.58, "grad_norm": 0.8812597368343738, "learning_rate": 4.000723875851477e-06, "loss": 0.5839, "step": 3582 }, { "epoch": 0.58, "grad_norm": 1.0692610629856572, "learning_rate": 3.998166932503037e-06, "loss": 0.6543, "step": 3583 }, { "epoch": 0.58, "grad_norm": 1.0636536097536027, "learning_rate": 3.99561026208154e-06, "loss": 0.5822, "step": 3584 }, { "epoch": 0.58, "grad_norm": 1.0355956029911022, "learning_rate": 3.993053865283492e-06, "loss": 0.6446, "step": 3585 }, { "epoch": 0.58, "grad_norm": 0.9493343317904043, "learning_rate": 3.9904977428053325e-06, "loss": 0.4843, "step": 3586 }, { "epoch": 0.58, "grad_norm": 0.9952773328140285, "learning_rate": 3.9879418953434105e-06, "loss": 0.5672, "step": 3587 }, { "epoch": 0.58, "grad_norm": 1.0224048810022972, "learning_rate": 3.9853863235940145e-06, "loss": 0.582, "step": 3588 }, { "epoch": 0.58, "grad_norm": 1.025766569928607, "learning_rate": 3.982831028253354e-06, "loss": 0.5604, "step": 3589 }, { "epoch": 0.58, "grad_norm": 0.8726111707183295, "learning_rate": 3.980276010017559e-06, "loss": 0.4965, "step": 3590 }, { "epoch": 0.58, "grad_norm": 0.9606217338927152, "learning_rate": 3.977721269582686e-06, "loss": 0.5225, "step": 3591 }, { "epoch": 0.58, "grad_norm": 0.9886663499745755, "learning_rate": 3.975166807644721e-06, "loss": 0.6256, "step": 3592 }, { "epoch": 0.58, "grad_norm": 1.0166462128536062, "learning_rate": 3.972612624899566e-06, "loss": 0.6158, "step": 3593 }, { "epoch": 0.58, "grad_norm": 0.6613396165630537, "learning_rate": 3.97005872204305e-06, "loss": 0.4713, "step": 3594 }, { "epoch": 0.58, "grad_norm": 0.9502643499529743, "learning_rate": 3.967505099770932e-06, "loss": 0.5892, "step": 3595 }, { "epoch": 0.58, "grad_norm": 0.960340871389481, "learning_rate": 3.964951758778885e-06, "loss": 0.654, "step": 3596 }, { "epoch": 0.58, "grad_norm": 1.0456755589248152, "learning_rate": 3.962398699762508e-06, "loss": 0.6544, "step": 3597 }, { "epoch": 0.58, "grad_norm": 0.7261441018983972, "learning_rate": 3.95984592341733e-06, "loss": 0.4937, "step": 3598 }, { "epoch": 0.58, "grad_norm": 0.9485254547007809, "learning_rate": 3.957293430438792e-06, "loss": 0.4863, "step": 3599 }, { "epoch": 0.58, "grad_norm": 0.9374146634641488, "learning_rate": 3.954741221522266e-06, "loss": 0.5973, "step": 3600 }, { "epoch": 0.58, "grad_norm": 1.045305859404248, "learning_rate": 3.952189297363047e-06, "loss": 0.6296, "step": 3601 }, { "epoch": 0.58, "grad_norm": 1.0073437740663223, "learning_rate": 3.949637658656343e-06, "loss": 0.63, "step": 3602 }, { "epoch": 0.58, "grad_norm": 1.029205877083787, "learning_rate": 3.947086306097295e-06, "loss": 0.6812, "step": 3603 }, { "epoch": 0.58, "grad_norm": 0.9654550308141081, "learning_rate": 3.944535240380961e-06, "loss": 0.5523, "step": 3604 }, { "epoch": 0.58, "grad_norm": 0.95038839953288, "learning_rate": 3.941984462202323e-06, "loss": 0.5832, "step": 3605 }, { "epoch": 0.58, "grad_norm": 1.0247128814822408, "learning_rate": 3.939433972256281e-06, "loss": 0.6253, "step": 3606 }, { "epoch": 0.58, "grad_norm": 1.006875444451758, "learning_rate": 3.936883771237658e-06, "loss": 0.5686, "step": 3607 }, { "epoch": 0.58, "grad_norm": 1.0501704226400725, "learning_rate": 3.934333859841204e-06, "loss": 0.6426, "step": 3608 }, { "epoch": 0.58, "grad_norm": 1.0335160748811831, "learning_rate": 3.931784238761579e-06, "loss": 0.6587, "step": 3609 }, { "epoch": 0.58, "grad_norm": 1.0398723581307618, "learning_rate": 3.929234908693373e-06, "loss": 0.6476, "step": 3610 }, { "epoch": 0.58, "grad_norm": 0.9720836322917509, "learning_rate": 3.9266858703310965e-06, "loss": 0.5354, "step": 3611 }, { "epoch": 0.58, "grad_norm": 1.0235817592106464, "learning_rate": 3.924137124369172e-06, "loss": 0.581, "step": 3612 }, { "epoch": 0.58, "grad_norm": 0.9344374769381187, "learning_rate": 3.9215886715019525e-06, "loss": 0.5651, "step": 3613 }, { "epoch": 0.58, "grad_norm": 1.0419043147510474, "learning_rate": 3.919040512423706e-06, "loss": 0.6655, "step": 3614 }, { "epoch": 0.58, "grad_norm": 0.9554011474037226, "learning_rate": 3.91649264782862e-06, "loss": 0.5231, "step": 3615 }, { "epoch": 0.58, "grad_norm": 1.1408753992748843, "learning_rate": 3.913945078410802e-06, "loss": 0.6954, "step": 3616 }, { "epoch": 0.58, "grad_norm": 1.019527657464595, "learning_rate": 3.911397804864285e-06, "loss": 0.6179, "step": 3617 }, { "epoch": 0.58, "grad_norm": 1.00836818832811, "learning_rate": 3.908850827883012e-06, "loss": 0.5775, "step": 3618 }, { "epoch": 0.58, "grad_norm": 1.0138906764525937, "learning_rate": 3.90630414816085e-06, "loss": 0.6412, "step": 3619 }, { "epoch": 0.58, "grad_norm": 0.6430768421241214, "learning_rate": 3.9037577663915885e-06, "loss": 0.4665, "step": 3620 }, { "epoch": 0.58, "grad_norm": 1.0093415253321745, "learning_rate": 3.901211683268928e-06, "loss": 0.6549, "step": 3621 }, { "epoch": 0.58, "grad_norm": 0.9538775654820494, "learning_rate": 3.898665899486493e-06, "loss": 0.507, "step": 3622 }, { "epoch": 0.58, "grad_norm": 1.0054704400063228, "learning_rate": 3.896120415737825e-06, "loss": 0.629, "step": 3623 }, { "epoch": 0.58, "grad_norm": 1.0034105497009433, "learning_rate": 3.893575232716387e-06, "loss": 0.6086, "step": 3624 }, { "epoch": 0.58, "grad_norm": 1.0017484607329237, "learning_rate": 3.891030351115552e-06, "loss": 0.58, "step": 3625 }, { "epoch": 0.58, "grad_norm": 0.9608848017939492, "learning_rate": 3.888485771628618e-06, "loss": 0.604, "step": 3626 }, { "epoch": 0.58, "grad_norm": 1.090375844173031, "learning_rate": 3.885941494948802e-06, "loss": 0.6254, "step": 3627 }, { "epoch": 0.58, "grad_norm": 0.8865414601880237, "learning_rate": 3.8833975217692285e-06, "loss": 0.5594, "step": 3628 }, { "epoch": 0.58, "grad_norm": 1.028636722987518, "learning_rate": 3.88085385278295e-06, "loss": 0.6588, "step": 3629 }, { "epoch": 0.58, "grad_norm": 1.0516760461341588, "learning_rate": 3.878310488682934e-06, "loss": 0.6799, "step": 3630 }, { "epoch": 0.59, "grad_norm": 1.054240845472082, "learning_rate": 3.875767430162058e-06, "loss": 0.6201, "step": 3631 }, { "epoch": 0.59, "grad_norm": 0.9821712063432436, "learning_rate": 3.873224677913124e-06, "loss": 0.6511, "step": 3632 }, { "epoch": 0.59, "grad_norm": 1.0828149086576655, "learning_rate": 3.870682232628848e-06, "loss": 0.6449, "step": 3633 }, { "epoch": 0.59, "grad_norm": 0.9587686176002178, "learning_rate": 3.8681400950018615e-06, "loss": 0.6138, "step": 3634 }, { "epoch": 0.59, "grad_norm": 0.9906677234458398, "learning_rate": 3.865598265724713e-06, "loss": 0.5973, "step": 3635 }, { "epoch": 0.59, "grad_norm": 0.9693627283449526, "learning_rate": 3.8630567454898676e-06, "loss": 0.5637, "step": 3636 }, { "epoch": 0.59, "grad_norm": 0.9635958691555139, "learning_rate": 3.860515534989704e-06, "loss": 0.5521, "step": 3637 }, { "epoch": 0.59, "grad_norm": 0.9776853309359553, "learning_rate": 3.857974634916517e-06, "loss": 0.6092, "step": 3638 }, { "epoch": 0.59, "grad_norm": 0.9919823100920274, "learning_rate": 3.8554340459625235e-06, "loss": 0.6157, "step": 3639 }, { "epoch": 0.59, "grad_norm": 0.9592799809249056, "learning_rate": 3.852893768819843e-06, "loss": 0.5655, "step": 3640 }, { "epoch": 0.59, "grad_norm": 1.0302782328014588, "learning_rate": 3.85035380418052e-06, "loss": 0.5606, "step": 3641 }, { "epoch": 0.59, "grad_norm": 0.9471612063342956, "learning_rate": 3.847814152736512e-06, "loss": 0.5105, "step": 3642 }, { "epoch": 0.59, "grad_norm": 1.0683907731608226, "learning_rate": 3.84527481517969e-06, "loss": 0.5703, "step": 3643 }, { "epoch": 0.59, "grad_norm": 0.9812978222579662, "learning_rate": 3.842735792201837e-06, "loss": 0.5607, "step": 3644 }, { "epoch": 0.59, "grad_norm": 0.9649701688631073, "learning_rate": 3.840197084494653e-06, "loss": 0.5839, "step": 3645 }, { "epoch": 0.59, "grad_norm": 1.1144162779059659, "learning_rate": 3.8376586927497565e-06, "loss": 0.6317, "step": 3646 }, { "epoch": 0.59, "grad_norm": 1.0057852413562807, "learning_rate": 3.835120617658669e-06, "loss": 0.5729, "step": 3647 }, { "epoch": 0.59, "grad_norm": 0.6248378964088227, "learning_rate": 3.832582859912838e-06, "loss": 0.4767, "step": 3648 }, { "epoch": 0.59, "grad_norm": 1.0457974670464516, "learning_rate": 3.8300454202036155e-06, "loss": 0.5634, "step": 3649 }, { "epoch": 0.59, "grad_norm": 1.0244525669570272, "learning_rate": 3.82750829922227e-06, "loss": 0.6233, "step": 3650 }, { "epoch": 0.59, "grad_norm": 1.0319010038810321, "learning_rate": 3.824971497659983e-06, "loss": 0.5837, "step": 3651 }, { "epoch": 0.59, "grad_norm": 0.985724371482178, "learning_rate": 3.8224350162078526e-06, "loss": 0.5603, "step": 3652 }, { "epoch": 0.59, "grad_norm": 0.9773095017922779, "learning_rate": 3.81989885555688e-06, "loss": 0.587, "step": 3653 }, { "epoch": 0.59, "grad_norm": 1.0466489033617228, "learning_rate": 3.81736301639799e-06, "loss": 0.6294, "step": 3654 }, { "epoch": 0.59, "grad_norm": 1.0013083651569465, "learning_rate": 3.814827499422016e-06, "loss": 0.5524, "step": 3655 }, { "epoch": 0.59, "grad_norm": 1.0693882044740042, "learning_rate": 3.8122923053196984e-06, "loss": 0.6253, "step": 3656 }, { "epoch": 0.59, "grad_norm": 1.0463034654771313, "learning_rate": 3.809757434781697e-06, "loss": 0.6604, "step": 3657 }, { "epoch": 0.59, "grad_norm": 1.027755000773748, "learning_rate": 3.8072228884985803e-06, "loss": 0.6065, "step": 3658 }, { "epoch": 0.59, "grad_norm": 1.0537201421606006, "learning_rate": 3.8046886671608264e-06, "loss": 0.5925, "step": 3659 }, { "epoch": 0.59, "grad_norm": 0.9882733363588228, "learning_rate": 3.802154771458828e-06, "loss": 0.6477, "step": 3660 }, { "epoch": 0.59, "grad_norm": 1.035328119940369, "learning_rate": 3.7996212020828915e-06, "loss": 0.6528, "step": 3661 }, { "epoch": 0.59, "grad_norm": 1.0391902086360811, "learning_rate": 3.797087959723225e-06, "loss": 0.5394, "step": 3662 }, { "epoch": 0.59, "grad_norm": 1.0101724814285764, "learning_rate": 3.7945550450699585e-06, "loss": 0.6649, "step": 3663 }, { "epoch": 0.59, "grad_norm": 0.636921762329395, "learning_rate": 3.7920224588131256e-06, "loss": 0.4822, "step": 3664 }, { "epoch": 0.59, "grad_norm": 1.039649732775452, "learning_rate": 3.7894902016426738e-06, "loss": 0.6275, "step": 3665 }, { "epoch": 0.59, "grad_norm": 1.0176045175959179, "learning_rate": 3.786958274248458e-06, "loss": 0.5725, "step": 3666 }, { "epoch": 0.59, "grad_norm": 0.954283123285502, "learning_rate": 3.7844266773202448e-06, "loss": 0.5167, "step": 3667 }, { "epoch": 0.59, "grad_norm": 0.998656315280911, "learning_rate": 3.7818954115477158e-06, "loss": 0.5774, "step": 3668 }, { "epoch": 0.59, "grad_norm": 0.857171099232043, "learning_rate": 3.7793644776204503e-06, "loss": 0.5464, "step": 3669 }, { "epoch": 0.59, "grad_norm": 0.9685128508313092, "learning_rate": 3.7768338762279493e-06, "loss": 0.5844, "step": 3670 }, { "epoch": 0.59, "grad_norm": 0.9451842011379081, "learning_rate": 3.7743036080596184e-06, "loss": 0.5733, "step": 3671 }, { "epoch": 0.59, "grad_norm": 1.0049774831781737, "learning_rate": 3.77177367380477e-06, "loss": 0.6287, "step": 3672 }, { "epoch": 0.59, "grad_norm": 0.9708478743800151, "learning_rate": 3.7692440741526293e-06, "loss": 0.6192, "step": 3673 }, { "epoch": 0.59, "grad_norm": 1.0671482179780776, "learning_rate": 3.76671480979233e-06, "loss": 0.5902, "step": 3674 }, { "epoch": 0.59, "grad_norm": 1.0640032070625849, "learning_rate": 3.7641858814129093e-06, "loss": 0.5895, "step": 3675 }, { "epoch": 0.59, "grad_norm": 0.9086950026591353, "learning_rate": 3.7616572897033223e-06, "loss": 0.5054, "step": 3676 }, { "epoch": 0.59, "grad_norm": 0.8865936394059376, "learning_rate": 3.7591290353524247e-06, "loss": 0.4583, "step": 3677 }, { "epoch": 0.59, "grad_norm": 1.0264330824186085, "learning_rate": 3.7566011190489815e-06, "loss": 0.5498, "step": 3678 }, { "epoch": 0.59, "grad_norm": 1.043436189062516, "learning_rate": 3.7540735414816685e-06, "loss": 0.5719, "step": 3679 }, { "epoch": 0.59, "grad_norm": 0.919636737589086, "learning_rate": 3.7515463033390676e-06, "loss": 0.5255, "step": 3680 }, { "epoch": 0.59, "grad_norm": 0.9250224772550386, "learning_rate": 3.7490194053096668e-06, "loss": 0.496, "step": 3681 }, { "epoch": 0.59, "grad_norm": 0.939856344823606, "learning_rate": 3.7464928480818623e-06, "loss": 0.4919, "step": 3682 }, { "epoch": 0.59, "grad_norm": 1.0346223794439071, "learning_rate": 3.7439666323439603e-06, "loss": 0.6701, "step": 3683 }, { "epoch": 0.59, "grad_norm": 0.933259065258618, "learning_rate": 3.741440758784172e-06, "loss": 0.6088, "step": 3684 }, { "epoch": 0.59, "grad_norm": 1.0570004191780462, "learning_rate": 3.738915228090611e-06, "loss": 0.6379, "step": 3685 }, { "epoch": 0.59, "grad_norm": 1.0318288603357135, "learning_rate": 3.736390040951304e-06, "loss": 0.6213, "step": 3686 }, { "epoch": 0.59, "grad_norm": 1.091764499969712, "learning_rate": 3.733865198054184e-06, "loss": 0.6524, "step": 3687 }, { "epoch": 0.59, "grad_norm": 1.072082840247095, "learning_rate": 3.7313407000870826e-06, "loss": 0.5865, "step": 3688 }, { "epoch": 0.59, "grad_norm": 0.9434084236197783, "learning_rate": 3.728816547737745e-06, "loss": 0.6035, "step": 3689 }, { "epoch": 0.59, "grad_norm": 1.0818286902320455, "learning_rate": 3.7262927416938234e-06, "loss": 0.6139, "step": 3690 }, { "epoch": 0.59, "grad_norm": 0.9334099724350631, "learning_rate": 3.7237692826428662e-06, "loss": 0.5408, "step": 3691 }, { "epoch": 0.59, "grad_norm": 1.0566960558738816, "learning_rate": 3.721246171272336e-06, "loss": 0.6409, "step": 3692 }, { "epoch": 0.6, "grad_norm": 1.0305981051713555, "learning_rate": 3.718723408269599e-06, "loss": 0.582, "step": 3693 }, { "epoch": 0.6, "grad_norm": 1.0092975564584266, "learning_rate": 3.7162009943219234e-06, "loss": 0.5413, "step": 3694 }, { "epoch": 0.6, "grad_norm": 0.9449371866673747, "learning_rate": 3.7136789301164854e-06, "loss": 0.5441, "step": 3695 }, { "epoch": 0.6, "grad_norm": 1.0709692124389771, "learning_rate": 3.7111572163403653e-06, "loss": 0.5534, "step": 3696 }, { "epoch": 0.6, "grad_norm": 0.9555718633938672, "learning_rate": 3.7086358536805455e-06, "loss": 0.566, "step": 3697 }, { "epoch": 0.6, "grad_norm": 0.9575918702428928, "learning_rate": 3.7061148428239147e-06, "loss": 0.6039, "step": 3698 }, { "epoch": 0.6, "grad_norm": 0.908542315185394, "learning_rate": 3.7035941844572687e-06, "loss": 0.5145, "step": 3699 }, { "epoch": 0.6, "grad_norm": 0.985297275370482, "learning_rate": 3.701073879267302e-06, "loss": 0.5886, "step": 3700 }, { "epoch": 0.6, "grad_norm": 0.9234656082570784, "learning_rate": 3.698553927940615e-06, "loss": 0.4895, "step": 3701 }, { "epoch": 0.6, "grad_norm": 1.0285237296142236, "learning_rate": 3.6960343311637132e-06, "loss": 0.6208, "step": 3702 }, { "epoch": 0.6, "grad_norm": 1.0562931810182334, "learning_rate": 3.6935150896230045e-06, "loss": 0.6351, "step": 3703 }, { "epoch": 0.6, "grad_norm": 1.308257613000332, "learning_rate": 3.6909962040047964e-06, "loss": 0.6813, "step": 3704 }, { "epoch": 0.6, "grad_norm": 1.0124062074607452, "learning_rate": 3.6884776749953065e-06, "loss": 0.6446, "step": 3705 }, { "epoch": 0.6, "grad_norm": 0.9711714314337834, "learning_rate": 3.6859595032806518e-06, "loss": 0.6212, "step": 3706 }, { "epoch": 0.6, "grad_norm": 1.0920171599617163, "learning_rate": 3.683441689546849e-06, "loss": 0.6771, "step": 3707 }, { "epoch": 0.6, "grad_norm": 0.9369927068211686, "learning_rate": 3.6809242344798207e-06, "loss": 0.5389, "step": 3708 }, { "epoch": 0.6, "grad_norm": 0.9367938641030269, "learning_rate": 3.6784071387653926e-06, "loss": 0.6136, "step": 3709 }, { "epoch": 0.6, "grad_norm": 1.0040852131615712, "learning_rate": 3.675890403089289e-06, "loss": 0.533, "step": 3710 }, { "epoch": 0.6, "grad_norm": 1.112536930927805, "learning_rate": 3.6733740281371377e-06, "loss": 0.6125, "step": 3711 }, { "epoch": 0.6, "grad_norm": 1.0274323160056607, "learning_rate": 3.670858014594473e-06, "loss": 0.6369, "step": 3712 }, { "epoch": 0.6, "grad_norm": 0.9903198715359927, "learning_rate": 3.66834236314672e-06, "loss": 0.5086, "step": 3713 }, { "epoch": 0.6, "grad_norm": 1.0374422086389528, "learning_rate": 3.665827074479215e-06, "loss": 0.6532, "step": 3714 }, { "epoch": 0.6, "grad_norm": 1.0019424313550227, "learning_rate": 3.6633121492771933e-06, "loss": 0.542, "step": 3715 }, { "epoch": 0.6, "grad_norm": 0.6828897592530904, "learning_rate": 3.6607975882257875e-06, "loss": 0.4592, "step": 3716 }, { "epoch": 0.6, "grad_norm": 1.0281136626208112, "learning_rate": 3.6582833920100336e-06, "loss": 0.5634, "step": 3717 }, { "epoch": 0.6, "grad_norm": 0.9760001597668401, "learning_rate": 3.655769561314869e-06, "loss": 0.5393, "step": 3718 }, { "epoch": 0.6, "grad_norm": 1.027252856895966, "learning_rate": 3.653256096825129e-06, "loss": 0.5786, "step": 3719 }, { "epoch": 0.6, "grad_norm": 1.0461158386862597, "learning_rate": 3.6507429992255503e-06, "loss": 0.5831, "step": 3720 }, { "epoch": 0.6, "grad_norm": 0.9730807327154681, "learning_rate": 3.648230269200775e-06, "loss": 0.5938, "step": 3721 }, { "epoch": 0.6, "grad_norm": 1.026528074930323, "learning_rate": 3.645717907435332e-06, "loss": 0.5867, "step": 3722 }, { "epoch": 0.6, "grad_norm": 0.9747641035795074, "learning_rate": 3.6432059146136633e-06, "loss": 0.5838, "step": 3723 }, { "epoch": 0.6, "grad_norm": 1.032551067774948, "learning_rate": 3.6406942914201045e-06, "loss": 0.5871, "step": 3724 }, { "epoch": 0.6, "grad_norm": 1.0405188636025802, "learning_rate": 3.6381830385388907e-06, "loss": 0.5902, "step": 3725 }, { "epoch": 0.6, "grad_norm": 1.035594332369752, "learning_rate": 3.635672156654154e-06, "loss": 0.6106, "step": 3726 }, { "epoch": 0.6, "grad_norm": 0.9461560612524657, "learning_rate": 3.6331616464499297e-06, "loss": 0.576, "step": 3727 }, { "epoch": 0.6, "grad_norm": 1.069606405132498, "learning_rate": 3.6306515086101522e-06, "loss": 0.6559, "step": 3728 }, { "epoch": 0.6, "grad_norm": 0.9593178916229198, "learning_rate": 3.6281417438186484e-06, "loss": 0.4979, "step": 3729 }, { "epoch": 0.6, "grad_norm": 0.9365949825020783, "learning_rate": 3.6256323527591496e-06, "loss": 0.5148, "step": 3730 }, { "epoch": 0.6, "grad_norm": 0.9071196365865485, "learning_rate": 3.623123336115284e-06, "loss": 0.6425, "step": 3731 }, { "epoch": 0.6, "grad_norm": 0.8857162582489134, "learning_rate": 3.6206146945705735e-06, "loss": 0.5876, "step": 3732 }, { "epoch": 0.6, "grad_norm": 1.0537968052530504, "learning_rate": 3.6181064288084423e-06, "loss": 0.6381, "step": 3733 }, { "epoch": 0.6, "grad_norm": 1.0710083113371174, "learning_rate": 3.6155985395122157e-06, "loss": 0.5272, "step": 3734 }, { "epoch": 0.6, "grad_norm": 1.0030239065559137, "learning_rate": 3.613091027365104e-06, "loss": 0.6065, "step": 3735 }, { "epoch": 0.6, "grad_norm": 1.0140373771734972, "learning_rate": 3.610583893050229e-06, "loss": 0.6166, "step": 3736 }, { "epoch": 0.6, "grad_norm": 0.9945480461370594, "learning_rate": 3.6080771372506017e-06, "loss": 0.5035, "step": 3737 }, { "epoch": 0.6, "grad_norm": 1.015427771330092, "learning_rate": 3.6055707606491297e-06, "loss": 0.5989, "step": 3738 }, { "epoch": 0.6, "grad_norm": 0.9661695759944301, "learning_rate": 3.6030647639286196e-06, "loss": 0.5115, "step": 3739 }, { "epoch": 0.6, "grad_norm": 0.9746666332410842, "learning_rate": 3.6005591477717766e-06, "loss": 0.5358, "step": 3740 }, { "epoch": 0.6, "grad_norm": 1.0397687798955546, "learning_rate": 3.598053912861196e-06, "loss": 0.5973, "step": 3741 }, { "epoch": 0.6, "grad_norm": 0.9967555690430571, "learning_rate": 3.5955490598793734e-06, "loss": 0.5378, "step": 3742 }, { "epoch": 0.6, "grad_norm": 0.986029809508707, "learning_rate": 3.5930445895087017e-06, "loss": 0.5898, "step": 3743 }, { "epoch": 0.6, "grad_norm": 0.6719587740086863, "learning_rate": 3.5905405024314683e-06, "loss": 0.5099, "step": 3744 }, { "epoch": 0.6, "grad_norm": 1.0386979394093914, "learning_rate": 3.588036799329853e-06, "loss": 0.5404, "step": 3745 }, { "epoch": 0.6, "grad_norm": 0.961626072896982, "learning_rate": 3.585533480885934e-06, "loss": 0.5343, "step": 3746 }, { "epoch": 0.6, "grad_norm": 1.0124418587997555, "learning_rate": 3.5830305477816863e-06, "loss": 0.5607, "step": 3747 }, { "epoch": 0.6, "grad_norm": 0.8621972663090065, "learning_rate": 3.580528000698975e-06, "loss": 0.505, "step": 3748 }, { "epoch": 0.6, "grad_norm": 0.6921688698729664, "learning_rate": 3.5780258403195635e-06, "loss": 0.4988, "step": 3749 }, { "epoch": 0.6, "grad_norm": 0.9674436575349495, "learning_rate": 3.5755240673251125e-06, "loss": 0.5687, "step": 3750 }, { "epoch": 0.6, "grad_norm": 1.0101681946376775, "learning_rate": 3.5730226823971693e-06, "loss": 0.5953, "step": 3751 }, { "epoch": 0.6, "grad_norm": 1.0591658946321683, "learning_rate": 3.5705216862171823e-06, "loss": 0.6232, "step": 3752 }, { "epoch": 0.6, "grad_norm": 1.0626841687338044, "learning_rate": 3.568021079466494e-06, "loss": 0.6207, "step": 3753 }, { "epoch": 0.6, "grad_norm": 1.007311192565927, "learning_rate": 3.5655208628263345e-06, "loss": 0.5529, "step": 3754 }, { "epoch": 0.61, "grad_norm": 0.9236009604488256, "learning_rate": 3.563021036977834e-06, "loss": 0.4992, "step": 3755 }, { "epoch": 0.61, "grad_norm": 1.0674966716552712, "learning_rate": 3.560521602602014e-06, "loss": 0.5689, "step": 3756 }, { "epoch": 0.61, "grad_norm": 1.0254910971082718, "learning_rate": 3.5580225603797873e-06, "loss": 0.614, "step": 3757 }, { "epoch": 0.61, "grad_norm": 0.9855145166046688, "learning_rate": 3.5555239109919647e-06, "loss": 0.5802, "step": 3758 }, { "epoch": 0.61, "grad_norm": 1.0386564201646364, "learning_rate": 3.5530256551192467e-06, "loss": 0.5977, "step": 3759 }, { "epoch": 0.61, "grad_norm": 0.9745676066325097, "learning_rate": 3.5505277934422254e-06, "loss": 0.5789, "step": 3760 }, { "epoch": 0.61, "grad_norm": 1.010156681798062, "learning_rate": 3.5480303266413884e-06, "loss": 0.5099, "step": 3761 }, { "epoch": 0.61, "grad_norm": 0.9424613633755482, "learning_rate": 3.545533255397112e-06, "loss": 0.4641, "step": 3762 }, { "epoch": 0.61, "grad_norm": 1.0760814249442308, "learning_rate": 3.5430365803896736e-06, "loss": 0.5708, "step": 3763 }, { "epoch": 0.61, "grad_norm": 1.0939593328510797, "learning_rate": 3.540540302299229e-06, "loss": 0.6189, "step": 3764 }, { "epoch": 0.61, "grad_norm": 1.0033054315733312, "learning_rate": 3.5380444218058374e-06, "loss": 0.6512, "step": 3765 }, { "epoch": 0.61, "grad_norm": 1.0390242538863326, "learning_rate": 3.5355489395894448e-06, "loss": 0.6106, "step": 3766 }, { "epoch": 0.61, "grad_norm": 0.9220934988649714, "learning_rate": 3.533053856329889e-06, "loss": 0.5308, "step": 3767 }, { "epoch": 0.61, "grad_norm": 0.999644982687219, "learning_rate": 3.5305591727068984e-06, "loss": 0.6297, "step": 3768 }, { "epoch": 0.61, "grad_norm": 0.9681998955100389, "learning_rate": 3.5280648894000957e-06, "loss": 0.512, "step": 3769 }, { "epoch": 0.61, "grad_norm": 1.0945452887477485, "learning_rate": 3.5255710070889903e-06, "loss": 0.5709, "step": 3770 }, { "epoch": 0.61, "grad_norm": 0.6743879012061959, "learning_rate": 3.5230775264529837e-06, "loss": 0.5036, "step": 3771 }, { "epoch": 0.61, "grad_norm": 1.0304830272793255, "learning_rate": 3.520584448171375e-06, "loss": 0.5843, "step": 3772 }, { "epoch": 0.61, "grad_norm": 0.6462321439523159, "learning_rate": 3.518091772923339e-06, "loss": 0.4509, "step": 3773 }, { "epoch": 0.61, "grad_norm": 1.0385460809770495, "learning_rate": 3.515599501387954e-06, "loss": 0.6379, "step": 3774 }, { "epoch": 0.61, "grad_norm": 1.0030173222407714, "learning_rate": 3.5131076342441838e-06, "loss": 0.6234, "step": 3775 }, { "epoch": 0.61, "grad_norm": 0.6417501162560092, "learning_rate": 3.5106161721708797e-06, "loss": 0.4729, "step": 3776 }, { "epoch": 0.61, "grad_norm": 0.668150555746875, "learning_rate": 3.508125115846785e-06, "loss": 0.5046, "step": 3777 }, { "epoch": 0.61, "grad_norm": 0.974666462778021, "learning_rate": 3.5056344659505335e-06, "loss": 0.5618, "step": 3778 }, { "epoch": 0.61, "grad_norm": 1.047299274571347, "learning_rate": 3.503144223160644e-06, "loss": 0.6341, "step": 3779 }, { "epoch": 0.61, "grad_norm": 0.970053916181946, "learning_rate": 3.5006543881555304e-06, "loss": 0.5466, "step": 3780 }, { "epoch": 0.61, "grad_norm": 1.1330396493119341, "learning_rate": 3.4981649616134912e-06, "loss": 0.6785, "step": 3781 }, { "epoch": 0.61, "grad_norm": 1.061182426738625, "learning_rate": 3.495675944212715e-06, "loss": 0.5731, "step": 3782 }, { "epoch": 0.61, "grad_norm": 1.025664740388029, "learning_rate": 3.4931873366312785e-06, "loss": 0.6098, "step": 3783 }, { "epoch": 0.61, "grad_norm": 0.8937068979590761, "learning_rate": 3.490699139547146e-06, "loss": 0.5564, "step": 3784 }, { "epoch": 0.61, "grad_norm": 1.0820352482690225, "learning_rate": 3.4882113536381744e-06, "loss": 0.5599, "step": 3785 }, { "epoch": 0.61, "grad_norm": 1.0334731082963249, "learning_rate": 3.4857239795821003e-06, "loss": 0.6502, "step": 3786 }, { "epoch": 0.61, "grad_norm": 0.9884288461889096, "learning_rate": 3.483237018056556e-06, "loss": 0.5464, "step": 3787 }, { "epoch": 0.61, "grad_norm": 1.0365161320607403, "learning_rate": 3.480750469739059e-06, "loss": 0.6674, "step": 3788 }, { "epoch": 0.61, "grad_norm": 1.0941740081468672, "learning_rate": 3.478264335307011e-06, "loss": 0.6813, "step": 3789 }, { "epoch": 0.61, "grad_norm": 0.9652479966212781, "learning_rate": 3.475778615437706e-06, "loss": 0.5583, "step": 3790 }, { "epoch": 0.61, "grad_norm": 0.9919937186626112, "learning_rate": 3.4732933108083218e-06, "loss": 0.6403, "step": 3791 }, { "epoch": 0.61, "grad_norm": 1.0708996079670052, "learning_rate": 3.470808422095923e-06, "loss": 0.7034, "step": 3792 }, { "epoch": 0.61, "grad_norm": 0.9765262810141593, "learning_rate": 3.4683239499774606e-06, "loss": 0.5522, "step": 3793 }, { "epoch": 0.61, "grad_norm": 0.8977404724686663, "learning_rate": 3.465839895129779e-06, "loss": 0.4889, "step": 3794 }, { "epoch": 0.61, "grad_norm": 0.9280739806044486, "learning_rate": 3.463356258229596e-06, "loss": 0.5496, "step": 3795 }, { "epoch": 0.61, "grad_norm": 1.0657170922593124, "learning_rate": 3.4608730399535273e-06, "loss": 0.5261, "step": 3796 }, { "epoch": 0.61, "grad_norm": 0.9949565918874539, "learning_rate": 3.4583902409780693e-06, "loss": 0.6083, "step": 3797 }, { "epoch": 0.61, "grad_norm": 1.0440458759239095, "learning_rate": 3.4559078619796036e-06, "loss": 0.6129, "step": 3798 }, { "epoch": 0.61, "grad_norm": 0.7112464100255558, "learning_rate": 3.4534259036343996e-06, "loss": 0.4976, "step": 3799 }, { "epoch": 0.61, "grad_norm": 1.0007167626916826, "learning_rate": 3.450944366618613e-06, "loss": 0.4835, "step": 3800 }, { "epoch": 0.61, "grad_norm": 0.9983836366821283, "learning_rate": 3.4484632516082784e-06, "loss": 0.6277, "step": 3801 }, { "epoch": 0.61, "grad_norm": 1.0702889912854148, "learning_rate": 3.445982559279322e-06, "loss": 0.6222, "step": 3802 }, { "epoch": 0.61, "grad_norm": 0.9909125383805912, "learning_rate": 3.4435022903075536e-06, "loss": 0.5702, "step": 3803 }, { "epoch": 0.61, "grad_norm": 1.092927579175884, "learning_rate": 3.441022445368668e-06, "loss": 0.6606, "step": 3804 }, { "epoch": 0.61, "grad_norm": 0.997139841203399, "learning_rate": 3.4385430251382407e-06, "loss": 0.5478, "step": 3805 }, { "epoch": 0.61, "grad_norm": 1.013557843900264, "learning_rate": 3.4360640302917353e-06, "loss": 0.5567, "step": 3806 }, { "epoch": 0.61, "grad_norm": 1.0851471345922516, "learning_rate": 3.433585461504499e-06, "loss": 0.5636, "step": 3807 }, { "epoch": 0.61, "grad_norm": 1.0669344730340906, "learning_rate": 3.4311073194517596e-06, "loss": 0.6132, "step": 3808 }, { "epoch": 0.61, "grad_norm": 0.945417068996163, "learning_rate": 3.4286296048086343e-06, "loss": 0.5444, "step": 3809 }, { "epoch": 0.61, "grad_norm": 1.1078145189560855, "learning_rate": 3.42615231825012e-06, "loss": 0.6499, "step": 3810 }, { "epoch": 0.61, "grad_norm": 1.0910434657081942, "learning_rate": 3.423675460451097e-06, "loss": 0.5797, "step": 3811 }, { "epoch": 0.61, "grad_norm": 1.1091749285038728, "learning_rate": 3.4211990320863307e-06, "loss": 0.6325, "step": 3812 }, { "epoch": 0.61, "grad_norm": 0.9652520994772008, "learning_rate": 3.4187230338304684e-06, "loss": 0.5161, "step": 3813 }, { "epoch": 0.61, "grad_norm": 1.0093500327544223, "learning_rate": 3.416247466358039e-06, "loss": 0.6204, "step": 3814 }, { "epoch": 0.61, "grad_norm": 0.9716010010385902, "learning_rate": 3.413772330343455e-06, "loss": 0.5435, "step": 3815 }, { "epoch": 0.61, "grad_norm": 0.9334389388879336, "learning_rate": 3.4112976264610167e-06, "loss": 0.5108, "step": 3816 }, { "epoch": 0.62, "grad_norm": 1.0416967376825383, "learning_rate": 3.408823355384894e-06, "loss": 0.7029, "step": 3817 }, { "epoch": 0.62, "grad_norm": 0.9808993305224007, "learning_rate": 3.406349517789151e-06, "loss": 0.5983, "step": 3818 }, { "epoch": 0.62, "grad_norm": 0.9403708357453641, "learning_rate": 3.4038761143477296e-06, "loss": 0.6402, "step": 3819 }, { "epoch": 0.62, "grad_norm": 1.0255942724402032, "learning_rate": 3.4014031457344517e-06, "loss": 0.5946, "step": 3820 }, { "epoch": 0.62, "grad_norm": 1.0136123411115194, "learning_rate": 3.3989306126230226e-06, "loss": 0.6333, "step": 3821 }, { "epoch": 0.62, "grad_norm": 1.1014044416695128, "learning_rate": 3.3964585156870267e-06, "loss": 0.649, "step": 3822 }, { "epoch": 0.62, "grad_norm": 1.0704261066864926, "learning_rate": 3.393986855599936e-06, "loss": 0.6005, "step": 3823 }, { "epoch": 0.62, "grad_norm": 1.0293231100131388, "learning_rate": 3.391515633035093e-06, "loss": 0.6228, "step": 3824 }, { "epoch": 0.62, "grad_norm": 1.0497042683783568, "learning_rate": 3.389044848665731e-06, "loss": 0.5841, "step": 3825 }, { "epoch": 0.62, "grad_norm": 0.9889951022187614, "learning_rate": 3.3865745031649595e-06, "loss": 0.5989, "step": 3826 }, { "epoch": 0.62, "grad_norm": 1.009225706833987, "learning_rate": 3.3841045972057663e-06, "loss": 0.587, "step": 3827 }, { "epoch": 0.62, "grad_norm": 0.6881615793553358, "learning_rate": 3.3816351314610235e-06, "loss": 0.4986, "step": 3828 }, { "epoch": 0.62, "grad_norm": 0.981582931917428, "learning_rate": 3.379166106603482e-06, "loss": 0.6434, "step": 3829 }, { "epoch": 0.62, "grad_norm": 0.999532755734759, "learning_rate": 3.3766975233057715e-06, "loss": 0.5965, "step": 3830 }, { "epoch": 0.62, "grad_norm": 0.9925510671524317, "learning_rate": 3.3742293822404005e-06, "loss": 0.6545, "step": 3831 }, { "epoch": 0.62, "grad_norm": 1.0584546470653944, "learning_rate": 3.371761684079763e-06, "loss": 0.611, "step": 3832 }, { "epoch": 0.62, "grad_norm": 0.9643934712837668, "learning_rate": 3.369294429496124e-06, "loss": 0.5979, "step": 3833 }, { "epoch": 0.62, "grad_norm": 1.1105021994254516, "learning_rate": 3.366827619161632e-06, "loss": 0.4818, "step": 3834 }, { "epoch": 0.62, "grad_norm": 1.0063002848521414, "learning_rate": 3.364361253748318e-06, "loss": 0.6076, "step": 3835 }, { "epoch": 0.62, "grad_norm": 0.9740458883440557, "learning_rate": 3.361895333928083e-06, "loss": 0.5727, "step": 3836 }, { "epoch": 0.62, "grad_norm": 1.047983319551925, "learning_rate": 3.3594298603727126e-06, "loss": 0.6167, "step": 3837 }, { "epoch": 0.62, "grad_norm": 1.0238092677234603, "learning_rate": 3.356964833753875e-06, "loss": 0.5851, "step": 3838 }, { "epoch": 0.62, "grad_norm": 1.0280806689082074, "learning_rate": 3.3545002547431034e-06, "loss": 0.6203, "step": 3839 }, { "epoch": 0.62, "grad_norm": 1.0236209512934518, "learning_rate": 3.3520361240118216e-06, "loss": 0.6073, "step": 3840 }, { "epoch": 0.62, "grad_norm": 1.0531921026539857, "learning_rate": 3.3495724422313262e-06, "loss": 0.5258, "step": 3841 }, { "epoch": 0.62, "grad_norm": 1.0495024651792977, "learning_rate": 3.347109210072793e-06, "loss": 0.55, "step": 3842 }, { "epoch": 0.62, "grad_norm": 0.9861963312732116, "learning_rate": 3.3446464282072723e-06, "loss": 0.6009, "step": 3843 }, { "epoch": 0.62, "grad_norm": 1.0215939802065237, "learning_rate": 3.3421840973056935e-06, "loss": 0.6162, "step": 3844 }, { "epoch": 0.62, "grad_norm": 1.096611425229634, "learning_rate": 3.3397222180388677e-06, "loss": 0.6058, "step": 3845 }, { "epoch": 0.62, "grad_norm": 0.9837178727566318, "learning_rate": 3.3372607910774726e-06, "loss": 0.5588, "step": 3846 }, { "epoch": 0.62, "grad_norm": 1.0430030139893576, "learning_rate": 3.3347998170920724e-06, "loss": 0.6685, "step": 3847 }, { "epoch": 0.62, "grad_norm": 1.0483787884757538, "learning_rate": 3.3323392967531043e-06, "loss": 0.6181, "step": 3848 }, { "epoch": 0.62, "grad_norm": 0.9593660690088923, "learning_rate": 3.32987923073088e-06, "loss": 0.5636, "step": 3849 }, { "epoch": 0.62, "grad_norm": 1.0171221211863994, "learning_rate": 3.327419619695591e-06, "loss": 0.6092, "step": 3850 }, { "epoch": 0.62, "grad_norm": 1.1164319239268619, "learning_rate": 3.3249604643173037e-06, "loss": 0.6894, "step": 3851 }, { "epoch": 0.62, "grad_norm": 1.0179417622328548, "learning_rate": 3.3225017652659577e-06, "loss": 0.4866, "step": 3852 }, { "epoch": 0.62, "grad_norm": 1.0404798651394507, "learning_rate": 3.3200435232113694e-06, "loss": 0.601, "step": 3853 }, { "epoch": 0.62, "grad_norm": 0.9838921438738345, "learning_rate": 3.3175857388232376e-06, "loss": 0.5197, "step": 3854 }, { "epoch": 0.62, "grad_norm": 1.031423045261859, "learning_rate": 3.3151284127711227e-06, "loss": 0.5675, "step": 3855 }, { "epoch": 0.62, "grad_norm": 1.0434942394479056, "learning_rate": 3.312671545724474e-06, "loss": 0.5814, "step": 3856 }, { "epoch": 0.62, "grad_norm": 1.0556089090014797, "learning_rate": 3.3102151383526077e-06, "loss": 0.6185, "step": 3857 }, { "epoch": 0.62, "grad_norm": 1.0753541375345395, "learning_rate": 3.3077591913247166e-06, "loss": 0.5566, "step": 3858 }, { "epoch": 0.62, "grad_norm": 1.0163533202198782, "learning_rate": 3.305303705309868e-06, "loss": 0.5634, "step": 3859 }, { "epoch": 0.62, "grad_norm": 1.0907485922303994, "learning_rate": 3.3028486809770046e-06, "loss": 0.6315, "step": 3860 }, { "epoch": 0.62, "grad_norm": 1.022000890092407, "learning_rate": 3.300394118994944e-06, "loss": 0.5632, "step": 3861 }, { "epoch": 0.62, "grad_norm": 1.077016789156783, "learning_rate": 3.297940020032374e-06, "loss": 0.6239, "step": 3862 }, { "epoch": 0.62, "grad_norm": 0.7277498780297968, "learning_rate": 3.295486384757861e-06, "loss": 0.5215, "step": 3863 }, { "epoch": 0.62, "grad_norm": 0.9914723162909143, "learning_rate": 3.2930332138398422e-06, "loss": 0.5317, "step": 3864 }, { "epoch": 0.62, "grad_norm": 1.0380908649909524, "learning_rate": 3.2905805079466284e-06, "loss": 0.5879, "step": 3865 }, { "epoch": 0.62, "grad_norm": 1.0138116000033237, "learning_rate": 3.2881282677464034e-06, "loss": 0.6106, "step": 3866 }, { "epoch": 0.62, "grad_norm": 0.961425936120296, "learning_rate": 3.2856764939072294e-06, "loss": 0.5415, "step": 3867 }, { "epoch": 0.62, "grad_norm": 0.6584740833075744, "learning_rate": 3.283225187097031e-06, "loss": 0.4866, "step": 3868 }, { "epoch": 0.62, "grad_norm": 0.9615580717935966, "learning_rate": 3.2807743479836155e-06, "loss": 0.5571, "step": 3869 }, { "epoch": 0.62, "grad_norm": 0.9550526246979025, "learning_rate": 3.27832397723466e-06, "loss": 0.5318, "step": 3870 }, { "epoch": 0.62, "grad_norm": 1.0677613108747284, "learning_rate": 3.27587407551771e-06, "loss": 0.6134, "step": 3871 }, { "epoch": 0.62, "grad_norm": 0.9922388490368044, "learning_rate": 3.273424643500187e-06, "loss": 0.5888, "step": 3872 }, { "epoch": 0.62, "grad_norm": 1.0540217881380682, "learning_rate": 3.2709756818493867e-06, "loss": 0.5908, "step": 3873 }, { "epoch": 0.62, "grad_norm": 1.1052795806249633, "learning_rate": 3.26852719123247e-06, "loss": 0.5983, "step": 3874 }, { "epoch": 0.62, "grad_norm": 0.9467924582273569, "learning_rate": 3.266079172316473e-06, "loss": 0.5271, "step": 3875 }, { "epoch": 0.62, "grad_norm": 0.9873677001461719, "learning_rate": 3.263631625768309e-06, "loss": 0.5883, "step": 3876 }, { "epoch": 0.62, "grad_norm": 1.0277961079433071, "learning_rate": 3.2611845522547503e-06, "loss": 0.5514, "step": 3877 }, { "epoch": 0.62, "grad_norm": 0.9640356475261352, "learning_rate": 3.2587379524424513e-06, "loss": 0.6418, "step": 3878 }, { "epoch": 0.62, "grad_norm": 0.9306280423357934, "learning_rate": 3.2562918269979334e-06, "loss": 0.5543, "step": 3879 }, { "epoch": 0.63, "grad_norm": 0.9874498620896189, "learning_rate": 3.253846176587586e-06, "loss": 0.6491, "step": 3880 }, { "epoch": 0.63, "grad_norm": 1.0175588846231591, "learning_rate": 3.251401001877673e-06, "loss": 0.6684, "step": 3881 }, { "epoch": 0.63, "grad_norm": 0.7127520043173042, "learning_rate": 3.2489563035343276e-06, "loss": 0.4573, "step": 3882 }, { "epoch": 0.63, "grad_norm": 1.046964530426077, "learning_rate": 3.246512082223555e-06, "loss": 0.5647, "step": 3883 }, { "epoch": 0.63, "grad_norm": 1.0364448838734412, "learning_rate": 3.2440683386112238e-06, "loss": 0.5877, "step": 3884 }, { "epoch": 0.63, "grad_norm": 0.9810602879243683, "learning_rate": 3.24162507336308e-06, "loss": 0.5139, "step": 3885 }, { "epoch": 0.63, "grad_norm": 1.0750372116204037, "learning_rate": 3.2391822871447377e-06, "loss": 0.5118, "step": 3886 }, { "epoch": 0.63, "grad_norm": 1.0437136388529185, "learning_rate": 3.2367399806216765e-06, "loss": 0.6162, "step": 3887 }, { "epoch": 0.63, "grad_norm": 0.6642080921326198, "learning_rate": 3.234298154459249e-06, "loss": 0.4872, "step": 3888 }, { "epoch": 0.63, "grad_norm": 0.9747672796951058, "learning_rate": 3.231856809322677e-06, "loss": 0.5257, "step": 3889 }, { "epoch": 0.63, "grad_norm": 0.9604167273488331, "learning_rate": 3.229415945877048e-06, "loss": 0.4676, "step": 3890 }, { "epoch": 0.63, "grad_norm": 1.040546834155076, "learning_rate": 3.226975564787322e-06, "loss": 0.5823, "step": 3891 }, { "epoch": 0.63, "grad_norm": 1.0658304899066287, "learning_rate": 3.224535666718327e-06, "loss": 0.5483, "step": 3892 }, { "epoch": 0.63, "grad_norm": 1.0114800986989505, "learning_rate": 3.2220962523347567e-06, "loss": 0.5761, "step": 3893 }, { "epoch": 0.63, "grad_norm": 1.0324232040563512, "learning_rate": 3.219657322301175e-06, "loss": 0.5825, "step": 3894 }, { "epoch": 0.63, "grad_norm": 1.1213028642852583, "learning_rate": 3.2172188772820154e-06, "loss": 0.6016, "step": 3895 }, { "epoch": 0.63, "grad_norm": 0.9161299632257038, "learning_rate": 3.214780917941575e-06, "loss": 0.4775, "step": 3896 }, { "epoch": 0.63, "grad_norm": 1.0684266603916466, "learning_rate": 3.212343444944022e-06, "loss": 0.572, "step": 3897 }, { "epoch": 0.63, "grad_norm": 1.07930994972836, "learning_rate": 3.209906458953394e-06, "loss": 0.6814, "step": 3898 }, { "epoch": 0.63, "grad_norm": 0.9823765783505446, "learning_rate": 3.207469960633588e-06, "loss": 0.5735, "step": 3899 }, { "epoch": 0.63, "grad_norm": 0.9964728991615143, "learning_rate": 3.2050339506483774e-06, "loss": 0.5309, "step": 3900 }, { "epoch": 0.63, "grad_norm": 0.9701642956689647, "learning_rate": 3.2025984296613965e-06, "loss": 0.588, "step": 3901 }, { "epoch": 0.63, "grad_norm": 1.0205962542619393, "learning_rate": 3.200163398336151e-06, "loss": 0.5608, "step": 3902 }, { "epoch": 0.63, "grad_norm": 1.0095257759575251, "learning_rate": 3.1977288573360064e-06, "loss": 0.5441, "step": 3903 }, { "epoch": 0.63, "grad_norm": 0.9797308776978978, "learning_rate": 3.1952948073242006e-06, "loss": 0.5662, "step": 3904 }, { "epoch": 0.63, "grad_norm": 0.9433515548605147, "learning_rate": 3.19286124896384e-06, "loss": 0.6069, "step": 3905 }, { "epoch": 0.63, "grad_norm": 0.9823302178287, "learning_rate": 3.190428182917885e-06, "loss": 0.5448, "step": 3906 }, { "epoch": 0.63, "grad_norm": 1.0107005098866073, "learning_rate": 3.187995609849176e-06, "loss": 0.5432, "step": 3907 }, { "epoch": 0.63, "grad_norm": 0.9877543528653476, "learning_rate": 3.1855635304204113e-06, "loss": 0.5698, "step": 3908 }, { "epoch": 0.63, "grad_norm": 1.0127885371414798, "learning_rate": 3.1831319452941557e-06, "loss": 0.5861, "step": 3909 }, { "epoch": 0.63, "grad_norm": 1.1054590602451397, "learning_rate": 3.1807008551328407e-06, "loss": 0.6148, "step": 3910 }, { "epoch": 0.63, "grad_norm": 0.9365363089327245, "learning_rate": 3.1782702605987623e-06, "loss": 0.517, "step": 3911 }, { "epoch": 0.63, "grad_norm": 0.9800314041370826, "learning_rate": 3.175840162354081e-06, "loss": 0.6697, "step": 3912 }, { "epoch": 0.63, "grad_norm": 1.0280127140618784, "learning_rate": 3.1734105610608213e-06, "loss": 0.6088, "step": 3913 }, { "epoch": 0.63, "grad_norm": 1.0145710239859964, "learning_rate": 3.1709814573808766e-06, "loss": 0.5693, "step": 3914 }, { "epoch": 0.63, "grad_norm": 1.0268224268479194, "learning_rate": 3.168552851976e-06, "loss": 0.5772, "step": 3915 }, { "epoch": 0.63, "grad_norm": 1.0854266377996473, "learning_rate": 3.1661247455078097e-06, "loss": 0.6609, "step": 3916 }, { "epoch": 0.63, "grad_norm": 1.0064409912197665, "learning_rate": 3.163697138637791e-06, "loss": 0.6255, "step": 3917 }, { "epoch": 0.63, "grad_norm": 1.0181912941069655, "learning_rate": 3.161270032027289e-06, "loss": 0.653, "step": 3918 }, { "epoch": 0.63, "grad_norm": 1.0529235769020393, "learning_rate": 3.1588434263375146e-06, "loss": 0.6221, "step": 3919 }, { "epoch": 0.63, "grad_norm": 0.7093248634960904, "learning_rate": 3.156417322229543e-06, "loss": 0.4734, "step": 3920 }, { "epoch": 0.63, "grad_norm": 1.0317601315287412, "learning_rate": 3.153991720364313e-06, "loss": 0.5809, "step": 3921 }, { "epoch": 0.63, "grad_norm": 0.9941643082894005, "learning_rate": 3.151566621402622e-06, "loss": 0.6072, "step": 3922 }, { "epoch": 0.63, "grad_norm": 0.9527915073207505, "learning_rate": 3.1491420260051362e-06, "loss": 0.5579, "step": 3923 }, { "epoch": 0.63, "grad_norm": 0.9632165856033459, "learning_rate": 3.146717934832383e-06, "loss": 0.544, "step": 3924 }, { "epoch": 0.63, "grad_norm": 1.0069444861692525, "learning_rate": 3.1442943485447493e-06, "loss": 0.5907, "step": 3925 }, { "epoch": 0.63, "grad_norm": 1.0434127370633832, "learning_rate": 3.1418712678024866e-06, "loss": 0.6144, "step": 3926 }, { "epoch": 0.63, "grad_norm": 1.048718618294521, "learning_rate": 3.1394486932657133e-06, "loss": 0.5173, "step": 3927 }, { "epoch": 0.63, "grad_norm": 0.6605594509395147, "learning_rate": 3.137026625594399e-06, "loss": 0.4878, "step": 3928 }, { "epoch": 0.63, "grad_norm": 1.1391659174370299, "learning_rate": 3.1346050654483867e-06, "loss": 0.5102, "step": 3929 }, { "epoch": 0.63, "grad_norm": 0.9176096080073862, "learning_rate": 3.132184013487375e-06, "loss": 0.5154, "step": 3930 }, { "epoch": 0.63, "grad_norm": 1.003544181392987, "learning_rate": 3.129763470370924e-06, "loss": 0.4953, "step": 3931 }, { "epoch": 0.63, "grad_norm": 0.9733027541948882, "learning_rate": 3.1273434367584567e-06, "loss": 0.6208, "step": 3932 }, { "epoch": 0.63, "grad_norm": 1.0427152419050678, "learning_rate": 3.124923913309259e-06, "loss": 0.583, "step": 3933 }, { "epoch": 0.63, "grad_norm": 1.0123207416349886, "learning_rate": 3.1225049006824724e-06, "loss": 0.646, "step": 3934 }, { "epoch": 0.63, "grad_norm": 1.0700395644681315, "learning_rate": 3.1200863995371035e-06, "loss": 0.569, "step": 3935 }, { "epoch": 0.63, "grad_norm": 1.0121122100806814, "learning_rate": 3.1176684105320208e-06, "loss": 0.5601, "step": 3936 }, { "epoch": 0.63, "grad_norm": 1.0354226557393735, "learning_rate": 3.1152509343259494e-06, "loss": 0.5317, "step": 3937 }, { "epoch": 0.63, "grad_norm": 1.0493509529674856, "learning_rate": 3.112833971577478e-06, "loss": 0.6224, "step": 3938 }, { "epoch": 0.63, "grad_norm": 1.0685350193886183, "learning_rate": 3.1104175229450517e-06, "loss": 0.6386, "step": 3939 }, { "epoch": 0.63, "grad_norm": 0.9923083837423622, "learning_rate": 3.1080015890869796e-06, "loss": 0.5829, "step": 3940 }, { "epoch": 0.63, "grad_norm": 0.9283431805266033, "learning_rate": 3.1055861706614264e-06, "loss": 0.5328, "step": 3941 }, { "epoch": 0.64, "grad_norm": 1.0854677475824348, "learning_rate": 3.1031712683264204e-06, "loss": 0.5204, "step": 3942 }, { "epoch": 0.64, "grad_norm": 1.0388740428344243, "learning_rate": 3.1007568827398495e-06, "loss": 0.4937, "step": 3943 }, { "epoch": 0.64, "grad_norm": 0.9577553843347524, "learning_rate": 3.0983430145594547e-06, "loss": 0.5325, "step": 3944 }, { "epoch": 0.64, "grad_norm": 1.0226161745670226, "learning_rate": 3.0959296644428427e-06, "loss": 0.5775, "step": 3945 }, { "epoch": 0.64, "grad_norm": 1.0168065622439244, "learning_rate": 3.0935168330474763e-06, "loss": 0.673, "step": 3946 }, { "epoch": 0.64, "grad_norm": 0.9236844414712579, "learning_rate": 3.0911045210306767e-06, "loss": 0.5031, "step": 3947 }, { "epoch": 0.64, "grad_norm": 0.9867505138359844, "learning_rate": 3.088692729049624e-06, "loss": 0.5637, "step": 3948 }, { "epoch": 0.64, "grad_norm": 0.6791108286637646, "learning_rate": 3.0862814577613598e-06, "loss": 0.4872, "step": 3949 }, { "epoch": 0.64, "grad_norm": 0.6642796346266474, "learning_rate": 3.083870707822776e-06, "loss": 0.5134, "step": 3950 }, { "epoch": 0.64, "grad_norm": 0.9905004600933245, "learning_rate": 3.08146047989063e-06, "loss": 0.5607, "step": 3951 }, { "epoch": 0.64, "grad_norm": 1.060459845436706, "learning_rate": 3.079050774621536e-06, "loss": 0.698, "step": 3952 }, { "epoch": 0.64, "grad_norm": 0.9989552984061584, "learning_rate": 3.0766415926719606e-06, "loss": 0.6178, "step": 3953 }, { "epoch": 0.64, "grad_norm": 0.9602586761919033, "learning_rate": 3.074232934698234e-06, "loss": 0.6212, "step": 3954 }, { "epoch": 0.64, "grad_norm": 1.004330893952061, "learning_rate": 3.07182480135654e-06, "loss": 0.5922, "step": 3955 }, { "epoch": 0.64, "grad_norm": 0.9897670689631785, "learning_rate": 3.06941719330292e-06, "loss": 0.6009, "step": 3956 }, { "epoch": 0.64, "grad_norm": 1.1091726371428576, "learning_rate": 3.067010111193272e-06, "loss": 0.6615, "step": 3957 }, { "epoch": 0.64, "grad_norm": 1.0335384186321204, "learning_rate": 3.0646035556833563e-06, "loss": 0.5858, "step": 3958 }, { "epoch": 0.64, "grad_norm": 1.0714992736812141, "learning_rate": 3.0621975274287784e-06, "loss": 0.5414, "step": 3959 }, { "epoch": 0.64, "grad_norm": 1.0241482478513784, "learning_rate": 3.059792027085011e-06, "loss": 0.6138, "step": 3960 }, { "epoch": 0.64, "grad_norm": 0.9956902669802108, "learning_rate": 3.0573870553073776e-06, "loss": 0.5709, "step": 3961 }, { "epoch": 0.64, "grad_norm": 1.1193646097576968, "learning_rate": 3.0549826127510595e-06, "loss": 0.5772, "step": 3962 }, { "epoch": 0.64, "grad_norm": 0.8937665335720016, "learning_rate": 3.0525787000710915e-06, "loss": 0.4973, "step": 3963 }, { "epoch": 0.64, "grad_norm": 1.0077464705286991, "learning_rate": 3.0501753179223657e-06, "loss": 0.5847, "step": 3964 }, { "epoch": 0.64, "grad_norm": 1.0737823871175538, "learning_rate": 3.0477724669596326e-06, "loss": 0.6278, "step": 3965 }, { "epoch": 0.64, "grad_norm": 1.0488158934169045, "learning_rate": 3.045370147837492e-06, "loss": 0.6242, "step": 3966 }, { "epoch": 0.64, "grad_norm": 1.0433275138668243, "learning_rate": 3.042968361210403e-06, "loss": 0.5703, "step": 3967 }, { "epoch": 0.64, "grad_norm": 1.0373335544904982, "learning_rate": 3.040567107732679e-06, "loss": 0.6565, "step": 3968 }, { "epoch": 0.64, "grad_norm": 0.9717788618526988, "learning_rate": 3.0381663880584855e-06, "loss": 0.5418, "step": 3969 }, { "epoch": 0.64, "grad_norm": 0.9742336211818127, "learning_rate": 3.0357662028418455e-06, "loss": 0.5567, "step": 3970 }, { "epoch": 0.64, "grad_norm": 0.995405615684413, "learning_rate": 3.0333665527366394e-06, "loss": 0.6194, "step": 3971 }, { "epoch": 0.64, "grad_norm": 1.0705520500341474, "learning_rate": 3.0309674383965915e-06, "loss": 0.6058, "step": 3972 }, { "epoch": 0.64, "grad_norm": 1.008180411432752, "learning_rate": 3.0285688604752916e-06, "loss": 0.6149, "step": 3973 }, { "epoch": 0.64, "grad_norm": 0.7135981161783805, "learning_rate": 3.026170819626178e-06, "loss": 0.5309, "step": 3974 }, { "epoch": 0.64, "grad_norm": 1.0364132318562187, "learning_rate": 3.0237733165025408e-06, "loss": 0.5973, "step": 3975 }, { "epoch": 0.64, "grad_norm": 0.9770705726950235, "learning_rate": 3.021376351757527e-06, "loss": 0.5969, "step": 3976 }, { "epoch": 0.64, "grad_norm": 0.980248473453085, "learning_rate": 3.0189799260441377e-06, "loss": 0.6109, "step": 3977 }, { "epoch": 0.64, "grad_norm": 1.0503109441728466, "learning_rate": 3.0165840400152218e-06, "loss": 0.6146, "step": 3978 }, { "epoch": 0.64, "grad_norm": 1.0329639379462412, "learning_rate": 3.014188694323486e-06, "loss": 0.6247, "step": 3979 }, { "epoch": 0.64, "grad_norm": 0.9636408239824905, "learning_rate": 3.0117938896214904e-06, "loss": 0.5129, "step": 3980 }, { "epoch": 0.64, "grad_norm": 1.1911600335662196, "learning_rate": 3.0093996265616447e-06, "loss": 0.6054, "step": 3981 }, { "epoch": 0.64, "grad_norm": 0.9808958923708414, "learning_rate": 3.007005905796212e-06, "loss": 0.5768, "step": 3982 }, { "epoch": 0.64, "grad_norm": 1.0026946143582824, "learning_rate": 3.0046127279773067e-06, "loss": 0.6459, "step": 3983 }, { "epoch": 0.64, "grad_norm": 1.08459436468111, "learning_rate": 3.002220093756899e-06, "loss": 0.6993, "step": 3984 }, { "epoch": 0.64, "grad_norm": 0.9606463638486709, "learning_rate": 2.999828003786806e-06, "loss": 0.6063, "step": 3985 }, { "epoch": 0.64, "grad_norm": 1.0152998593346465, "learning_rate": 2.9974364587186988e-06, "loss": 0.5774, "step": 3986 }, { "epoch": 0.64, "grad_norm": 0.9389022774674889, "learning_rate": 2.995045459204104e-06, "loss": 0.5674, "step": 3987 }, { "epoch": 0.64, "grad_norm": 1.0183193081616835, "learning_rate": 2.9926550058943905e-06, "loss": 0.585, "step": 3988 }, { "epoch": 0.64, "grad_norm": 1.0289483491356517, "learning_rate": 2.9902650994407867e-06, "loss": 0.6559, "step": 3989 }, { "epoch": 0.64, "grad_norm": 1.1998811268913283, "learning_rate": 2.9878757404943694e-06, "loss": 0.6646, "step": 3990 }, { "epoch": 0.64, "grad_norm": 0.9602306860241301, "learning_rate": 2.985486929706064e-06, "loss": 0.6002, "step": 3991 }, { "epoch": 0.64, "grad_norm": 0.9152847388631519, "learning_rate": 2.9830986677266495e-06, "loss": 0.4856, "step": 3992 }, { "epoch": 0.64, "grad_norm": 1.0392895417871526, "learning_rate": 2.980710955206755e-06, "loss": 0.5952, "step": 3993 }, { "epoch": 0.64, "grad_norm": 0.9815355673280974, "learning_rate": 2.9783237927968567e-06, "loss": 0.5494, "step": 3994 }, { "epoch": 0.64, "grad_norm": 1.0115383196015817, "learning_rate": 2.9759371811472857e-06, "loss": 0.5873, "step": 3995 }, { "epoch": 0.64, "grad_norm": 1.0855040415203778, "learning_rate": 2.9735511209082213e-06, "loss": 0.5879, "step": 3996 }, { "epoch": 0.64, "grad_norm": 0.9648672459405413, "learning_rate": 2.9711656127296895e-06, "loss": 0.5235, "step": 3997 }, { "epoch": 0.64, "grad_norm": 0.9857473710790505, "learning_rate": 2.968780657261571e-06, "loss": 0.554, "step": 3998 }, { "epoch": 0.64, "grad_norm": 1.0269452393286644, "learning_rate": 2.96639625515359e-06, "loss": 0.6423, "step": 3999 }, { "epoch": 0.64, "grad_norm": 0.9932749260085405, "learning_rate": 2.9640124070553296e-06, "loss": 0.6052, "step": 4000 }, { "epoch": 0.64, "grad_norm": 0.9086579152455732, "learning_rate": 2.961629113616209e-06, "loss": 0.5286, "step": 4001 }, { "epoch": 0.64, "grad_norm": 1.0370745166057331, "learning_rate": 2.959246375485506e-06, "loss": 0.5563, "step": 4002 }, { "epoch": 0.64, "grad_norm": 1.0665460025653735, "learning_rate": 2.9568641933123456e-06, "loss": 0.6464, "step": 4003 }, { "epoch": 0.65, "grad_norm": 1.0087462408363064, "learning_rate": 2.954482567745697e-06, "loss": 0.6011, "step": 4004 }, { "epoch": 0.65, "grad_norm": 1.062212148392431, "learning_rate": 2.9521014994343823e-06, "loss": 0.6257, "step": 4005 }, { "epoch": 0.65, "grad_norm": 1.0860046954246445, "learning_rate": 2.9497209890270704e-06, "loss": 0.5646, "step": 4006 }, { "epoch": 0.65, "grad_norm": 0.9864791856581164, "learning_rate": 2.947341037172277e-06, "loss": 0.5725, "step": 4007 }, { "epoch": 0.65, "grad_norm": 0.9408093108832384, "learning_rate": 2.944961644518366e-06, "loss": 0.5837, "step": 4008 }, { "epoch": 0.65, "grad_norm": 1.8584641385170075, "learning_rate": 2.942582811713553e-06, "loss": 0.6019, "step": 4009 }, { "epoch": 0.65, "grad_norm": 0.9590888743148718, "learning_rate": 2.940204539405892e-06, "loss": 0.5878, "step": 4010 }, { "epoch": 0.65, "grad_norm": 1.0804387029583662, "learning_rate": 2.937826828243294e-06, "loss": 0.6017, "step": 4011 }, { "epoch": 0.65, "grad_norm": 1.0725208438126408, "learning_rate": 2.9354496788735145e-06, "loss": 0.582, "step": 4012 }, { "epoch": 0.65, "grad_norm": 0.9758441344069746, "learning_rate": 2.9330730919441498e-06, "loss": 0.5477, "step": 4013 }, { "epoch": 0.65, "grad_norm": 0.9948540696366719, "learning_rate": 2.9306970681026503e-06, "loss": 0.5982, "step": 4014 }, { "epoch": 0.65, "grad_norm": 1.017758082730898, "learning_rate": 2.9283216079963108e-06, "loss": 0.5707, "step": 4015 }, { "epoch": 0.65, "grad_norm": 0.9648955590752109, "learning_rate": 2.9259467122722705e-06, "loss": 0.5523, "step": 4016 }, { "epoch": 0.65, "grad_norm": 1.0696241005428277, "learning_rate": 2.9235723815775167e-06, "loss": 0.6234, "step": 4017 }, { "epoch": 0.65, "grad_norm": 1.075589223409787, "learning_rate": 2.9211986165588856e-06, "loss": 0.6035, "step": 4018 }, { "epoch": 0.65, "grad_norm": 0.9699281154448633, "learning_rate": 2.9188254178630526e-06, "loss": 0.5683, "step": 4019 }, { "epoch": 0.65, "grad_norm": 1.0653538926361192, "learning_rate": 2.916452786136542e-06, "loss": 0.5727, "step": 4020 }, { "epoch": 0.65, "grad_norm": 1.1077431501808022, "learning_rate": 2.914080722025728e-06, "loss": 0.6357, "step": 4021 }, { "epoch": 0.65, "grad_norm": 1.0366285857823563, "learning_rate": 2.9117092261768247e-06, "loss": 0.57, "step": 4022 }, { "epoch": 0.65, "grad_norm": 1.1035121598330206, "learning_rate": 2.9093382992358897e-06, "loss": 0.5655, "step": 4023 }, { "epoch": 0.65, "grad_norm": 1.021557033555813, "learning_rate": 2.906967941848834e-06, "loss": 0.5644, "step": 4024 }, { "epoch": 0.65, "grad_norm": 0.9723155030057634, "learning_rate": 2.9045981546614057e-06, "loss": 0.5638, "step": 4025 }, { "epoch": 0.65, "grad_norm": 0.9999347167003046, "learning_rate": 2.9022289383191972e-06, "loss": 0.5834, "step": 4026 }, { "epoch": 0.65, "grad_norm": 0.9184491080564697, "learning_rate": 2.899860293467652e-06, "loss": 0.4453, "step": 4027 }, { "epoch": 0.65, "grad_norm": 1.098049304349839, "learning_rate": 2.897492220752057e-06, "loss": 0.6185, "step": 4028 }, { "epoch": 0.65, "grad_norm": 0.6522077055101206, "learning_rate": 2.8951247208175337e-06, "loss": 0.4705, "step": 4029 }, { "epoch": 0.65, "grad_norm": 0.9601248699824723, "learning_rate": 2.8927577943090574e-06, "loss": 0.5736, "step": 4030 }, { "epoch": 0.65, "grad_norm": 0.9295371322619084, "learning_rate": 2.890391441871446e-06, "loss": 0.5192, "step": 4031 }, { "epoch": 0.65, "grad_norm": 0.9424014440626866, "learning_rate": 2.8880256641493582e-06, "loss": 0.5074, "step": 4032 }, { "epoch": 0.65, "grad_norm": 0.9760377335761027, "learning_rate": 2.885660461787294e-06, "loss": 0.5554, "step": 4033 }, { "epoch": 0.65, "grad_norm": 1.0681492263783305, "learning_rate": 2.8832958354296048e-06, "loss": 0.5766, "step": 4034 }, { "epoch": 0.65, "grad_norm": 1.0247553021239837, "learning_rate": 2.880931785720474e-06, "loss": 0.523, "step": 4035 }, { "epoch": 0.65, "grad_norm": 1.0980706991646645, "learning_rate": 2.8785683133039404e-06, "loss": 0.6152, "step": 4036 }, { "epoch": 0.65, "grad_norm": 1.0251535049515124, "learning_rate": 2.876205418823875e-06, "loss": 0.5336, "step": 4037 }, { "epoch": 0.65, "grad_norm": 1.066769795922078, "learning_rate": 2.8738431029239954e-06, "loss": 0.6633, "step": 4038 }, { "epoch": 0.65, "grad_norm": 0.9513954635247176, "learning_rate": 2.8714813662478633e-06, "loss": 0.5442, "step": 4039 }, { "epoch": 0.65, "grad_norm": 1.0569619391592409, "learning_rate": 2.869120209438879e-06, "loss": 0.5756, "step": 4040 }, { "epoch": 0.65, "grad_norm": 0.998953772481972, "learning_rate": 2.8667596331402892e-06, "loss": 0.645, "step": 4041 }, { "epoch": 0.65, "grad_norm": 0.9452384310233856, "learning_rate": 2.864399637995176e-06, "loss": 0.4889, "step": 4042 }, { "epoch": 0.65, "grad_norm": 0.6030112875811883, "learning_rate": 2.8620402246464717e-06, "loss": 0.4678, "step": 4043 }, { "epoch": 0.65, "grad_norm": 1.0583223163384945, "learning_rate": 2.8596813937369437e-06, "loss": 0.5462, "step": 4044 }, { "epoch": 0.65, "grad_norm": 0.9372717342109275, "learning_rate": 2.8573231459092007e-06, "loss": 0.5508, "step": 4045 }, { "epoch": 0.65, "grad_norm": 0.6291393750141842, "learning_rate": 2.854965481805697e-06, "loss": 0.4609, "step": 4046 }, { "epoch": 0.65, "grad_norm": 1.0262129556727317, "learning_rate": 2.852608402068725e-06, "loss": 0.5961, "step": 4047 }, { "epoch": 0.65, "grad_norm": 0.9380202927331176, "learning_rate": 2.8502519073404155e-06, "loss": 0.4935, "step": 4048 }, { "epoch": 0.65, "grad_norm": 1.0678738327192216, "learning_rate": 2.847895998262744e-06, "loss": 0.6459, "step": 4049 }, { "epoch": 0.65, "grad_norm": 1.01267225095389, "learning_rate": 2.8455406754775305e-06, "loss": 0.6172, "step": 4050 }, { "epoch": 0.65, "grad_norm": 1.0619557618270221, "learning_rate": 2.8431859396264203e-06, "loss": 0.6012, "step": 4051 }, { "epoch": 0.65, "grad_norm": 0.8820615161607135, "learning_rate": 2.8408317913509137e-06, "loss": 0.5129, "step": 4052 }, { "epoch": 0.65, "grad_norm": 0.9836737696913604, "learning_rate": 2.8384782312923466e-06, "loss": 0.5429, "step": 4053 }, { "epoch": 0.65, "grad_norm": 1.0486368746643207, "learning_rate": 2.836125260091892e-06, "loss": 0.6453, "step": 4054 }, { "epoch": 0.65, "grad_norm": 0.973587137202366, "learning_rate": 2.8337728783905618e-06, "loss": 0.5924, "step": 4055 }, { "epoch": 0.65, "grad_norm": 1.0595626153235649, "learning_rate": 2.8314210868292145e-06, "loss": 0.6533, "step": 4056 }, { "epoch": 0.65, "grad_norm": 1.0163422289707038, "learning_rate": 2.82906988604854e-06, "loss": 0.5278, "step": 4057 }, { "epoch": 0.65, "grad_norm": 0.9642361096096631, "learning_rate": 2.8267192766890685e-06, "loss": 0.5671, "step": 4058 }, { "epoch": 0.65, "grad_norm": 0.9973913156262183, "learning_rate": 2.824369259391173e-06, "loss": 0.5524, "step": 4059 }, { "epoch": 0.65, "grad_norm": 0.6535756186752398, "learning_rate": 2.822019834795065e-06, "loss": 0.4992, "step": 4060 }, { "epoch": 0.65, "grad_norm": 0.9676996624162714, "learning_rate": 2.8196710035407905e-06, "loss": 0.5423, "step": 4061 }, { "epoch": 0.65, "grad_norm": 1.0111555335197016, "learning_rate": 2.8173227662682332e-06, "loss": 0.5783, "step": 4062 }, { "epoch": 0.65, "grad_norm": 1.0528855400098003, "learning_rate": 2.814975123617123e-06, "loss": 0.6183, "step": 4063 }, { "epoch": 0.65, "grad_norm": 0.9720294574597103, "learning_rate": 2.8126280762270177e-06, "loss": 0.5766, "step": 4064 }, { "epoch": 0.65, "grad_norm": 1.0117059077883155, "learning_rate": 2.810281624737321e-06, "loss": 0.5884, "step": 4065 }, { "epoch": 0.66, "grad_norm": 1.0029032986306197, "learning_rate": 2.8079357697872696e-06, "loss": 0.543, "step": 4066 }, { "epoch": 0.66, "grad_norm": 0.9829190310875199, "learning_rate": 2.8055905120159375e-06, "loss": 0.5926, "step": 4067 }, { "epoch": 0.66, "grad_norm": 1.0012520707616437, "learning_rate": 2.803245852062241e-06, "loss": 0.5412, "step": 4068 }, { "epoch": 0.66, "grad_norm": 0.922010983816059, "learning_rate": 2.8009017905649283e-06, "loss": 0.5624, "step": 4069 }, { "epoch": 0.66, "grad_norm": 0.9398984862427566, "learning_rate": 2.7985583281625844e-06, "loss": 0.5919, "step": 4070 }, { "epoch": 0.66, "grad_norm": 0.99623431936887, "learning_rate": 2.7962154654936352e-06, "loss": 0.6051, "step": 4071 }, { "epoch": 0.66, "grad_norm": 1.0172174272942505, "learning_rate": 2.7938732031963443e-06, "loss": 0.5767, "step": 4072 }, { "epoch": 0.66, "grad_norm": 1.1255140562845085, "learning_rate": 2.791531541908802e-06, "loss": 0.6609, "step": 4073 }, { "epoch": 0.66, "grad_norm": 1.0834849077560833, "learning_rate": 2.7891904822689443e-06, "loss": 0.6653, "step": 4074 }, { "epoch": 0.66, "grad_norm": 0.993396970970261, "learning_rate": 2.7868500249145414e-06, "loss": 0.5427, "step": 4075 }, { "epoch": 0.66, "grad_norm": 1.0394902440121143, "learning_rate": 2.784510170483199e-06, "loss": 0.5476, "step": 4076 }, { "epoch": 0.66, "grad_norm": 1.0395937735266692, "learning_rate": 2.7821709196123535e-06, "loss": 0.6027, "step": 4077 }, { "epoch": 0.66, "grad_norm": 1.023831495368322, "learning_rate": 2.779832272939285e-06, "loss": 0.6551, "step": 4078 }, { "epoch": 0.66, "grad_norm": 0.996346818136337, "learning_rate": 2.7774942311011082e-06, "loss": 0.5874, "step": 4079 }, { "epoch": 0.66, "grad_norm": 1.1227017854867904, "learning_rate": 2.7751567947347624e-06, "loss": 0.6561, "step": 4080 }, { "epoch": 0.66, "grad_norm": 1.1112045278347196, "learning_rate": 2.772819964477035e-06, "loss": 0.6021, "step": 4081 }, { "epoch": 0.66, "grad_norm": 1.0112670827851002, "learning_rate": 2.7704837409645425e-06, "loss": 0.5412, "step": 4082 }, { "epoch": 0.66, "grad_norm": 1.0267388027631472, "learning_rate": 2.768148124833736e-06, "loss": 0.6338, "step": 4083 }, { "epoch": 0.66, "grad_norm": 1.0503775356111906, "learning_rate": 2.765813116720901e-06, "loss": 0.6238, "step": 4084 }, { "epoch": 0.66, "grad_norm": 0.9453234809026312, "learning_rate": 2.7634787172621593e-06, "loss": 0.544, "step": 4085 }, { "epoch": 0.66, "grad_norm": 0.9783486332969259, "learning_rate": 2.761144927093464e-06, "loss": 0.6603, "step": 4086 }, { "epoch": 0.66, "grad_norm": 0.9646492267888072, "learning_rate": 2.7588117468506064e-06, "loss": 0.5816, "step": 4087 }, { "epoch": 0.66, "grad_norm": 1.0643843033456615, "learning_rate": 2.756479177169208e-06, "loss": 0.5559, "step": 4088 }, { "epoch": 0.66, "grad_norm": 1.107682969498182, "learning_rate": 2.7541472186847224e-06, "loss": 0.6535, "step": 4089 }, { "epoch": 0.66, "grad_norm": 0.9439996384254786, "learning_rate": 2.751815872032444e-06, "loss": 0.4856, "step": 4090 }, { "epoch": 0.66, "grad_norm": 1.0541343974608182, "learning_rate": 2.7494851378474936e-06, "loss": 0.6625, "step": 4091 }, { "epoch": 0.66, "grad_norm": 0.9710797886490599, "learning_rate": 2.7471550167648255e-06, "loss": 0.5354, "step": 4092 }, { "epoch": 0.66, "grad_norm": 0.9632435199968827, "learning_rate": 2.744825509419231e-06, "loss": 0.5757, "step": 4093 }, { "epoch": 0.66, "grad_norm": 0.9891147592514674, "learning_rate": 2.742496616445335e-06, "loss": 0.6176, "step": 4094 }, { "epoch": 0.66, "grad_norm": 1.0284624614492337, "learning_rate": 2.740168338477587e-06, "loss": 0.5448, "step": 4095 }, { "epoch": 0.66, "grad_norm": 1.0120196330006106, "learning_rate": 2.7378406761502747e-06, "loss": 0.55, "step": 4096 }, { "epoch": 0.66, "grad_norm": 1.0207633426175418, "learning_rate": 2.7355136300975214e-06, "loss": 0.596, "step": 4097 }, { "epoch": 0.66, "grad_norm": 1.0036759728682145, "learning_rate": 2.733187200953276e-06, "loss": 0.6742, "step": 4098 }, { "epoch": 0.66, "grad_norm": 0.9684029672783085, "learning_rate": 2.7308613893513205e-06, "loss": 0.6145, "step": 4099 }, { "epoch": 0.66, "grad_norm": 0.6860798733077779, "learning_rate": 2.7285361959252716e-06, "loss": 0.4903, "step": 4100 }, { "epoch": 0.66, "grad_norm": 1.0249399111039097, "learning_rate": 2.72621162130858e-06, "loss": 0.5744, "step": 4101 }, { "epoch": 0.66, "grad_norm": 0.9675987015377583, "learning_rate": 2.723887666134516e-06, "loss": 0.5363, "step": 4102 }, { "epoch": 0.66, "grad_norm": 0.9601710511198105, "learning_rate": 2.721564331036194e-06, "loss": 0.4931, "step": 4103 }, { "epoch": 0.66, "grad_norm": 1.0002350678264316, "learning_rate": 2.719241616646555e-06, "loss": 0.5632, "step": 4104 }, { "epoch": 0.66, "grad_norm": 1.0268711646908977, "learning_rate": 2.716919523598369e-06, "loss": 0.519, "step": 4105 }, { "epoch": 0.66, "grad_norm": 1.0328383060684727, "learning_rate": 2.7145980525242367e-06, "loss": 0.5732, "step": 4106 }, { "epoch": 0.66, "grad_norm": 0.8830074006332264, "learning_rate": 2.712277204056594e-06, "loss": 0.5377, "step": 4107 }, { "epoch": 0.66, "grad_norm": 0.9633476677697133, "learning_rate": 2.7099569788277023e-06, "loss": 0.581, "step": 4108 }, { "epoch": 0.66, "grad_norm": 0.651712147164041, "learning_rate": 2.7076373774696527e-06, "loss": 0.4652, "step": 4109 }, { "epoch": 0.66, "grad_norm": 1.1104690933267418, "learning_rate": 2.705318400614374e-06, "loss": 0.6867, "step": 4110 }, { "epoch": 0.66, "grad_norm": 1.062440968540112, "learning_rate": 2.703000048893613e-06, "loss": 0.5979, "step": 4111 }, { "epoch": 0.66, "grad_norm": 0.9005534877991328, "learning_rate": 2.7006823229389588e-06, "loss": 0.5076, "step": 4112 }, { "epoch": 0.66, "grad_norm": 1.0484482210944337, "learning_rate": 2.6983652233818205e-06, "loss": 0.6101, "step": 4113 }, { "epoch": 0.66, "grad_norm": 1.038326363492528, "learning_rate": 2.6960487508534383e-06, "loss": 0.6463, "step": 4114 }, { "epoch": 0.66, "grad_norm": 0.8697546720999839, "learning_rate": 2.693732905984885e-06, "loss": 0.5255, "step": 4115 }, { "epoch": 0.66, "grad_norm": 1.0339550298516553, "learning_rate": 2.6914176894070644e-06, "loss": 0.5927, "step": 4116 }, { "epoch": 0.66, "grad_norm": 1.0518696530453056, "learning_rate": 2.6891031017506986e-06, "loss": 0.6093, "step": 4117 }, { "epoch": 0.66, "grad_norm": 0.9746747746976071, "learning_rate": 2.686789143646347e-06, "loss": 0.5765, "step": 4118 }, { "epoch": 0.66, "grad_norm": 0.9545353133327568, "learning_rate": 2.6844758157244e-06, "loss": 0.4967, "step": 4119 }, { "epoch": 0.66, "grad_norm": 1.0112389334627743, "learning_rate": 2.6821631186150676e-06, "loss": 0.5435, "step": 4120 }, { "epoch": 0.66, "grad_norm": 0.9078802657303984, "learning_rate": 2.6798510529483913e-06, "loss": 0.5022, "step": 4121 }, { "epoch": 0.66, "grad_norm": 1.019573944843636, "learning_rate": 2.6775396193542436e-06, "loss": 0.6076, "step": 4122 }, { "epoch": 0.66, "grad_norm": 1.0762076879137368, "learning_rate": 2.675228818462327e-06, "loss": 0.5781, "step": 4123 }, { "epoch": 0.66, "grad_norm": 0.9626516893990852, "learning_rate": 2.6729186509021587e-06, "loss": 0.6379, "step": 4124 }, { "epoch": 0.66, "grad_norm": 1.0440073208478957, "learning_rate": 2.670609117303096e-06, "loss": 0.6741, "step": 4125 }, { "epoch": 0.66, "grad_norm": 0.9351192821926619, "learning_rate": 2.668300218294322e-06, "loss": 0.5034, "step": 4126 }, { "epoch": 0.66, "grad_norm": 0.9997945000517795, "learning_rate": 2.6659919545048424e-06, "loss": 0.6288, "step": 4127 }, { "epoch": 0.67, "grad_norm": 1.0364822621832968, "learning_rate": 2.663684326563489e-06, "loss": 0.5917, "step": 4128 }, { "epoch": 0.67, "grad_norm": 1.0171578350152857, "learning_rate": 2.6613773350989293e-06, "loss": 0.6218, "step": 4129 }, { "epoch": 0.67, "grad_norm": 1.0506184309431916, "learning_rate": 2.6590709807396464e-06, "loss": 0.571, "step": 4130 }, { "epoch": 0.67, "grad_norm": 0.9731584372164224, "learning_rate": 2.656765264113955e-06, "loss": 0.5707, "step": 4131 }, { "epoch": 0.67, "grad_norm": 1.0349383518243065, "learning_rate": 2.65446018585e-06, "loss": 0.5882, "step": 4132 }, { "epoch": 0.67, "grad_norm": 0.9412264459653292, "learning_rate": 2.652155746575743e-06, "loss": 0.5388, "step": 4133 }, { "epoch": 0.67, "grad_norm": 1.0045372851596146, "learning_rate": 2.6498519469189814e-06, "loss": 0.5531, "step": 4134 }, { "epoch": 0.67, "grad_norm": 1.1290671236547234, "learning_rate": 2.6475487875073318e-06, "loss": 0.6585, "step": 4135 }, { "epoch": 0.67, "grad_norm": 1.0472501482244814, "learning_rate": 2.6452462689682364e-06, "loss": 0.5867, "step": 4136 }, { "epoch": 0.67, "grad_norm": 0.9992871552936109, "learning_rate": 2.6429443919289676e-06, "loss": 0.5509, "step": 4137 }, { "epoch": 0.67, "grad_norm": 0.9673557712221101, "learning_rate": 2.640643157016618e-06, "loss": 0.5319, "step": 4138 }, { "epoch": 0.67, "grad_norm": 1.0872471036352866, "learning_rate": 2.63834256485811e-06, "loss": 0.6222, "step": 4139 }, { "epoch": 0.67, "grad_norm": 0.9953755076305282, "learning_rate": 2.636042616080185e-06, "loss": 0.5661, "step": 4140 }, { "epoch": 0.67, "grad_norm": 1.0337555029160617, "learning_rate": 2.6337433113094164e-06, "loss": 0.5521, "step": 4141 }, { "epoch": 0.67, "grad_norm": 0.9571091209345833, "learning_rate": 2.6314446511721957e-06, "loss": 0.5773, "step": 4142 }, { "epoch": 0.67, "grad_norm": 0.9922458353879587, "learning_rate": 2.6291466362947392e-06, "loss": 0.4751, "step": 4143 }, { "epoch": 0.67, "grad_norm": 1.005154945708394, "learning_rate": 2.626849267303093e-06, "loss": 0.6096, "step": 4144 }, { "epoch": 0.67, "grad_norm": 0.9603122056870393, "learning_rate": 2.624552544823126e-06, "loss": 0.5546, "step": 4145 }, { "epoch": 0.67, "grad_norm": 0.998811112173916, "learning_rate": 2.622256469480522e-06, "loss": 0.6307, "step": 4146 }, { "epoch": 0.67, "grad_norm": 0.9617728011031288, "learning_rate": 2.6199610419007983e-06, "loss": 0.591, "step": 4147 }, { "epoch": 0.67, "grad_norm": 1.0381645044997831, "learning_rate": 2.617666262709294e-06, "loss": 0.5982, "step": 4148 }, { "epoch": 0.67, "grad_norm": 0.9839611724703446, "learning_rate": 2.61537213253117e-06, "loss": 0.5437, "step": 4149 }, { "epoch": 0.67, "grad_norm": 1.11930758711911, "learning_rate": 2.6130786519914074e-06, "loss": 0.6655, "step": 4150 }, { "epoch": 0.67, "grad_norm": 1.025176827269826, "learning_rate": 2.6107858217148185e-06, "loss": 0.5655, "step": 4151 }, { "epoch": 0.67, "grad_norm": 0.9888260855867205, "learning_rate": 2.6084936423260303e-06, "loss": 0.5546, "step": 4152 }, { "epoch": 0.67, "grad_norm": 0.9496668109439113, "learning_rate": 2.606202114449495e-06, "loss": 0.5856, "step": 4153 }, { "epoch": 0.67, "grad_norm": 0.958141652196475, "learning_rate": 2.60391123870949e-06, "loss": 0.5585, "step": 4154 }, { "epoch": 0.67, "grad_norm": 1.0491309200885355, "learning_rate": 2.601621015730111e-06, "loss": 0.6014, "step": 4155 }, { "epoch": 0.67, "grad_norm": 0.9512044178850638, "learning_rate": 2.5993314461352802e-06, "loss": 0.5381, "step": 4156 }, { "epoch": 0.67, "grad_norm": 1.0685864043490851, "learning_rate": 2.597042530548737e-06, "loss": 0.608, "step": 4157 }, { "epoch": 0.67, "grad_norm": 0.9045842016294855, "learning_rate": 2.5947542695940485e-06, "loss": 0.4794, "step": 4158 }, { "epoch": 0.67, "grad_norm": 1.0339508753645148, "learning_rate": 2.5924666638945973e-06, "loss": 0.6068, "step": 4159 }, { "epoch": 0.67, "grad_norm": 0.9335131117724038, "learning_rate": 2.5901797140735895e-06, "loss": 0.5294, "step": 4160 }, { "epoch": 0.67, "grad_norm": 1.0336950839868349, "learning_rate": 2.5878934207540564e-06, "loss": 0.552, "step": 4161 }, { "epoch": 0.67, "grad_norm": 0.9638734238060196, "learning_rate": 2.5856077845588433e-06, "loss": 0.6377, "step": 4162 }, { "epoch": 0.67, "grad_norm": 0.9913705357017132, "learning_rate": 2.5833228061106253e-06, "loss": 0.605, "step": 4163 }, { "epoch": 0.67, "grad_norm": 1.0559041525037611, "learning_rate": 2.5810384860318904e-06, "loss": 0.532, "step": 4164 }, { "epoch": 0.67, "grad_norm": 0.9787071825297399, "learning_rate": 2.57875482494495e-06, "loss": 0.575, "step": 4165 }, { "epoch": 0.67, "grad_norm": 1.065966937096762, "learning_rate": 2.576471823471938e-06, "loss": 0.54, "step": 4166 }, { "epoch": 0.67, "grad_norm": 0.9863645580418978, "learning_rate": 2.5741894822348073e-06, "loss": 0.5754, "step": 4167 }, { "epoch": 0.67, "grad_norm": 0.9871672428386502, "learning_rate": 2.571907801855327e-06, "loss": 0.6072, "step": 4168 }, { "epoch": 0.67, "grad_norm": 1.0015140921790995, "learning_rate": 2.5696267829550926e-06, "loss": 0.4829, "step": 4169 }, { "epoch": 0.67, "grad_norm": 0.9119101577402052, "learning_rate": 2.5673464261555174e-06, "loss": 0.5422, "step": 4170 }, { "epoch": 0.67, "grad_norm": 1.0047353643708197, "learning_rate": 2.5650667320778333e-06, "loss": 0.5304, "step": 4171 }, { "epoch": 0.67, "grad_norm": 1.1171378668114342, "learning_rate": 2.562787701343088e-06, "loss": 0.6919, "step": 4172 }, { "epoch": 0.67, "grad_norm": 0.9707952096669856, "learning_rate": 2.5605093345721564e-06, "loss": 0.6045, "step": 4173 }, { "epoch": 0.67, "grad_norm": 1.0235726104671539, "learning_rate": 2.558231632385728e-06, "loss": 0.5485, "step": 4174 }, { "epoch": 0.67, "grad_norm": 0.9825484094276393, "learning_rate": 2.5559545954043086e-06, "loss": 0.5019, "step": 4175 }, { "epoch": 0.67, "grad_norm": 0.9468994100485664, "learning_rate": 2.5536782242482294e-06, "loss": 0.4679, "step": 4176 }, { "epoch": 0.67, "grad_norm": 1.0674821700239088, "learning_rate": 2.5514025195376336e-06, "loss": 0.5489, "step": 4177 }, { "epoch": 0.67, "grad_norm": 0.9846847122935792, "learning_rate": 2.5491274818924893e-06, "loss": 0.6565, "step": 4178 }, { "epoch": 0.67, "grad_norm": 1.2642556262049445, "learning_rate": 2.5468531119325756e-06, "loss": 0.5754, "step": 4179 }, { "epoch": 0.67, "grad_norm": 0.9995403192376702, "learning_rate": 2.544579410277498e-06, "loss": 0.5443, "step": 4180 }, { "epoch": 0.67, "grad_norm": 0.6405436646215436, "learning_rate": 2.542306377546673e-06, "loss": 0.5105, "step": 4181 }, { "epoch": 0.67, "grad_norm": 1.0270509837663513, "learning_rate": 2.540034014359335e-06, "loss": 0.5144, "step": 4182 }, { "epoch": 0.67, "grad_norm": 0.950759222975867, "learning_rate": 2.537762321334543e-06, "loss": 0.5047, "step": 4183 }, { "epoch": 0.67, "grad_norm": 0.6680771031194185, "learning_rate": 2.535491299091165e-06, "loss": 0.4919, "step": 4184 }, { "epoch": 0.67, "grad_norm": 0.9932084463512395, "learning_rate": 2.533220948247893e-06, "loss": 0.5669, "step": 4185 }, { "epoch": 0.67, "grad_norm": 1.0558010412566432, "learning_rate": 2.530951269423233e-06, "loss": 0.5509, "step": 4186 }, { "epoch": 0.67, "grad_norm": 1.0070353108861123, "learning_rate": 2.528682263235504e-06, "loss": 0.6196, "step": 4187 }, { "epoch": 0.67, "grad_norm": 1.0515677106285464, "learning_rate": 2.526413930302851e-06, "loss": 0.6381, "step": 4188 }, { "epoch": 0.67, "grad_norm": 1.109079647613207, "learning_rate": 2.5241462712432284e-06, "loss": 0.5929, "step": 4189 }, { "epoch": 0.68, "grad_norm": 1.06401687367605, "learning_rate": 2.5218792866744068e-06, "loss": 0.5805, "step": 4190 }, { "epoch": 0.68, "grad_norm": 1.0427495065370334, "learning_rate": 2.5196129772139766e-06, "loss": 0.6248, "step": 4191 }, { "epoch": 0.68, "grad_norm": 0.9698619245859882, "learning_rate": 2.517347343479346e-06, "loss": 0.5889, "step": 4192 }, { "epoch": 0.68, "grad_norm": 1.0173573977091384, "learning_rate": 2.5150823860877345e-06, "loss": 0.5907, "step": 4193 }, { "epoch": 0.68, "grad_norm": 1.0205188937172416, "learning_rate": 2.5128181056561755e-06, "loss": 0.6145, "step": 4194 }, { "epoch": 0.68, "grad_norm": 1.0859000590577317, "learning_rate": 2.5105545028015266e-06, "loss": 0.6684, "step": 4195 }, { "epoch": 0.68, "grad_norm": 1.067539270807866, "learning_rate": 2.508291578140453e-06, "loss": 0.5944, "step": 4196 }, { "epoch": 0.68, "grad_norm": 0.9812208485344739, "learning_rate": 2.5060293322894353e-06, "loss": 0.5039, "step": 4197 }, { "epoch": 0.68, "grad_norm": 1.0730453026024784, "learning_rate": 2.503767765864774e-06, "loss": 0.6313, "step": 4198 }, { "epoch": 0.68, "grad_norm": 1.055682591029642, "learning_rate": 2.5015068794825853e-06, "loss": 0.6127, "step": 4199 }, { "epoch": 0.68, "grad_norm": 1.017364367452376, "learning_rate": 2.4992466737587932e-06, "loss": 0.5112, "step": 4200 }, { "epoch": 0.68, "grad_norm": 0.9725027893805415, "learning_rate": 2.4969871493091398e-06, "loss": 0.5759, "step": 4201 }, { "epoch": 0.68, "grad_norm": 1.1145453529875105, "learning_rate": 2.494728306749184e-06, "loss": 0.5794, "step": 4202 }, { "epoch": 0.68, "grad_norm": 1.1103276577931425, "learning_rate": 2.4924701466942957e-06, "loss": 0.6807, "step": 4203 }, { "epoch": 0.68, "grad_norm": 0.9818666726430761, "learning_rate": 2.490212669759658e-06, "loss": 0.5569, "step": 4204 }, { "epoch": 0.68, "grad_norm": 1.044396937141999, "learning_rate": 2.487955876560274e-06, "loss": 0.5911, "step": 4205 }, { "epoch": 0.68, "grad_norm": 1.0532239681032975, "learning_rate": 2.485699767710952e-06, "loss": 0.6668, "step": 4206 }, { "epoch": 0.68, "grad_norm": 1.0466868715744622, "learning_rate": 2.4834443438263224e-06, "loss": 0.5529, "step": 4207 }, { "epoch": 0.68, "grad_norm": 1.1177457339927725, "learning_rate": 2.4811896055208227e-06, "loss": 0.5821, "step": 4208 }, { "epoch": 0.68, "grad_norm": 0.9621071520259866, "learning_rate": 2.4789355534087038e-06, "loss": 0.603, "step": 4209 }, { "epoch": 0.68, "grad_norm": 0.976281559879944, "learning_rate": 2.476682188104036e-06, "loss": 0.5211, "step": 4210 }, { "epoch": 0.68, "grad_norm": 0.9640236686646355, "learning_rate": 2.4744295102206954e-06, "loss": 0.5287, "step": 4211 }, { "epoch": 0.68, "grad_norm": 1.0616524712151376, "learning_rate": 2.4721775203723726e-06, "loss": 0.5912, "step": 4212 }, { "epoch": 0.68, "grad_norm": 0.9782968114215164, "learning_rate": 2.4699262191725726e-06, "loss": 0.5875, "step": 4213 }, { "epoch": 0.68, "grad_norm": 0.6527304073611394, "learning_rate": 2.467675607234615e-06, "loss": 0.4926, "step": 4214 }, { "epoch": 0.68, "grad_norm": 0.9931967102263395, "learning_rate": 2.465425685171625e-06, "loss": 0.5837, "step": 4215 }, { "epoch": 0.68, "grad_norm": 1.8878886028237913, "learning_rate": 2.463176453596543e-06, "loss": 0.618, "step": 4216 }, { "epoch": 0.68, "grad_norm": 1.095364786205635, "learning_rate": 2.4609279131221243e-06, "loss": 0.6461, "step": 4217 }, { "epoch": 0.68, "grad_norm": 1.0093553868229896, "learning_rate": 2.4586800643609326e-06, "loss": 0.6479, "step": 4218 }, { "epoch": 0.68, "grad_norm": 0.9645670020073929, "learning_rate": 2.456432907925341e-06, "loss": 0.6111, "step": 4219 }, { "epoch": 0.68, "grad_norm": 0.861160689218319, "learning_rate": 2.454186444427539e-06, "loss": 0.4385, "step": 4220 }, { "epoch": 0.68, "grad_norm": 0.9280475433651341, "learning_rate": 2.4519406744795275e-06, "loss": 0.6049, "step": 4221 }, { "epoch": 0.68, "grad_norm": 1.0764823811922422, "learning_rate": 2.4496955986931143e-06, "loss": 0.5823, "step": 4222 }, { "epoch": 0.68, "grad_norm": 0.6800656064268441, "learning_rate": 2.447451217679917e-06, "loss": 0.4759, "step": 4223 }, { "epoch": 0.68, "grad_norm": 0.6103010032403269, "learning_rate": 2.445207532051372e-06, "loss": 0.4488, "step": 4224 }, { "epoch": 0.68, "grad_norm": 1.0608226400989327, "learning_rate": 2.442964542418718e-06, "loss": 0.604, "step": 4225 }, { "epoch": 0.68, "grad_norm": 0.968264105863904, "learning_rate": 2.4407222493930063e-06, "loss": 0.6438, "step": 4226 }, { "epoch": 0.68, "grad_norm": 1.124588890962765, "learning_rate": 2.438480653585102e-06, "loss": 0.6845, "step": 4227 }, { "epoch": 0.68, "grad_norm": 1.043963024970984, "learning_rate": 2.436239755605675e-06, "loss": 0.6434, "step": 4228 }, { "epoch": 0.68, "grad_norm": 1.0137092436854946, "learning_rate": 2.433999556065211e-06, "loss": 0.506, "step": 4229 }, { "epoch": 0.68, "grad_norm": 0.9688744037280497, "learning_rate": 2.4317600555739997e-06, "loss": 0.5737, "step": 4230 }, { "epoch": 0.68, "grad_norm": 0.9374011263255312, "learning_rate": 2.4295212547421415e-06, "loss": 0.5451, "step": 4231 }, { "epoch": 0.68, "grad_norm": 1.0755521488943034, "learning_rate": 2.427283154179551e-06, "loss": 0.5794, "step": 4232 }, { "epoch": 0.68, "grad_norm": 0.9039070093462079, "learning_rate": 2.4250457544959465e-06, "loss": 0.5334, "step": 4233 }, { "epoch": 0.68, "grad_norm": 1.0078130332867878, "learning_rate": 2.422809056300856e-06, "loss": 0.5164, "step": 4234 }, { "epoch": 0.68, "grad_norm": 0.9737893491190986, "learning_rate": 2.4205730602036194e-06, "loss": 0.4805, "step": 4235 }, { "epoch": 0.68, "grad_norm": 1.045161043056691, "learning_rate": 2.4183377668133843e-06, "loss": 0.6696, "step": 4236 }, { "epoch": 0.68, "grad_norm": 1.0509801752114005, "learning_rate": 2.4161031767391067e-06, "loss": 0.5917, "step": 4237 }, { "epoch": 0.68, "grad_norm": 1.0751212508222223, "learning_rate": 2.4138692905895473e-06, "loss": 0.5391, "step": 4238 }, { "epoch": 0.68, "grad_norm": 1.1186338260123807, "learning_rate": 2.4116361089732815e-06, "loss": 0.6193, "step": 4239 }, { "epoch": 0.68, "grad_norm": 1.0955150998655903, "learning_rate": 2.4094036324986888e-06, "loss": 0.583, "step": 4240 }, { "epoch": 0.68, "grad_norm": 0.9768281747867427, "learning_rate": 2.4071718617739542e-06, "loss": 0.5097, "step": 4241 }, { "epoch": 0.68, "grad_norm": 1.0416028918638447, "learning_rate": 2.404940797407077e-06, "loss": 0.5973, "step": 4242 }, { "epoch": 0.68, "grad_norm": 1.071215029253211, "learning_rate": 2.4027104400058615e-06, "loss": 0.6126, "step": 4243 }, { "epoch": 0.68, "grad_norm": 1.0016019832517598, "learning_rate": 2.4004807901779164e-06, "loss": 0.6109, "step": 4244 }, { "epoch": 0.68, "grad_norm": 1.0528640004682408, "learning_rate": 2.3982518485306587e-06, "loss": 0.5732, "step": 4245 }, { "epoch": 0.68, "grad_norm": 1.10580606238819, "learning_rate": 2.3960236156713175e-06, "loss": 0.6594, "step": 4246 }, { "epoch": 0.68, "grad_norm": 1.0321446845105122, "learning_rate": 2.3937960922069213e-06, "loss": 0.5996, "step": 4247 }, { "epoch": 0.68, "grad_norm": 1.0518940432588118, "learning_rate": 2.391569278744309e-06, "loss": 0.5591, "step": 4248 }, { "epoch": 0.68, "grad_norm": 1.0105563382628076, "learning_rate": 2.3893431758901285e-06, "loss": 0.5654, "step": 4249 }, { "epoch": 0.68, "grad_norm": 1.0848417325617543, "learning_rate": 2.387117784250828e-06, "loss": 0.5923, "step": 4250 }, { "epoch": 0.68, "grad_norm": 1.0369607119868975, "learning_rate": 2.38489310443267e-06, "loss": 0.6218, "step": 4251 }, { "epoch": 0.69, "grad_norm": 1.0129207689590631, "learning_rate": 2.382669137041716e-06, "loss": 0.6521, "step": 4252 }, { "epoch": 0.69, "grad_norm": 1.0740799521333386, "learning_rate": 2.3804458826838337e-06, "loss": 0.572, "step": 4253 }, { "epoch": 0.69, "grad_norm": 1.0385039706048727, "learning_rate": 2.3782233419647043e-06, "loss": 0.6048, "step": 4254 }, { "epoch": 0.69, "grad_norm": 0.9763869625370863, "learning_rate": 2.376001515489806e-06, "loss": 0.5225, "step": 4255 }, { "epoch": 0.69, "grad_norm": 1.0787852225201433, "learning_rate": 2.3737804038644236e-06, "loss": 0.5765, "step": 4256 }, { "epoch": 0.69, "grad_norm": 0.6449513645642397, "learning_rate": 2.371560007693651e-06, "loss": 0.487, "step": 4257 }, { "epoch": 0.69, "grad_norm": 1.0375524730435397, "learning_rate": 2.3693403275823883e-06, "loss": 0.5515, "step": 4258 }, { "epoch": 0.69, "grad_norm": 1.080629989742259, "learning_rate": 2.367121364135335e-06, "loss": 0.6488, "step": 4259 }, { "epoch": 0.69, "grad_norm": 1.0748228522831667, "learning_rate": 2.3649031179569954e-06, "loss": 0.6009, "step": 4260 }, { "epoch": 0.69, "grad_norm": 1.009638178228135, "learning_rate": 2.3626855896516855e-06, "loss": 0.5926, "step": 4261 }, { "epoch": 0.69, "grad_norm": 1.0532746470902474, "learning_rate": 2.360468779823519e-06, "loss": 0.5781, "step": 4262 }, { "epoch": 0.69, "grad_norm": 1.0876090007156665, "learning_rate": 2.3582526890764135e-06, "loss": 0.6065, "step": 4263 }, { "epoch": 0.69, "grad_norm": 0.9688587447321712, "learning_rate": 2.356037318014096e-06, "loss": 0.5459, "step": 4264 }, { "epoch": 0.69, "grad_norm": 0.9871704478051079, "learning_rate": 2.3538226672400982e-06, "loss": 0.5321, "step": 4265 }, { "epoch": 0.69, "grad_norm": 0.925710277647997, "learning_rate": 2.3516087373577447e-06, "loss": 0.5454, "step": 4266 }, { "epoch": 0.69, "grad_norm": 1.0066311533423127, "learning_rate": 2.3493955289701744e-06, "loss": 0.503, "step": 4267 }, { "epoch": 0.69, "grad_norm": 0.9836004265667032, "learning_rate": 2.347183042680328e-06, "loss": 0.4816, "step": 4268 }, { "epoch": 0.69, "grad_norm": 1.0465209394909063, "learning_rate": 2.3449712790909465e-06, "loss": 0.5548, "step": 4269 }, { "epoch": 0.69, "grad_norm": 1.0514379669767695, "learning_rate": 2.3427602388045723e-06, "loss": 0.6567, "step": 4270 }, { "epoch": 0.69, "grad_norm": 0.9945471342843365, "learning_rate": 2.3405499224235583e-06, "loss": 0.57, "step": 4271 }, { "epoch": 0.69, "grad_norm": 0.6141325194890646, "learning_rate": 2.3383403305500523e-06, "loss": 0.4448, "step": 4272 }, { "epoch": 0.69, "grad_norm": 1.0514987457407428, "learning_rate": 2.3361314637860104e-06, "loss": 0.6204, "step": 4273 }, { "epoch": 0.69, "grad_norm": 1.0134794630826691, "learning_rate": 2.3339233227331887e-06, "loss": 0.6331, "step": 4274 }, { "epoch": 0.69, "grad_norm": 0.9600739970389689, "learning_rate": 2.331715907993142e-06, "loss": 0.541, "step": 4275 }, { "epoch": 0.69, "grad_norm": 1.0062314876880716, "learning_rate": 2.329509220167236e-06, "loss": 0.5615, "step": 4276 }, { "epoch": 0.69, "grad_norm": 0.9285020977391022, "learning_rate": 2.327303259856629e-06, "loss": 0.4819, "step": 4277 }, { "epoch": 0.69, "grad_norm": 0.9850668225425437, "learning_rate": 2.3250980276622893e-06, "loss": 0.5898, "step": 4278 }, { "epoch": 0.69, "grad_norm": 1.0288642390567753, "learning_rate": 2.322893524184979e-06, "loss": 0.5794, "step": 4279 }, { "epoch": 0.69, "grad_norm": 0.9388705756172252, "learning_rate": 2.320689750025269e-06, "loss": 0.4809, "step": 4280 }, { "epoch": 0.69, "grad_norm": 1.0188909249248297, "learning_rate": 2.3184867057835274e-06, "loss": 0.5767, "step": 4281 }, { "epoch": 0.69, "grad_norm": 0.9773519638522371, "learning_rate": 2.316284392059921e-06, "loss": 0.5615, "step": 4282 }, { "epoch": 0.69, "grad_norm": 1.0141484454483456, "learning_rate": 2.3140828094544253e-06, "loss": 0.6172, "step": 4283 }, { "epoch": 0.69, "grad_norm": 1.1581460539414263, "learning_rate": 2.3118819585668106e-06, "loss": 0.5405, "step": 4284 }, { "epoch": 0.69, "grad_norm": 0.9408137050777134, "learning_rate": 2.3096818399966465e-06, "loss": 0.5941, "step": 4285 }, { "epoch": 0.69, "grad_norm": 1.029944524956399, "learning_rate": 2.3074824543433084e-06, "loss": 0.5928, "step": 4286 }, { "epoch": 0.69, "grad_norm": 1.1642417762581787, "learning_rate": 2.305283802205973e-06, "loss": 0.5989, "step": 4287 }, { "epoch": 0.69, "grad_norm": 1.0751821084317132, "learning_rate": 2.3030858841836063e-06, "loss": 0.6036, "step": 4288 }, { "epoch": 0.69, "grad_norm": 0.9729737236314856, "learning_rate": 2.300888700874986e-06, "loss": 0.5738, "step": 4289 }, { "epoch": 0.69, "grad_norm": 0.9590697990400221, "learning_rate": 2.2986922528786865e-06, "loss": 0.5249, "step": 4290 }, { "epoch": 0.69, "grad_norm": 0.987308828859753, "learning_rate": 2.2964965407930796e-06, "loss": 0.6709, "step": 4291 }, { "epoch": 0.69, "grad_norm": 1.0200935595026772, "learning_rate": 2.2943015652163355e-06, "loss": 0.5552, "step": 4292 }, { "epoch": 0.69, "grad_norm": 1.071280413417095, "learning_rate": 2.29210732674643e-06, "loss": 0.6347, "step": 4293 }, { "epoch": 0.69, "grad_norm": 1.1153660272231845, "learning_rate": 2.289913825981132e-06, "loss": 0.5771, "step": 4294 }, { "epoch": 0.69, "grad_norm": 1.0132108436006937, "learning_rate": 2.2877210635180098e-06, "loss": 0.5826, "step": 4295 }, { "epoch": 0.69, "grad_norm": 1.1056380368813161, "learning_rate": 2.2855290399544346e-06, "loss": 0.631, "step": 4296 }, { "epoch": 0.69, "grad_norm": 1.021821561320138, "learning_rate": 2.2833377558875754e-06, "loss": 0.6237, "step": 4297 }, { "epoch": 0.69, "grad_norm": 0.9785955829227968, "learning_rate": 2.281147211914397e-06, "loss": 0.5759, "step": 4298 }, { "epoch": 0.69, "grad_norm": 0.9965246853753821, "learning_rate": 2.278957408631662e-06, "loss": 0.5739, "step": 4299 }, { "epoch": 0.69, "grad_norm": 0.9673545572633372, "learning_rate": 2.276768346635936e-06, "loss": 0.5616, "step": 4300 }, { "epoch": 0.69, "grad_norm": 0.9844179247709128, "learning_rate": 2.2745800265235773e-06, "loss": 0.5781, "step": 4301 }, { "epoch": 0.69, "grad_norm": 1.0278060625703047, "learning_rate": 2.2723924488907477e-06, "loss": 0.5597, "step": 4302 }, { "epoch": 0.69, "grad_norm": 1.095728777239182, "learning_rate": 2.2702056143334025e-06, "loss": 0.6157, "step": 4303 }, { "epoch": 0.69, "grad_norm": 1.0684900911957962, "learning_rate": 2.268019523447292e-06, "loss": 0.6255, "step": 4304 }, { "epoch": 0.69, "grad_norm": 1.057841643317205, "learning_rate": 2.265834176827974e-06, "loss": 0.5698, "step": 4305 }, { "epoch": 0.69, "grad_norm": 1.0313746198145808, "learning_rate": 2.263649575070794e-06, "loss": 0.5369, "step": 4306 }, { "epoch": 0.69, "grad_norm": 0.9958966049203255, "learning_rate": 2.261465718770895e-06, "loss": 0.5767, "step": 4307 }, { "epoch": 0.69, "grad_norm": 1.0000605341500162, "learning_rate": 2.259282608523223e-06, "loss": 0.5821, "step": 4308 }, { "epoch": 0.69, "grad_norm": 0.9888116323403905, "learning_rate": 2.25710024492252e-06, "loss": 0.5996, "step": 4309 }, { "epoch": 0.69, "grad_norm": 1.0104775279837934, "learning_rate": 2.254918628563315e-06, "loss": 0.581, "step": 4310 }, { "epoch": 0.69, "grad_norm": 0.6139474355332751, "learning_rate": 2.2527377600399446e-06, "loss": 0.4727, "step": 4311 }, { "epoch": 0.69, "grad_norm": 1.0181499617222898, "learning_rate": 2.2505576399465384e-06, "loss": 0.6151, "step": 4312 }, { "epoch": 0.69, "grad_norm": 1.1593228908256783, "learning_rate": 2.2483782688770208e-06, "loss": 0.6212, "step": 4313 }, { "epoch": 0.7, "grad_norm": 0.9952250140501226, "learning_rate": 2.246199647425109e-06, "loss": 0.5851, "step": 4314 }, { "epoch": 0.7, "grad_norm": 0.9699933128754111, "learning_rate": 2.2440217761843244e-06, "loss": 0.5349, "step": 4315 }, { "epoch": 0.7, "grad_norm": 1.0204612824565595, "learning_rate": 2.241844655747977e-06, "loss": 0.561, "step": 4316 }, { "epoch": 0.7, "grad_norm": 1.0372634162655423, "learning_rate": 2.2396682867091723e-06, "loss": 0.6273, "step": 4317 }, { "epoch": 0.7, "grad_norm": 0.9423573528464719, "learning_rate": 2.237492669660815e-06, "loss": 0.5285, "step": 4318 }, { "epoch": 0.7, "grad_norm": 0.957634302418033, "learning_rate": 2.235317805195606e-06, "loss": 0.4913, "step": 4319 }, { "epoch": 0.7, "grad_norm": 1.0166342127527939, "learning_rate": 2.2331436939060354e-06, "loss": 0.522, "step": 4320 }, { "epoch": 0.7, "grad_norm": 1.1101188505160835, "learning_rate": 2.2309703363843893e-06, "loss": 0.6177, "step": 4321 }, { "epoch": 0.7, "grad_norm": 0.9244857061004736, "learning_rate": 2.2287977332227543e-06, "loss": 0.6043, "step": 4322 }, { "epoch": 0.7, "grad_norm": 1.045407396206926, "learning_rate": 2.2266258850130055e-06, "loss": 0.5881, "step": 4323 }, { "epoch": 0.7, "grad_norm": 1.1283888141294376, "learning_rate": 2.224454792346813e-06, "loss": 0.597, "step": 4324 }, { "epoch": 0.7, "grad_norm": 1.0062355093952606, "learning_rate": 2.2222844558156444e-06, "loss": 0.6127, "step": 4325 }, { "epoch": 0.7, "grad_norm": 1.0296407087292927, "learning_rate": 2.2201148760107568e-06, "loss": 0.59, "step": 4326 }, { "epoch": 0.7, "grad_norm": 0.9510830070004738, "learning_rate": 2.217946053523207e-06, "loss": 0.4927, "step": 4327 }, { "epoch": 0.7, "grad_norm": 1.1201027069792013, "learning_rate": 2.2157779889438393e-06, "loss": 0.6687, "step": 4328 }, { "epoch": 0.7, "grad_norm": 0.9754356050119517, "learning_rate": 2.213610682863293e-06, "loss": 0.548, "step": 4329 }, { "epoch": 0.7, "grad_norm": 0.898160362247851, "learning_rate": 2.2114441358720046e-06, "loss": 0.5496, "step": 4330 }, { "epoch": 0.7, "grad_norm": 0.6505670553356139, "learning_rate": 2.209278348560204e-06, "loss": 0.4776, "step": 4331 }, { "epoch": 0.7, "grad_norm": 1.0686122207263347, "learning_rate": 2.2071133215179043e-06, "loss": 0.5962, "step": 4332 }, { "epoch": 0.7, "grad_norm": 1.024448749725595, "learning_rate": 2.204949055334922e-06, "loss": 0.606, "step": 4333 }, { "epoch": 0.7, "grad_norm": 1.0705894874019695, "learning_rate": 2.202785550600865e-06, "loss": 0.617, "step": 4334 }, { "epoch": 0.7, "grad_norm": 1.1051852097843025, "learning_rate": 2.20062280790513e-06, "loss": 0.6864, "step": 4335 }, { "epoch": 0.7, "grad_norm": 0.8997067110572752, "learning_rate": 2.198460827836905e-06, "loss": 0.588, "step": 4336 }, { "epoch": 0.7, "grad_norm": 1.0083473424166143, "learning_rate": 2.1962996109851757e-06, "loss": 0.5863, "step": 4337 }, { "epoch": 0.7, "grad_norm": 0.9787921633341143, "learning_rate": 2.1941391579387204e-06, "loss": 0.561, "step": 4338 }, { "epoch": 0.7, "grad_norm": 1.1118588866905192, "learning_rate": 2.1919794692860992e-06, "loss": 0.6166, "step": 4339 }, { "epoch": 0.7, "grad_norm": 1.0413830326226816, "learning_rate": 2.189820545615674e-06, "loss": 0.5707, "step": 4340 }, { "epoch": 0.7, "grad_norm": 0.998681071817628, "learning_rate": 2.187662387515597e-06, "loss": 0.5303, "step": 4341 }, { "epoch": 0.7, "grad_norm": 0.940447028568086, "learning_rate": 2.1855049955738077e-06, "loss": 0.5048, "step": 4342 }, { "epoch": 0.7, "grad_norm": 0.9218426914007682, "learning_rate": 2.1833483703780383e-06, "loss": 0.5227, "step": 4343 }, { "epoch": 0.7, "grad_norm": 0.9716910169013515, "learning_rate": 2.1811925125158154e-06, "loss": 0.5136, "step": 4344 }, { "epoch": 0.7, "grad_norm": 0.9314152784886676, "learning_rate": 2.1790374225744537e-06, "loss": 0.5814, "step": 4345 }, { "epoch": 0.7, "grad_norm": 0.98972658540485, "learning_rate": 2.1768831011410567e-06, "loss": 0.525, "step": 4346 }, { "epoch": 0.7, "grad_norm": 0.9605991279313632, "learning_rate": 2.174729548802524e-06, "loss": 0.5193, "step": 4347 }, { "epoch": 0.7, "grad_norm": 1.078976858560916, "learning_rate": 2.1725767661455393e-06, "loss": 0.595, "step": 4348 }, { "epoch": 0.7, "grad_norm": 0.6176878710571864, "learning_rate": 2.170424753756584e-06, "loss": 0.4309, "step": 4349 }, { "epoch": 0.7, "grad_norm": 0.979086988625556, "learning_rate": 2.1682735122219236e-06, "loss": 0.5947, "step": 4350 }, { "epoch": 0.7, "grad_norm": 0.9957832274404707, "learning_rate": 2.1661230421276134e-06, "loss": 0.565, "step": 4351 }, { "epoch": 0.7, "grad_norm": 0.9467792076769326, "learning_rate": 2.163973344059503e-06, "loss": 0.5047, "step": 4352 }, { "epoch": 0.7, "grad_norm": 1.1187075589984037, "learning_rate": 2.1618244186032328e-06, "loss": 0.6377, "step": 4353 }, { "epoch": 0.7, "grad_norm": 1.1229383198837464, "learning_rate": 2.159676266344222e-06, "loss": 0.63, "step": 4354 }, { "epoch": 0.7, "grad_norm": 1.0570499616688716, "learning_rate": 2.15752888786769e-06, "loss": 0.5871, "step": 4355 }, { "epoch": 0.7, "grad_norm": 1.0281149312130586, "learning_rate": 2.155382283758644e-06, "loss": 0.5501, "step": 4356 }, { "epoch": 0.7, "grad_norm": 0.7026934038065543, "learning_rate": 2.153236454601877e-06, "loss": 0.45, "step": 4357 }, { "epoch": 0.7, "grad_norm": 0.9983436858585417, "learning_rate": 2.1510914009819684e-06, "loss": 0.5696, "step": 4358 }, { "epoch": 0.7, "grad_norm": 1.0748328530273754, "learning_rate": 2.148947123483293e-06, "loss": 0.5201, "step": 4359 }, { "epoch": 0.7, "grad_norm": 0.9379861049735776, "learning_rate": 2.146803622690015e-06, "loss": 0.5862, "step": 4360 }, { "epoch": 0.7, "grad_norm": 0.9897210265157065, "learning_rate": 2.144660899186075e-06, "loss": 0.5239, "step": 4361 }, { "epoch": 0.7, "grad_norm": 1.118871363739599, "learning_rate": 2.1425189535552143e-06, "loss": 0.5699, "step": 4362 }, { "epoch": 0.7, "grad_norm": 0.9390580780275393, "learning_rate": 2.1403777863809594e-06, "loss": 0.6308, "step": 4363 }, { "epoch": 0.7, "grad_norm": 0.9429519936250398, "learning_rate": 2.1382373982466213e-06, "loss": 0.5525, "step": 4364 }, { "epoch": 0.7, "grad_norm": 1.103850596742432, "learning_rate": 2.1360977897352992e-06, "loss": 0.685, "step": 4365 }, { "epoch": 0.7, "grad_norm": 0.9594066969595867, "learning_rate": 2.1339589614298844e-06, "loss": 0.5418, "step": 4366 }, { "epoch": 0.7, "grad_norm": 0.9897303244456891, "learning_rate": 2.131820913913052e-06, "loss": 0.6349, "step": 4367 }, { "epoch": 0.7, "grad_norm": 1.0708054786150407, "learning_rate": 2.129683647767262e-06, "loss": 0.6332, "step": 4368 }, { "epoch": 0.7, "grad_norm": 1.0103269301851554, "learning_rate": 2.1275471635747695e-06, "loss": 0.5688, "step": 4369 }, { "epoch": 0.7, "grad_norm": 0.9747713866377347, "learning_rate": 2.125411461917607e-06, "loss": 0.5695, "step": 4370 }, { "epoch": 0.7, "grad_norm": 1.0066227660160711, "learning_rate": 2.1232765433776033e-06, "loss": 0.5459, "step": 4371 }, { "epoch": 0.7, "grad_norm": 1.0510292753178838, "learning_rate": 2.121142408536366e-06, "loss": 0.5927, "step": 4372 }, { "epoch": 0.7, "grad_norm": 0.9575320067407117, "learning_rate": 2.1190090579752908e-06, "loss": 0.608, "step": 4373 }, { "epoch": 0.7, "grad_norm": 0.9561336488417097, "learning_rate": 2.116876492275565e-06, "loss": 0.5464, "step": 4374 }, { "epoch": 0.7, "grad_norm": 1.1413714150507352, "learning_rate": 2.114744712018155e-06, "loss": 0.6146, "step": 4375 }, { "epoch": 0.71, "grad_norm": 1.0571317172190016, "learning_rate": 2.1126137177838197e-06, "loss": 0.6558, "step": 4376 }, { "epoch": 0.71, "grad_norm": 1.0514399351145252, "learning_rate": 2.110483510153097e-06, "loss": 0.5739, "step": 4377 }, { "epoch": 0.71, "grad_norm": 1.0652555507786652, "learning_rate": 2.108354089706318e-06, "loss": 0.5817, "step": 4378 }, { "epoch": 0.71, "grad_norm": 1.074606568415449, "learning_rate": 2.1062254570235934e-06, "loss": 0.6285, "step": 4379 }, { "epoch": 0.71, "grad_norm": 0.9495002344301338, "learning_rate": 2.1040976126848208e-06, "loss": 0.5789, "step": 4380 }, { "epoch": 0.71, "grad_norm": 0.9365276027535807, "learning_rate": 2.1019705572696836e-06, "loss": 0.5452, "step": 4381 }, { "epoch": 0.71, "grad_norm": 1.1532710146626786, "learning_rate": 2.099844291357655e-06, "loss": 0.6162, "step": 4382 }, { "epoch": 0.71, "grad_norm": 0.9566003170692688, "learning_rate": 2.0977188155279815e-06, "loss": 0.612, "step": 4383 }, { "epoch": 0.71, "grad_norm": 1.064451984457309, "learning_rate": 2.095594130359704e-06, "loss": 0.52, "step": 4384 }, { "epoch": 0.71, "grad_norm": 0.9721030269934459, "learning_rate": 2.093470236431648e-06, "loss": 0.5816, "step": 4385 }, { "epoch": 0.71, "grad_norm": 1.0263124023787649, "learning_rate": 2.0913471343224183e-06, "loss": 0.6105, "step": 4386 }, { "epoch": 0.71, "grad_norm": 1.0360248695281542, "learning_rate": 2.0892248246104047e-06, "loss": 0.5846, "step": 4387 }, { "epoch": 0.71, "grad_norm": 0.9599371113155775, "learning_rate": 2.0871033078737875e-06, "loss": 0.5488, "step": 4388 }, { "epoch": 0.71, "grad_norm": 1.0629735296058986, "learning_rate": 2.0849825846905223e-06, "loss": 0.5669, "step": 4389 }, { "epoch": 0.71, "grad_norm": 1.037818797492482, "learning_rate": 2.0828626556383534e-06, "loss": 0.551, "step": 4390 }, { "epoch": 0.71, "grad_norm": 1.001975935238851, "learning_rate": 2.0807435212948094e-06, "loss": 0.5208, "step": 4391 }, { "epoch": 0.71, "grad_norm": 1.0102894404564389, "learning_rate": 2.078625182237199e-06, "loss": 0.5894, "step": 4392 }, { "epoch": 0.71, "grad_norm": 0.9488442323108185, "learning_rate": 2.0765076390426187e-06, "loss": 0.5129, "step": 4393 }, { "epoch": 0.71, "grad_norm": 1.013179202559768, "learning_rate": 2.074390892287944e-06, "loss": 0.5734, "step": 4394 }, { "epoch": 0.71, "grad_norm": 1.0036265240563709, "learning_rate": 2.0722749425498332e-06, "loss": 0.599, "step": 4395 }, { "epoch": 0.71, "grad_norm": 1.0307054782037015, "learning_rate": 2.0701597904047332e-06, "loss": 0.6025, "step": 4396 }, { "epoch": 0.71, "grad_norm": 1.0780990306793368, "learning_rate": 2.068045436428866e-06, "loss": 0.6645, "step": 4397 }, { "epoch": 0.71, "grad_norm": 1.031579837261566, "learning_rate": 2.0659318811982433e-06, "loss": 0.5413, "step": 4398 }, { "epoch": 0.71, "grad_norm": 0.9942503653210693, "learning_rate": 2.0638191252886526e-06, "loss": 0.6074, "step": 4399 }, { "epoch": 0.71, "grad_norm": 0.973968583670157, "learning_rate": 2.06170716927567e-06, "loss": 0.5242, "step": 4400 }, { "epoch": 0.71, "grad_norm": 1.040580949940331, "learning_rate": 2.0595960137346494e-06, "loss": 0.5865, "step": 4401 }, { "epoch": 0.71, "grad_norm": 0.9637768632991652, "learning_rate": 2.0574856592407255e-06, "loss": 0.5554, "step": 4402 }, { "epoch": 0.71, "grad_norm": 0.9941152391196982, "learning_rate": 2.0553761063688204e-06, "loss": 0.5806, "step": 4403 }, { "epoch": 0.71, "grad_norm": 1.048443298701311, "learning_rate": 2.0532673556936333e-06, "loss": 0.6085, "step": 4404 }, { "epoch": 0.71, "grad_norm": 1.1441486685205917, "learning_rate": 2.051159407789644e-06, "loss": 0.6071, "step": 4405 }, { "epoch": 0.71, "grad_norm": 1.0288277200021714, "learning_rate": 2.0490522632311173e-06, "loss": 0.5338, "step": 4406 }, { "epoch": 0.71, "grad_norm": 1.0605927369688255, "learning_rate": 2.0469459225920987e-06, "loss": 0.5226, "step": 4407 }, { "epoch": 0.71, "grad_norm": 1.063853287339275, "learning_rate": 2.0448403864464123e-06, "loss": 0.6315, "step": 4408 }, { "epoch": 0.71, "grad_norm": 0.9568009469530431, "learning_rate": 2.0427356553676625e-06, "loss": 0.5455, "step": 4409 }, { "epoch": 0.71, "grad_norm": 0.9576294182197103, "learning_rate": 2.0406317299292394e-06, "loss": 0.5512, "step": 4410 }, { "epoch": 0.71, "grad_norm": 1.1457119517243424, "learning_rate": 2.0385286107043086e-06, "loss": 0.6531, "step": 4411 }, { "epoch": 0.71, "grad_norm": 0.9673911766082235, "learning_rate": 2.0364262982658155e-06, "loss": 0.5254, "step": 4412 }, { "epoch": 0.71, "grad_norm": 1.1156946921945767, "learning_rate": 2.0343247931864927e-06, "loss": 0.6367, "step": 4413 }, { "epoch": 0.71, "grad_norm": 1.023979847371299, "learning_rate": 2.0322240960388436e-06, "loss": 0.6001, "step": 4414 }, { "epoch": 0.71, "grad_norm": 1.1182797265223214, "learning_rate": 2.030124207395159e-06, "loss": 0.612, "step": 4415 }, { "epoch": 0.71, "grad_norm": 1.0626283932530691, "learning_rate": 2.0280251278275043e-06, "loss": 0.6504, "step": 4416 }, { "epoch": 0.71, "grad_norm": 0.9977280018997456, "learning_rate": 2.0259268579077295e-06, "loss": 0.5169, "step": 4417 }, { "epoch": 0.71, "grad_norm": 0.9332323780212993, "learning_rate": 2.0238293982074593e-06, "loss": 0.5012, "step": 4418 }, { "epoch": 0.71, "grad_norm": 1.0325264759062336, "learning_rate": 2.0217327492980977e-06, "loss": 0.6486, "step": 4419 }, { "epoch": 0.71, "grad_norm": 1.040979137140815, "learning_rate": 2.0196369117508336e-06, "loss": 0.5435, "step": 4420 }, { "epoch": 0.71, "grad_norm": 0.9525682949156852, "learning_rate": 2.017541886136627e-06, "loss": 0.5725, "step": 4421 }, { "epoch": 0.71, "grad_norm": 0.9340170760755132, "learning_rate": 2.0154476730262244e-06, "loss": 0.5903, "step": 4422 }, { "epoch": 0.71, "grad_norm": 1.0271993330307705, "learning_rate": 2.013354272990145e-06, "loss": 0.6254, "step": 4423 }, { "epoch": 0.71, "grad_norm": 1.078332499629157, "learning_rate": 2.011261686598688e-06, "loss": 0.5352, "step": 4424 }, { "epoch": 0.71, "grad_norm": 1.0487546083841914, "learning_rate": 2.009169914421934e-06, "loss": 0.6782, "step": 4425 }, { "epoch": 0.71, "grad_norm": 0.9895600591381529, "learning_rate": 2.0070789570297377e-06, "loss": 0.5546, "step": 4426 }, { "epoch": 0.71, "grad_norm": 1.0038166561717847, "learning_rate": 2.004988814991732e-06, "loss": 0.673, "step": 4427 }, { "epoch": 0.71, "grad_norm": 1.0515045812428239, "learning_rate": 2.002899488877332e-06, "loss": 0.5804, "step": 4428 }, { "epoch": 0.71, "grad_norm": 0.948833130784876, "learning_rate": 2.000810979255728e-06, "loss": 0.5491, "step": 4429 }, { "epoch": 0.71, "grad_norm": 1.0846637818769351, "learning_rate": 1.9987232866958863e-06, "loss": 0.6186, "step": 4430 }, { "epoch": 0.71, "grad_norm": 0.9651124960764791, "learning_rate": 1.9966364117665505e-06, "loss": 0.5391, "step": 4431 }, { "epoch": 0.71, "grad_norm": 1.0853738563186297, "learning_rate": 1.9945503550362453e-06, "loss": 0.6483, "step": 4432 }, { "epoch": 0.71, "grad_norm": 1.0792623974146165, "learning_rate": 1.99246511707327e-06, "loss": 0.6042, "step": 4433 }, { "epoch": 0.71, "grad_norm": 1.0440470598253824, "learning_rate": 1.990380698445698e-06, "loss": 0.5338, "step": 4434 }, { "epoch": 0.71, "grad_norm": 0.9688458860415644, "learning_rate": 1.988297099721384e-06, "loss": 0.5584, "step": 4435 }, { "epoch": 0.71, "grad_norm": 0.9717499282735592, "learning_rate": 1.9862143214679606e-06, "loss": 0.5282, "step": 4436 }, { "epoch": 0.71, "grad_norm": 1.0275641805829114, "learning_rate": 1.984132364252831e-06, "loss": 0.5551, "step": 4437 }, { "epoch": 0.72, "grad_norm": 0.6679739863897243, "learning_rate": 1.9820512286431764e-06, "loss": 0.4942, "step": 4438 }, { "epoch": 0.72, "grad_norm": 0.9798136990781231, "learning_rate": 1.979970915205959e-06, "loss": 0.5139, "step": 4439 }, { "epoch": 0.72, "grad_norm": 1.0515300364735751, "learning_rate": 1.9778914245079127e-06, "loss": 0.635, "step": 4440 }, { "epoch": 0.72, "grad_norm": 0.9938738727041243, "learning_rate": 1.975812757115545e-06, "loss": 0.6163, "step": 4441 }, { "epoch": 0.72, "grad_norm": 1.062051535182483, "learning_rate": 1.9737349135951463e-06, "loss": 0.6618, "step": 4442 }, { "epoch": 0.72, "grad_norm": 1.0235032628986918, "learning_rate": 1.971657894512775e-06, "loss": 0.5621, "step": 4443 }, { "epoch": 0.72, "grad_norm": 0.9913046908992199, "learning_rate": 1.9695817004342715e-06, "loss": 0.5873, "step": 4444 }, { "epoch": 0.72, "grad_norm": 1.0113034241138232, "learning_rate": 1.9675063319252467e-06, "loss": 0.5999, "step": 4445 }, { "epoch": 0.72, "grad_norm": 0.9625358337048183, "learning_rate": 1.9654317895510867e-06, "loss": 0.5033, "step": 4446 }, { "epoch": 0.72, "grad_norm": 0.9084037734582943, "learning_rate": 1.9633580738769577e-06, "loss": 0.4943, "step": 4447 }, { "epoch": 0.72, "grad_norm": 1.1276189660817588, "learning_rate": 1.961285185467794e-06, "loss": 0.6612, "step": 4448 }, { "epoch": 0.72, "grad_norm": 0.9858220928266545, "learning_rate": 1.959213124888307e-06, "loss": 0.5048, "step": 4449 }, { "epoch": 0.72, "grad_norm": 0.9688420214644086, "learning_rate": 1.957141892702985e-06, "loss": 0.5402, "step": 4450 }, { "epoch": 0.72, "grad_norm": 1.104374233471592, "learning_rate": 1.9550714894760913e-06, "loss": 0.6701, "step": 4451 }, { "epoch": 0.72, "grad_norm": 0.981839711976729, "learning_rate": 1.9530019157716547e-06, "loss": 0.5938, "step": 4452 }, { "epoch": 0.72, "grad_norm": 0.6822615439349085, "learning_rate": 1.9509331721534873e-06, "loss": 0.4551, "step": 4453 }, { "epoch": 0.72, "grad_norm": 1.1094499461385463, "learning_rate": 1.9488652591851732e-06, "loss": 0.6013, "step": 4454 }, { "epoch": 0.72, "grad_norm": 0.9779556835374327, "learning_rate": 1.9467981774300676e-06, "loss": 0.4785, "step": 4455 }, { "epoch": 0.72, "grad_norm": 0.995450621811704, "learning_rate": 1.944731927451299e-06, "loss": 0.5694, "step": 4456 }, { "epoch": 0.72, "grad_norm": 1.051327162307236, "learning_rate": 1.942666509811772e-06, "loss": 0.5931, "step": 4457 }, { "epoch": 0.72, "grad_norm": 1.0767348300926474, "learning_rate": 1.940601925074165e-06, "loss": 0.6805, "step": 4458 }, { "epoch": 0.72, "grad_norm": 1.1608250767833006, "learning_rate": 1.9385381738009273e-06, "loss": 0.6607, "step": 4459 }, { "epoch": 0.72, "grad_norm": 0.9909462952714984, "learning_rate": 1.936475256554279e-06, "loss": 0.5638, "step": 4460 }, { "epoch": 0.72, "grad_norm": 1.0592733690936156, "learning_rate": 1.9344131738962183e-06, "loss": 0.498, "step": 4461 }, { "epoch": 0.72, "grad_norm": 1.0938205556088303, "learning_rate": 1.932351926388513e-06, "loss": 0.654, "step": 4462 }, { "epoch": 0.72, "grad_norm": 1.064658127442864, "learning_rate": 1.9302915145927015e-06, "loss": 0.5682, "step": 4463 }, { "epoch": 0.72, "grad_norm": 1.038664699191305, "learning_rate": 1.9282319390701e-06, "loss": 0.6282, "step": 4464 }, { "epoch": 0.72, "grad_norm": 1.0010602205718497, "learning_rate": 1.92617320038179e-06, "loss": 0.6063, "step": 4465 }, { "epoch": 0.72, "grad_norm": 1.0443817895368612, "learning_rate": 1.924115299088633e-06, "loss": 0.6471, "step": 4466 }, { "epoch": 0.72, "grad_norm": 1.0522851667469617, "learning_rate": 1.9220582357512555e-06, "loss": 0.6579, "step": 4467 }, { "epoch": 0.72, "grad_norm": 1.039322542338591, "learning_rate": 1.9200020109300567e-06, "loss": 0.5714, "step": 4468 }, { "epoch": 0.72, "grad_norm": 1.0459744987336177, "learning_rate": 1.917946625185213e-06, "loss": 0.5427, "step": 4469 }, { "epoch": 0.72, "grad_norm": 0.9745878427971678, "learning_rate": 1.9158920790766657e-06, "loss": 0.5372, "step": 4470 }, { "epoch": 0.72, "grad_norm": 1.0193460149233513, "learning_rate": 1.913838373164128e-06, "loss": 0.5104, "step": 4471 }, { "epoch": 0.72, "grad_norm": 0.9241309298565304, "learning_rate": 1.911785508007088e-06, "loss": 0.5137, "step": 4472 }, { "epoch": 0.72, "grad_norm": 0.9824555908978065, "learning_rate": 1.9097334841648064e-06, "loss": 0.5437, "step": 4473 }, { "epoch": 0.72, "grad_norm": 1.0243003199284917, "learning_rate": 1.9076823021963036e-06, "loss": 0.5912, "step": 4474 }, { "epoch": 0.72, "grad_norm": 0.9221318019575095, "learning_rate": 1.9056319626603826e-06, "loss": 0.5095, "step": 4475 }, { "epoch": 0.72, "grad_norm": 1.048009878169771, "learning_rate": 1.9035824661156128e-06, "loss": 0.6009, "step": 4476 }, { "epoch": 0.72, "grad_norm": 0.9068629041941682, "learning_rate": 1.9015338131203325e-06, "loss": 0.5156, "step": 4477 }, { "epoch": 0.72, "grad_norm": 1.0595245521769754, "learning_rate": 1.8994860042326501e-06, "loss": 0.582, "step": 4478 }, { "epoch": 0.72, "grad_norm": 0.9963073402615256, "learning_rate": 1.8974390400104458e-06, "loss": 0.5351, "step": 4479 }, { "epoch": 0.72, "grad_norm": 0.9916796836916534, "learning_rate": 1.895392921011373e-06, "loss": 0.5337, "step": 4480 }, { "epoch": 0.72, "grad_norm": 1.080926975716654, "learning_rate": 1.8933476477928447e-06, "loss": 0.6249, "step": 4481 }, { "epoch": 0.72, "grad_norm": 1.054033648421879, "learning_rate": 1.8913032209120519e-06, "loss": 0.6573, "step": 4482 }, { "epoch": 0.72, "grad_norm": 0.98402513733256, "learning_rate": 1.8892596409259556e-06, "loss": 0.6416, "step": 4483 }, { "epoch": 0.72, "grad_norm": 1.058914311388047, "learning_rate": 1.8872169083912806e-06, "loss": 0.5672, "step": 4484 }, { "epoch": 0.72, "grad_norm": 1.0838310564215012, "learning_rate": 1.8851750238645228e-06, "loss": 0.5914, "step": 4485 }, { "epoch": 0.72, "grad_norm": 1.144504469791738, "learning_rate": 1.8831339879019511e-06, "loss": 0.6833, "step": 4486 }, { "epoch": 0.72, "grad_norm": 1.1008378449182945, "learning_rate": 1.881093801059596e-06, "loss": 0.6428, "step": 4487 }, { "epoch": 0.72, "grad_norm": 1.1440230328453846, "learning_rate": 1.879054463893264e-06, "loss": 0.6747, "step": 4488 }, { "epoch": 0.72, "grad_norm": 0.9691767322484067, "learning_rate": 1.8770159769585261e-06, "loss": 0.5642, "step": 4489 }, { "epoch": 0.72, "grad_norm": 0.9882803546430539, "learning_rate": 1.8749783408107192e-06, "loss": 0.5864, "step": 4490 }, { "epoch": 0.72, "grad_norm": 1.0537737422081317, "learning_rate": 1.8729415560049563e-06, "loss": 0.6091, "step": 4491 }, { "epoch": 0.72, "grad_norm": 1.0202667004053467, "learning_rate": 1.8709056230961115e-06, "loss": 0.6378, "step": 4492 }, { "epoch": 0.72, "grad_norm": 0.9731768304347831, "learning_rate": 1.8688705426388264e-06, "loss": 0.6426, "step": 4493 }, { "epoch": 0.72, "grad_norm": 0.9973015812365162, "learning_rate": 1.8668363151875163e-06, "loss": 0.6096, "step": 4494 }, { "epoch": 0.72, "grad_norm": 0.8999630077015925, "learning_rate": 1.864802941296362e-06, "loss": 0.5093, "step": 4495 }, { "epoch": 0.72, "grad_norm": 1.015264257127109, "learning_rate": 1.8627704215193082e-06, "loss": 0.6019, "step": 4496 }, { "epoch": 0.72, "grad_norm": 0.950875653822951, "learning_rate": 1.8607387564100681e-06, "loss": 0.5558, "step": 4497 }, { "epoch": 0.72, "grad_norm": 0.6648692148125289, "learning_rate": 1.858707946522127e-06, "loss": 0.4642, "step": 4498 }, { "epoch": 0.72, "grad_norm": 1.0278146275732263, "learning_rate": 1.8566779924087315e-06, "loss": 0.5462, "step": 4499 }, { "epoch": 0.73, "grad_norm": 1.0370917727633262, "learning_rate": 1.8546488946228952e-06, "loss": 0.5279, "step": 4500 }, { "epoch": 0.73, "grad_norm": 1.0406354720486597, "learning_rate": 1.8526206537174019e-06, "loss": 0.6215, "step": 4501 }, { "epoch": 0.73, "grad_norm": 1.0023033611571697, "learning_rate": 1.8505932702448042e-06, "loss": 0.6234, "step": 4502 }, { "epoch": 0.73, "grad_norm": 0.6682318885547555, "learning_rate": 1.8485667447574101e-06, "loss": 0.4722, "step": 4503 }, { "epoch": 0.73, "grad_norm": 1.1003850765855483, "learning_rate": 1.8465410778073046e-06, "loss": 0.5873, "step": 4504 }, { "epoch": 0.73, "grad_norm": 0.973967877420844, "learning_rate": 1.8445162699463365e-06, "loss": 0.5606, "step": 4505 }, { "epoch": 0.73, "grad_norm": 1.0884594394899498, "learning_rate": 1.842492321726118e-06, "loss": 0.5656, "step": 4506 }, { "epoch": 0.73, "grad_norm": 0.7179925487800976, "learning_rate": 1.8404692336980272e-06, "loss": 0.5039, "step": 4507 }, { "epoch": 0.73, "grad_norm": 1.0411976164716523, "learning_rate": 1.838447006413211e-06, "loss": 0.6579, "step": 4508 }, { "epoch": 0.73, "grad_norm": 0.9937321839601436, "learning_rate": 1.8364256404225777e-06, "loss": 0.5954, "step": 4509 }, { "epoch": 0.73, "grad_norm": 1.1133740945256696, "learning_rate": 1.834405136276806e-06, "loss": 0.5877, "step": 4510 }, { "epoch": 0.73, "grad_norm": 1.0179292627722212, "learning_rate": 1.8323854945263354e-06, "loss": 0.5665, "step": 4511 }, { "epoch": 0.73, "grad_norm": 1.0433403483680341, "learning_rate": 1.83036671572137e-06, "loss": 0.627, "step": 4512 }, { "epoch": 0.73, "grad_norm": 1.0358019075219171, "learning_rate": 1.828348800411885e-06, "loss": 0.5829, "step": 4513 }, { "epoch": 0.73, "grad_norm": 0.9492425744452456, "learning_rate": 1.8263317491476112e-06, "loss": 0.5176, "step": 4514 }, { "epoch": 0.73, "grad_norm": 1.0682577920385135, "learning_rate": 1.824315562478054e-06, "loss": 0.5408, "step": 4515 }, { "epoch": 0.73, "grad_norm": 1.1026432289393973, "learning_rate": 1.8223002409524736e-06, "loss": 0.6425, "step": 4516 }, { "epoch": 0.73, "grad_norm": 1.043240757393761, "learning_rate": 1.8202857851199034e-06, "loss": 0.469, "step": 4517 }, { "epoch": 0.73, "grad_norm": 1.0258577824967918, "learning_rate": 1.818272195529135e-06, "loss": 0.5508, "step": 4518 }, { "epoch": 0.73, "grad_norm": 1.074717332755621, "learning_rate": 1.816259472728723e-06, "loss": 0.6264, "step": 4519 }, { "epoch": 0.73, "grad_norm": 1.1268831032505362, "learning_rate": 1.8142476172669932e-06, "loss": 0.5856, "step": 4520 }, { "epoch": 0.73, "grad_norm": 1.1311594111881769, "learning_rate": 1.812236629692028e-06, "loss": 0.5408, "step": 4521 }, { "epoch": 0.73, "grad_norm": 0.9533680480349744, "learning_rate": 1.8102265105516741e-06, "loss": 0.5281, "step": 4522 }, { "epoch": 0.73, "grad_norm": 0.9735073767640183, "learning_rate": 1.8082172603935455e-06, "loss": 0.536, "step": 4523 }, { "epoch": 0.73, "grad_norm": 1.0303294669484855, "learning_rate": 1.8062088797650208e-06, "loss": 0.6707, "step": 4524 }, { "epoch": 0.73, "grad_norm": 0.913712449516118, "learning_rate": 1.804201369213231e-06, "loss": 0.5109, "step": 4525 }, { "epoch": 0.73, "grad_norm": 1.09304499984354, "learning_rate": 1.8021947292850811e-06, "loss": 0.5639, "step": 4526 }, { "epoch": 0.73, "grad_norm": 1.0055565905617532, "learning_rate": 1.8001889605272366e-06, "loss": 0.5189, "step": 4527 }, { "epoch": 0.73, "grad_norm": 0.6665609002423036, "learning_rate": 1.7981840634861225e-06, "loss": 0.4354, "step": 4528 }, { "epoch": 0.73, "grad_norm": 0.9525491987749914, "learning_rate": 1.7961800387079265e-06, "loss": 0.5369, "step": 4529 }, { "epoch": 0.73, "grad_norm": 1.076572602414472, "learning_rate": 1.794176886738604e-06, "loss": 0.5392, "step": 4530 }, { "epoch": 0.73, "grad_norm": 0.9667290953092357, "learning_rate": 1.7921746081238656e-06, "loss": 0.5126, "step": 4531 }, { "epoch": 0.73, "grad_norm": 1.008991971535808, "learning_rate": 1.7901732034091867e-06, "loss": 0.6412, "step": 4532 }, { "epoch": 0.73, "grad_norm": 1.1056895841284429, "learning_rate": 1.788172673139807e-06, "loss": 0.6228, "step": 4533 }, { "epoch": 0.73, "grad_norm": 0.9456323638609678, "learning_rate": 1.7861730178607274e-06, "loss": 0.6157, "step": 4534 }, { "epoch": 0.73, "grad_norm": 1.0313676991501415, "learning_rate": 1.784174238116707e-06, "loss": 0.5862, "step": 4535 }, { "epoch": 0.73, "grad_norm": 0.6908546940926898, "learning_rate": 1.7821763344522675e-06, "loss": 0.4944, "step": 4536 }, { "epoch": 0.73, "grad_norm": 1.00372118721112, "learning_rate": 1.7801793074116958e-06, "loss": 0.5437, "step": 4537 }, { "epoch": 0.73, "grad_norm": 0.9820989058181645, "learning_rate": 1.778183157539034e-06, "loss": 0.6172, "step": 4538 }, { "epoch": 0.73, "grad_norm": 0.9793883487040431, "learning_rate": 1.7761878853780918e-06, "loss": 0.5017, "step": 4539 }, { "epoch": 0.73, "grad_norm": 0.9950062727924022, "learning_rate": 1.774193491472434e-06, "loss": 0.6108, "step": 4540 }, { "epoch": 0.73, "grad_norm": 1.0557316922369198, "learning_rate": 1.772199976365388e-06, "loss": 0.5814, "step": 4541 }, { "epoch": 0.73, "grad_norm": 1.0398378311937757, "learning_rate": 1.7702073406000447e-06, "loss": 0.6045, "step": 4542 }, { "epoch": 0.73, "grad_norm": 0.9760984174009428, "learning_rate": 1.768215584719251e-06, "loss": 0.5338, "step": 4543 }, { "epoch": 0.73, "grad_norm": 0.9503920570903627, "learning_rate": 1.7662247092656159e-06, "loss": 0.564, "step": 4544 }, { "epoch": 0.73, "grad_norm": 1.0012512473734976, "learning_rate": 1.7642347147815091e-06, "loss": 0.5631, "step": 4545 }, { "epoch": 0.73, "grad_norm": 0.9919883870155742, "learning_rate": 1.7622456018090638e-06, "loss": 0.5994, "step": 4546 }, { "epoch": 0.73, "grad_norm": 1.0007699799377054, "learning_rate": 1.7602573708901627e-06, "loss": 0.5202, "step": 4547 }, { "epoch": 0.73, "grad_norm": 0.9630859865282322, "learning_rate": 1.7582700225664574e-06, "loss": 0.5019, "step": 4548 }, { "epoch": 0.73, "grad_norm": 0.9886830241969943, "learning_rate": 1.7562835573793585e-06, "loss": 0.6152, "step": 4549 }, { "epoch": 0.73, "grad_norm": 0.9918944738179931, "learning_rate": 1.7542979758700323e-06, "loss": 0.5511, "step": 4550 }, { "epoch": 0.73, "grad_norm": 1.0186850525783768, "learning_rate": 1.752313278579404e-06, "loss": 0.6056, "step": 4551 }, { "epoch": 0.73, "grad_norm": 1.0692293347575916, "learning_rate": 1.7503294660481635e-06, "loss": 0.5979, "step": 4552 }, { "epoch": 0.73, "grad_norm": 0.9605800372580544, "learning_rate": 1.7483465388167542e-06, "loss": 0.5034, "step": 4553 }, { "epoch": 0.73, "grad_norm": 1.0168235329214348, "learning_rate": 1.746364497425378e-06, "loss": 0.623, "step": 4554 }, { "epoch": 0.73, "grad_norm": 0.975057260985416, "learning_rate": 1.744383342414e-06, "loss": 0.4495, "step": 4555 }, { "epoch": 0.73, "grad_norm": 1.0235361254140782, "learning_rate": 1.7424030743223424e-06, "loss": 0.6545, "step": 4556 }, { "epoch": 0.73, "grad_norm": 1.0008824798564044, "learning_rate": 1.7404236936898837e-06, "loss": 0.5508, "step": 4557 }, { "epoch": 0.73, "grad_norm": 1.0077385296832553, "learning_rate": 1.7384452010558605e-06, "loss": 0.5707, "step": 4558 }, { "epoch": 0.73, "grad_norm": 1.0186551325364035, "learning_rate": 1.736467596959271e-06, "loss": 0.5517, "step": 4559 }, { "epoch": 0.73, "grad_norm": 1.03883226524103, "learning_rate": 1.7344908819388678e-06, "loss": 0.5843, "step": 4560 }, { "epoch": 0.73, "grad_norm": 1.033411049254337, "learning_rate": 1.7325150565331612e-06, "loss": 0.5348, "step": 4561 }, { "epoch": 0.74, "grad_norm": 0.987120115160067, "learning_rate": 1.7305401212804235e-06, "loss": 0.5503, "step": 4562 }, { "epoch": 0.74, "grad_norm": 1.0726670284041682, "learning_rate": 1.7285660767186778e-06, "loss": 0.6184, "step": 4563 }, { "epoch": 0.74, "grad_norm": 0.910549684606902, "learning_rate": 1.7265929233857125e-06, "loss": 0.5881, "step": 4564 }, { "epoch": 0.74, "grad_norm": 1.0959819749300486, "learning_rate": 1.7246206618190676e-06, "loss": 0.6323, "step": 4565 }, { "epoch": 0.74, "grad_norm": 0.6986199185576496, "learning_rate": 1.7226492925560385e-06, "loss": 0.4331, "step": 4566 }, { "epoch": 0.74, "grad_norm": 1.0016474581240205, "learning_rate": 1.7206788161336825e-06, "loss": 0.6044, "step": 4567 }, { "epoch": 0.74, "grad_norm": 1.053930163379281, "learning_rate": 1.7187092330888155e-06, "loss": 0.5755, "step": 4568 }, { "epoch": 0.74, "grad_norm": 0.8222728565654627, "learning_rate": 1.7167405439579994e-06, "loss": 0.4907, "step": 4569 }, { "epoch": 0.74, "grad_norm": 1.015326467079108, "learning_rate": 1.714772749277563e-06, "loss": 0.649, "step": 4570 }, { "epoch": 0.74, "grad_norm": 0.6309767094193446, "learning_rate": 1.7128058495835886e-06, "loss": 0.4573, "step": 4571 }, { "epoch": 0.74, "grad_norm": 1.075567636986535, "learning_rate": 1.7108398454119134e-06, "loss": 0.6686, "step": 4572 }, { "epoch": 0.74, "grad_norm": 1.0369795383662215, "learning_rate": 1.7088747372981285e-06, "loss": 0.5756, "step": 4573 }, { "epoch": 0.74, "grad_norm": 1.0215088796316194, "learning_rate": 1.7069105257775859e-06, "loss": 0.5845, "step": 4574 }, { "epoch": 0.74, "grad_norm": 1.0149531918264814, "learning_rate": 1.7049472113853937e-06, "loss": 0.6007, "step": 4575 }, { "epoch": 0.74, "grad_norm": 0.9497023349984758, "learning_rate": 1.7029847946564066e-06, "loss": 0.5005, "step": 4576 }, { "epoch": 0.74, "grad_norm": 1.0430082551972177, "learning_rate": 1.7010232761252438e-06, "loss": 0.5598, "step": 4577 }, { "epoch": 0.74, "grad_norm": 1.074633601607668, "learning_rate": 1.6990626563262797e-06, "loss": 0.5296, "step": 4578 }, { "epoch": 0.74, "grad_norm": 1.0226885140224544, "learning_rate": 1.6971029357936385e-06, "loss": 0.6796, "step": 4579 }, { "epoch": 0.74, "grad_norm": 0.9326700438658629, "learning_rate": 1.6951441150612008e-06, "loss": 0.5227, "step": 4580 }, { "epoch": 0.74, "grad_norm": 1.0244232479068807, "learning_rate": 1.6931861946626066e-06, "loss": 0.5533, "step": 4581 }, { "epoch": 0.74, "grad_norm": 0.9938922024662841, "learning_rate": 1.6912291751312465e-06, "loss": 0.5854, "step": 4582 }, { "epoch": 0.74, "grad_norm": 1.0084104109247258, "learning_rate": 1.6892730570002635e-06, "loss": 0.5213, "step": 4583 }, { "epoch": 0.74, "grad_norm": 0.9822383826338872, "learning_rate": 1.6873178408025625e-06, "loss": 0.5351, "step": 4584 }, { "epoch": 0.74, "grad_norm": 1.0777081440874667, "learning_rate": 1.6853635270707947e-06, "loss": 0.6314, "step": 4585 }, { "epoch": 0.74, "grad_norm": 0.8765669779006965, "learning_rate": 1.6834101163373716e-06, "loss": 0.4682, "step": 4586 }, { "epoch": 0.74, "grad_norm": 1.0546422316207198, "learning_rate": 1.6814576091344559e-06, "loss": 0.6105, "step": 4587 }, { "epoch": 0.74, "grad_norm": 1.0460391994620626, "learning_rate": 1.6795060059939617e-06, "loss": 0.629, "step": 4588 }, { "epoch": 0.74, "grad_norm": 0.9760723416311925, "learning_rate": 1.6775553074475631e-06, "loss": 0.5809, "step": 4589 }, { "epoch": 0.74, "grad_norm": 1.004342744954546, "learning_rate": 1.6756055140266826e-06, "loss": 0.6542, "step": 4590 }, { "epoch": 0.74, "grad_norm": 0.9876713791277192, "learning_rate": 1.6736566262624959e-06, "loss": 0.6452, "step": 4591 }, { "epoch": 0.74, "grad_norm": 1.030443627761999, "learning_rate": 1.6717086446859354e-06, "loss": 0.5704, "step": 4592 }, { "epoch": 0.74, "grad_norm": 1.12360749664529, "learning_rate": 1.6697615698276864e-06, "loss": 0.5641, "step": 4593 }, { "epoch": 0.74, "grad_norm": 0.9941591519871712, "learning_rate": 1.6678154022181842e-06, "loss": 0.5799, "step": 4594 }, { "epoch": 0.74, "grad_norm": 0.9740282086805269, "learning_rate": 1.6658701423876172e-06, "loss": 0.5553, "step": 4595 }, { "epoch": 0.74, "grad_norm": 1.0068896513255563, "learning_rate": 1.6639257908659294e-06, "loss": 0.6338, "step": 4596 }, { "epoch": 0.74, "grad_norm": 1.0111096460758524, "learning_rate": 1.661982348182819e-06, "loss": 0.6418, "step": 4597 }, { "epoch": 0.74, "grad_norm": 0.9917540142360353, "learning_rate": 1.660039814867726e-06, "loss": 0.5822, "step": 4598 }, { "epoch": 0.74, "grad_norm": 0.9388540241978435, "learning_rate": 1.6580981914498545e-06, "loss": 0.5764, "step": 4599 }, { "epoch": 0.74, "grad_norm": 1.0216502354150883, "learning_rate": 1.6561574784581574e-06, "loss": 0.5439, "step": 4600 }, { "epoch": 0.74, "grad_norm": 1.0258042131043732, "learning_rate": 1.6542176764213364e-06, "loss": 0.6145, "step": 4601 }, { "epoch": 0.74, "grad_norm": 0.9457805795717893, "learning_rate": 1.6522787858678463e-06, "loss": 0.6158, "step": 4602 }, { "epoch": 0.74, "grad_norm": 1.0211130300977866, "learning_rate": 1.6503408073258964e-06, "loss": 0.5648, "step": 4603 }, { "epoch": 0.74, "grad_norm": 1.1836207721646184, "learning_rate": 1.648403741323445e-06, "loss": 0.5979, "step": 4604 }, { "epoch": 0.74, "grad_norm": 1.0726368798535384, "learning_rate": 1.6464675883881998e-06, "loss": 0.514, "step": 4605 }, { "epoch": 0.74, "grad_norm": 1.001707961364406, "learning_rate": 1.6445323490476255e-06, "loss": 0.6539, "step": 4606 }, { "epoch": 0.74, "grad_norm": 1.0060576450149683, "learning_rate": 1.6425980238289324e-06, "loss": 0.6151, "step": 4607 }, { "epoch": 0.74, "grad_norm": 1.0317817956918052, "learning_rate": 1.6406646132590865e-06, "loss": 0.5337, "step": 4608 }, { "epoch": 0.74, "grad_norm": 1.0965988725387565, "learning_rate": 1.6387321178648003e-06, "loss": 0.666, "step": 4609 }, { "epoch": 0.74, "grad_norm": 1.0306348325177166, "learning_rate": 1.6368005381725372e-06, "loss": 0.5865, "step": 4610 }, { "epoch": 0.74, "grad_norm": 1.0350629539880678, "learning_rate": 1.6348698747085168e-06, "loss": 0.6547, "step": 4611 }, { "epoch": 0.74, "grad_norm": 1.052627907632733, "learning_rate": 1.6329401279987023e-06, "loss": 0.5901, "step": 4612 }, { "epoch": 0.74, "grad_norm": 1.2953140377677184, "learning_rate": 1.6310112985688093e-06, "loss": 0.5436, "step": 4613 }, { "epoch": 0.74, "grad_norm": 1.0289040927026327, "learning_rate": 1.629083386944305e-06, "loss": 0.5794, "step": 4614 }, { "epoch": 0.74, "grad_norm": 1.0610051271619096, "learning_rate": 1.6271563936504082e-06, "loss": 0.5643, "step": 4615 }, { "epoch": 0.74, "grad_norm": 1.0501535680735308, "learning_rate": 1.6252303192120821e-06, "loss": 0.5454, "step": 4616 }, { "epoch": 0.74, "grad_norm": 1.0754897924423756, "learning_rate": 1.6233051641540415e-06, "loss": 0.6368, "step": 4617 }, { "epoch": 0.74, "grad_norm": 0.8599721851114317, "learning_rate": 1.6213809290007554e-06, "loss": 0.5357, "step": 4618 }, { "epoch": 0.74, "grad_norm": 1.007530512748564, "learning_rate": 1.6194576142764363e-06, "loss": 0.6103, "step": 4619 }, { "epoch": 0.74, "grad_norm": 0.9223225239681228, "learning_rate": 1.617535220505046e-06, "loss": 0.486, "step": 4620 }, { "epoch": 0.74, "grad_norm": 2.461727661840068, "learning_rate": 1.6156137482103006e-06, "loss": 0.6026, "step": 4621 }, { "epoch": 0.74, "grad_norm": 0.9702424794330411, "learning_rate": 1.6136931979156628e-06, "loss": 0.5598, "step": 4622 }, { "epoch": 0.74, "grad_norm": 1.0954686620276628, "learning_rate": 1.6117735701443416e-06, "loss": 0.5132, "step": 4623 }, { "epoch": 0.75, "grad_norm": 1.0320996486463314, "learning_rate": 1.6098548654192958e-06, "loss": 0.5218, "step": 4624 }, { "epoch": 0.75, "grad_norm": 0.9881557658220156, "learning_rate": 1.6079370842632358e-06, "loss": 0.6314, "step": 4625 }, { "epoch": 0.75, "grad_norm": 0.9993048510708834, "learning_rate": 1.606020227198617e-06, "loss": 0.5527, "step": 4626 }, { "epoch": 0.75, "grad_norm": 1.0049627443895865, "learning_rate": 1.604104294747642e-06, "loss": 0.6379, "step": 4627 }, { "epoch": 0.75, "grad_norm": 0.6653413309808434, "learning_rate": 1.6021892874322675e-06, "loss": 0.4696, "step": 4628 }, { "epoch": 0.75, "grad_norm": 0.9910164953967522, "learning_rate": 1.6002752057741906e-06, "loss": 0.5776, "step": 4629 }, { "epoch": 0.75, "grad_norm": 1.0135103755728023, "learning_rate": 1.5983620502948632e-06, "loss": 0.536, "step": 4630 }, { "epoch": 0.75, "grad_norm": 1.0056511059751256, "learning_rate": 1.59644982151548e-06, "loss": 0.547, "step": 4631 }, { "epoch": 0.75, "grad_norm": 1.0747398896875249, "learning_rate": 1.5945385199569836e-06, "loss": 0.5497, "step": 4632 }, { "epoch": 0.75, "grad_norm": 0.9650985332350642, "learning_rate": 1.5926281461400673e-06, "loss": 0.5782, "step": 4633 }, { "epoch": 0.75, "grad_norm": 1.0019111542824164, "learning_rate": 1.5907187005851676e-06, "loss": 0.539, "step": 4634 }, { "epoch": 0.75, "grad_norm": 0.6518307694193087, "learning_rate": 1.588810183812473e-06, "loss": 0.4661, "step": 4635 }, { "epoch": 0.75, "grad_norm": 1.0117046829158827, "learning_rate": 1.5869025963419122e-06, "loss": 0.6053, "step": 4636 }, { "epoch": 0.75, "grad_norm": 0.9912204196638209, "learning_rate": 1.584995938693169e-06, "loss": 0.6106, "step": 4637 }, { "epoch": 0.75, "grad_norm": 1.034026816954276, "learning_rate": 1.5830902113856667e-06, "loss": 0.5606, "step": 4638 }, { "epoch": 0.75, "grad_norm": 1.0038063957306251, "learning_rate": 1.581185414938577e-06, "loss": 0.6445, "step": 4639 }, { "epoch": 0.75, "grad_norm": 1.0960056877163022, "learning_rate": 1.579281549870822e-06, "loss": 0.6455, "step": 4640 }, { "epoch": 0.75, "grad_norm": 1.0493224612790566, "learning_rate": 1.5773786167010657e-06, "loss": 0.636, "step": 4641 }, { "epoch": 0.75, "grad_norm": 1.0104172329272194, "learning_rate": 1.5754766159477174e-06, "loss": 0.5466, "step": 4642 }, { "epoch": 0.75, "grad_norm": 0.9364076472764195, "learning_rate": 1.5735755481289371e-06, "loss": 0.5073, "step": 4643 }, { "epoch": 0.75, "grad_norm": 0.6921175689223544, "learning_rate": 1.5716754137626284e-06, "loss": 0.5014, "step": 4644 }, { "epoch": 0.75, "grad_norm": 1.0571832740687772, "learning_rate": 1.5697762133664396e-06, "loss": 0.5495, "step": 4645 }, { "epoch": 0.75, "grad_norm": 1.0705032073965264, "learning_rate": 1.5678779474577633e-06, "loss": 0.6357, "step": 4646 }, { "epoch": 0.75, "grad_norm": 1.0527774804630468, "learning_rate": 1.5659806165537428e-06, "loss": 0.5679, "step": 4647 }, { "epoch": 0.75, "grad_norm": 0.6439222756368792, "learning_rate": 1.5640842211712614e-06, "loss": 0.4629, "step": 4648 }, { "epoch": 0.75, "grad_norm": 1.0353919131271123, "learning_rate": 1.5621887618269481e-06, "loss": 0.6412, "step": 4649 }, { "epoch": 0.75, "grad_norm": 0.9652472075259956, "learning_rate": 1.5602942390371817e-06, "loss": 0.5282, "step": 4650 }, { "epoch": 0.75, "grad_norm": 1.0112878234701226, "learning_rate": 1.558400653318079e-06, "loss": 0.5504, "step": 4651 }, { "epoch": 0.75, "grad_norm": 1.1112490357883558, "learning_rate": 1.556508005185508e-06, "loss": 0.618, "step": 4652 }, { "epoch": 0.75, "grad_norm": 1.031563008682259, "learning_rate": 1.5546162951550759e-06, "loss": 0.6235, "step": 4653 }, { "epoch": 0.75, "grad_norm": 1.100984084546513, "learning_rate": 1.5527255237421384e-06, "loss": 0.5266, "step": 4654 }, { "epoch": 0.75, "grad_norm": 0.9949047889332807, "learning_rate": 1.5508356914617933e-06, "loss": 0.5041, "step": 4655 }, { "epoch": 0.75, "grad_norm": 0.6986522745554307, "learning_rate": 1.5489467988288809e-06, "loss": 0.4694, "step": 4656 }, { "epoch": 0.75, "grad_norm": 1.0619180370878778, "learning_rate": 1.5470588463579906e-06, "loss": 0.6401, "step": 4657 }, { "epoch": 0.75, "grad_norm": 0.9846845786639264, "learning_rate": 1.5451718345634503e-06, "loss": 0.5767, "step": 4658 }, { "epoch": 0.75, "grad_norm": 1.0556847420811397, "learning_rate": 1.5432857639593362e-06, "loss": 0.567, "step": 4659 }, { "epoch": 0.75, "grad_norm": 1.0182353760342577, "learning_rate": 1.5414006350594658e-06, "loss": 0.5491, "step": 4660 }, { "epoch": 0.75, "grad_norm": 1.1140348348767246, "learning_rate": 1.5395164483773966e-06, "loss": 0.6311, "step": 4661 }, { "epoch": 0.75, "grad_norm": 1.0450967558161928, "learning_rate": 1.537633204426438e-06, "loss": 0.5454, "step": 4662 }, { "epoch": 0.75, "grad_norm": 0.9866874873012441, "learning_rate": 1.535750903719635e-06, "loss": 0.5485, "step": 4663 }, { "epoch": 0.75, "grad_norm": 0.9911353208078594, "learning_rate": 1.5338695467697767e-06, "loss": 0.5321, "step": 4664 }, { "epoch": 0.75, "grad_norm": 1.1003559874522615, "learning_rate": 1.531989134089399e-06, "loss": 0.5864, "step": 4665 }, { "epoch": 0.75, "grad_norm": 0.9172789887628691, "learning_rate": 1.5301096661907782e-06, "loss": 0.4962, "step": 4666 }, { "epoch": 0.75, "grad_norm": 0.9851700993836898, "learning_rate": 1.5282311435859325e-06, "loss": 0.6045, "step": 4667 }, { "epoch": 0.75, "grad_norm": 1.031288476652932, "learning_rate": 1.526353566786622e-06, "loss": 0.572, "step": 4668 }, { "epoch": 0.75, "grad_norm": 0.9060648684565664, "learning_rate": 1.5244769363043527e-06, "loss": 0.5392, "step": 4669 }, { "epoch": 0.75, "grad_norm": 0.9992479866474782, "learning_rate": 1.5226012526503698e-06, "loss": 0.5973, "step": 4670 }, { "epoch": 0.75, "grad_norm": 1.0179862322703919, "learning_rate": 1.5207265163356588e-06, "loss": 0.538, "step": 4671 }, { "epoch": 0.75, "grad_norm": 1.0951226527434563, "learning_rate": 1.5188527278709514e-06, "loss": 0.5914, "step": 4672 }, { "epoch": 0.75, "grad_norm": 0.6836788356717511, "learning_rate": 1.5169798877667207e-06, "loss": 0.4824, "step": 4673 }, { "epoch": 0.75, "grad_norm": 1.11121861056405, "learning_rate": 1.5151079965331788e-06, "loss": 0.5995, "step": 4674 }, { "epoch": 0.75, "grad_norm": 0.985113203331552, "learning_rate": 1.5132370546802794e-06, "loss": 0.6253, "step": 4675 }, { "epoch": 0.75, "grad_norm": 1.0118519168362552, "learning_rate": 1.5113670627177202e-06, "loss": 0.6672, "step": 4676 }, { "epoch": 0.75, "grad_norm": 1.039865145090428, "learning_rate": 1.5094980211549382e-06, "loss": 0.6125, "step": 4677 }, { "epoch": 0.75, "grad_norm": 1.1086955962485365, "learning_rate": 1.5076299305011095e-06, "loss": 0.5682, "step": 4678 }, { "epoch": 0.75, "grad_norm": 0.9723495519802594, "learning_rate": 1.5057627912651574e-06, "loss": 0.5224, "step": 4679 }, { "epoch": 0.75, "grad_norm": 1.091586004249404, "learning_rate": 1.5038966039557384e-06, "loss": 0.6349, "step": 4680 }, { "epoch": 0.75, "grad_norm": 1.0099828131721313, "learning_rate": 1.502031369081257e-06, "loss": 0.5339, "step": 4681 }, { "epoch": 0.75, "grad_norm": 1.0287233837077, "learning_rate": 1.5001670871498524e-06, "loss": 0.5469, "step": 4682 }, { "epoch": 0.75, "grad_norm": 0.9446446320678786, "learning_rate": 1.4983037586694055e-06, "loss": 0.5334, "step": 4683 }, { "epoch": 0.75, "grad_norm": 0.9429833480287023, "learning_rate": 1.4964413841475412e-06, "loss": 0.607, "step": 4684 }, { "epoch": 0.75, "grad_norm": 1.0510871424396153, "learning_rate": 1.4945799640916198e-06, "loss": 0.5784, "step": 4685 }, { "epoch": 0.76, "grad_norm": 0.9808270452977982, "learning_rate": 1.4927194990087418e-06, "loss": 0.536, "step": 4686 }, { "epoch": 0.76, "grad_norm": 1.0326406840165137, "learning_rate": 1.4908599894057512e-06, "loss": 0.6012, "step": 4687 }, { "epoch": 0.76, "grad_norm": 1.0092856723731785, "learning_rate": 1.489001435789233e-06, "loss": 0.5617, "step": 4688 }, { "epoch": 0.76, "grad_norm": 1.0651477063697676, "learning_rate": 1.4871438386655018e-06, "loss": 0.585, "step": 4689 }, { "epoch": 0.76, "grad_norm": 1.1943771231322091, "learning_rate": 1.4852871985406208e-06, "loss": 0.5912, "step": 4690 }, { "epoch": 0.76, "grad_norm": 1.0801611439199188, "learning_rate": 1.4834315159203927e-06, "loss": 0.6097, "step": 4691 }, { "epoch": 0.76, "grad_norm": 1.0415334332116688, "learning_rate": 1.481576791310354e-06, "loss": 0.5874, "step": 4692 }, { "epoch": 0.76, "grad_norm": 0.9298210419573935, "learning_rate": 1.4797230252157818e-06, "loss": 0.5302, "step": 4693 }, { "epoch": 0.76, "grad_norm": 1.1297748903233178, "learning_rate": 1.4778702181416949e-06, "loss": 0.5628, "step": 4694 }, { "epoch": 0.76, "grad_norm": 0.9763905336524379, "learning_rate": 1.4760183705928499e-06, "loss": 0.5381, "step": 4695 }, { "epoch": 0.76, "grad_norm": 1.0617805259257787, "learning_rate": 1.4741674830737396e-06, "loss": 0.5564, "step": 4696 }, { "epoch": 0.76, "grad_norm": 1.0433474367308533, "learning_rate": 1.472317556088596e-06, "loss": 0.6013, "step": 4697 }, { "epoch": 0.76, "grad_norm": 0.9424849864818384, "learning_rate": 1.4704685901413928e-06, "loss": 0.6186, "step": 4698 }, { "epoch": 0.76, "grad_norm": 1.0660765502687504, "learning_rate": 1.468620585735837e-06, "loss": 0.5698, "step": 4699 }, { "epoch": 0.76, "grad_norm": 0.9741464983824553, "learning_rate": 1.466773543375376e-06, "loss": 0.4499, "step": 4700 }, { "epoch": 0.76, "grad_norm": 1.0339087777584084, "learning_rate": 1.4649274635631972e-06, "loss": 0.6527, "step": 4701 }, { "epoch": 0.76, "grad_norm": 1.034828524824342, "learning_rate": 1.4630823468022204e-06, "loss": 0.5456, "step": 4702 }, { "epoch": 0.76, "grad_norm": 0.9564945923499976, "learning_rate": 1.4612381935951093e-06, "loss": 0.4925, "step": 4703 }, { "epoch": 0.76, "grad_norm": 0.9541351045378871, "learning_rate": 1.4593950044442612e-06, "loss": 0.6145, "step": 4704 }, { "epoch": 0.76, "grad_norm": 1.1259528414984303, "learning_rate": 1.4575527798518097e-06, "loss": 0.5822, "step": 4705 }, { "epoch": 0.76, "grad_norm": 1.129462630285597, "learning_rate": 1.4557115203196304e-06, "loss": 0.6138, "step": 4706 }, { "epoch": 0.76, "grad_norm": 1.047974713743179, "learning_rate": 1.453871226349332e-06, "loss": 0.5968, "step": 4707 }, { "epoch": 0.76, "grad_norm": 0.6920814329824146, "learning_rate": 1.45203189844226e-06, "loss": 0.5023, "step": 4708 }, { "epoch": 0.76, "grad_norm": 1.055623339586582, "learning_rate": 1.4501935370994985e-06, "loss": 0.5413, "step": 4709 }, { "epoch": 0.76, "grad_norm": 1.0422968758341848, "learning_rate": 1.4483561428218717e-06, "loss": 0.5834, "step": 4710 }, { "epoch": 0.76, "grad_norm": 1.0129936279358076, "learning_rate": 1.4465197161099305e-06, "loss": 0.6008, "step": 4711 }, { "epoch": 0.76, "grad_norm": 1.0713758779015725, "learning_rate": 1.4446842574639708e-06, "loss": 0.6416, "step": 4712 }, { "epoch": 0.76, "grad_norm": 0.979327402017095, "learning_rate": 1.4428497673840235e-06, "loss": 0.5835, "step": 4713 }, { "epoch": 0.76, "grad_norm": 1.0577682939072912, "learning_rate": 1.441016246369853e-06, "loss": 0.6223, "step": 4714 }, { "epoch": 0.76, "grad_norm": 1.044786129858887, "learning_rate": 1.4391836949209597e-06, "loss": 0.5502, "step": 4715 }, { "epoch": 0.76, "grad_norm": 1.0510924896776026, "learning_rate": 1.437352113536582e-06, "loss": 0.582, "step": 4716 }, { "epoch": 0.76, "grad_norm": 0.9759427130140866, "learning_rate": 1.435521502715697e-06, "loss": 0.5725, "step": 4717 }, { "epoch": 0.76, "grad_norm": 0.9927967192431314, "learning_rate": 1.4336918629570069e-06, "loss": 0.6091, "step": 4718 }, { "epoch": 0.76, "grad_norm": 1.0140102930957529, "learning_rate": 1.431863194758959e-06, "loss": 0.5954, "step": 4719 }, { "epoch": 0.76, "grad_norm": 1.080223294085055, "learning_rate": 1.4300354986197345e-06, "loss": 0.5361, "step": 4720 }, { "epoch": 0.76, "grad_norm": 0.6146691283157173, "learning_rate": 1.4282087750372475e-06, "loss": 0.4262, "step": 4721 }, { "epoch": 0.76, "grad_norm": 1.0592016983361388, "learning_rate": 1.4263830245091454e-06, "loss": 0.5903, "step": 4722 }, { "epoch": 0.76, "grad_norm": 1.0076396928016627, "learning_rate": 1.4245582475328156e-06, "loss": 0.6539, "step": 4723 }, { "epoch": 0.76, "grad_norm": 1.0053110030153154, "learning_rate": 1.4227344446053759e-06, "loss": 0.6438, "step": 4724 }, { "epoch": 0.76, "grad_norm": 1.0818826403837445, "learning_rate": 1.420911616223683e-06, "loss": 0.6361, "step": 4725 }, { "epoch": 0.76, "grad_norm": 1.1270284096979695, "learning_rate": 1.4190897628843242e-06, "loss": 0.6209, "step": 4726 }, { "epoch": 0.76, "grad_norm": 0.9876169412889974, "learning_rate": 1.4172688850836202e-06, "loss": 0.6053, "step": 4727 }, { "epoch": 0.76, "grad_norm": 1.0587138338951596, "learning_rate": 1.4154489833176322e-06, "loss": 0.488, "step": 4728 }, { "epoch": 0.76, "grad_norm": 1.0617718876916362, "learning_rate": 1.41363005808215e-06, "loss": 0.6007, "step": 4729 }, { "epoch": 0.76, "grad_norm": 0.642851754740661, "learning_rate": 1.4118121098726972e-06, "loss": 0.4287, "step": 4730 }, { "epoch": 0.76, "grad_norm": 0.9811546491786569, "learning_rate": 1.4099951391845345e-06, "loss": 0.5174, "step": 4731 }, { "epoch": 0.76, "grad_norm": 0.9804370804678393, "learning_rate": 1.408179146512657e-06, "loss": 0.4698, "step": 4732 }, { "epoch": 0.76, "grad_norm": 1.0558167756053627, "learning_rate": 1.4063641323517886e-06, "loss": 0.6615, "step": 4733 }, { "epoch": 0.76, "grad_norm": 1.0553494180830536, "learning_rate": 1.4045500971963882e-06, "loss": 0.6089, "step": 4734 }, { "epoch": 0.76, "grad_norm": 0.9199044935692631, "learning_rate": 1.4027370415406528e-06, "loss": 0.543, "step": 4735 }, { "epoch": 0.76, "grad_norm": 0.9600058626192374, "learning_rate": 1.4009249658785058e-06, "loss": 0.5626, "step": 4736 }, { "epoch": 0.76, "grad_norm": 1.0085699880186747, "learning_rate": 1.399113870703605e-06, "loss": 0.5655, "step": 4737 }, { "epoch": 0.76, "grad_norm": 1.0819619886971028, "learning_rate": 1.3973037565093455e-06, "loss": 0.6312, "step": 4738 }, { "epoch": 0.76, "grad_norm": 1.0033080411648958, "learning_rate": 1.395494623788855e-06, "loss": 0.5609, "step": 4739 }, { "epoch": 0.76, "grad_norm": 1.0100537800487672, "learning_rate": 1.3936864730349842e-06, "loss": 0.6115, "step": 4740 }, { "epoch": 0.76, "grad_norm": 1.0955957047143055, "learning_rate": 1.3918793047403268e-06, "loss": 0.6378, "step": 4741 }, { "epoch": 0.76, "grad_norm": 0.9682088035205518, "learning_rate": 1.3900731193972073e-06, "loss": 0.5358, "step": 4742 }, { "epoch": 0.76, "grad_norm": 1.1086444138545544, "learning_rate": 1.3882679174976777e-06, "loss": 0.5694, "step": 4743 }, { "epoch": 0.76, "grad_norm": 1.0744788218048613, "learning_rate": 1.386463699533524e-06, "loss": 0.62, "step": 4744 }, { "epoch": 0.76, "grad_norm": 0.9546603740743992, "learning_rate": 1.3846604659962676e-06, "loss": 0.5554, "step": 4745 }, { "epoch": 0.76, "grad_norm": 1.0208412539181988, "learning_rate": 1.3828582173771576e-06, "loss": 0.5437, "step": 4746 }, { "epoch": 0.76, "grad_norm": 0.990630020751945, "learning_rate": 1.3810569541671754e-06, "loss": 0.522, "step": 4747 }, { "epoch": 0.77, "grad_norm": 0.953695675484812, "learning_rate": 1.3792566768570364e-06, "loss": 0.5056, "step": 4748 }, { "epoch": 0.77, "grad_norm": 0.9512996204025755, "learning_rate": 1.3774573859371842e-06, "loss": 0.5249, "step": 4749 }, { "epoch": 0.77, "grad_norm": 1.0491823377594638, "learning_rate": 1.3756590818977972e-06, "loss": 0.5555, "step": 4750 }, { "epoch": 0.77, "grad_norm": 0.6900097313162797, "learning_rate": 1.37386176522878e-06, "loss": 0.4798, "step": 4751 }, { "epoch": 0.77, "grad_norm": 1.0441613387976707, "learning_rate": 1.372065436419775e-06, "loss": 0.5971, "step": 4752 }, { "epoch": 0.77, "grad_norm": 0.9901339001285573, "learning_rate": 1.3702700959601483e-06, "loss": 0.5175, "step": 4753 }, { "epoch": 0.77, "grad_norm": 1.0070733461256531, "learning_rate": 1.368475744339003e-06, "loss": 0.6231, "step": 4754 }, { "epoch": 0.77, "grad_norm": 1.0273213000445216, "learning_rate": 1.366682382045168e-06, "loss": 0.6205, "step": 4755 }, { "epoch": 0.77, "grad_norm": 0.9223905965180255, "learning_rate": 1.364890009567204e-06, "loss": 0.5709, "step": 4756 }, { "epoch": 0.77, "grad_norm": 0.9979029804802128, "learning_rate": 1.3630986273934054e-06, "loss": 0.5988, "step": 4757 }, { "epoch": 0.77, "grad_norm": 1.0688403940386748, "learning_rate": 1.3613082360117924e-06, "loss": 0.5789, "step": 4758 }, { "epoch": 0.77, "grad_norm": 1.0226584745375502, "learning_rate": 1.3595188359101152e-06, "loss": 0.5907, "step": 4759 }, { "epoch": 0.77, "grad_norm": 0.998762771165808, "learning_rate": 1.357730427575858e-06, "loss": 0.5086, "step": 4760 }, { "epoch": 0.77, "grad_norm": 1.101812334761135, "learning_rate": 1.3559430114962345e-06, "loss": 0.5884, "step": 4761 }, { "epoch": 0.77, "grad_norm": 1.1066168158737948, "learning_rate": 1.3541565881581815e-06, "loss": 0.5909, "step": 4762 }, { "epoch": 0.77, "grad_norm": 0.6466744174642701, "learning_rate": 1.3523711580483717e-06, "loss": 0.4377, "step": 4763 }, { "epoch": 0.77, "grad_norm": 1.0994280606255211, "learning_rate": 1.350586721653207e-06, "loss": 0.5579, "step": 4764 }, { "epoch": 0.77, "grad_norm": 1.0602953577208425, "learning_rate": 1.3488032794588168e-06, "loss": 0.6064, "step": 4765 }, { "epoch": 0.77, "grad_norm": 1.152778111492181, "learning_rate": 1.347020831951057e-06, "loss": 0.6428, "step": 4766 }, { "epoch": 0.77, "grad_norm": 1.1205416009889089, "learning_rate": 1.3452393796155194e-06, "loss": 0.5771, "step": 4767 }, { "epoch": 0.77, "grad_norm": 0.9762382449955147, "learning_rate": 1.343458922937519e-06, "loss": 0.4884, "step": 4768 }, { "epoch": 0.77, "grad_norm": 1.049547374661361, "learning_rate": 1.341679462402099e-06, "loss": 0.5797, "step": 4769 }, { "epoch": 0.77, "grad_norm": 1.1160523816536585, "learning_rate": 1.3399009984940376e-06, "loss": 0.6091, "step": 4770 }, { "epoch": 0.77, "grad_norm": 0.9077240129334275, "learning_rate": 1.338123531697834e-06, "loss": 0.5351, "step": 4771 }, { "epoch": 0.77, "grad_norm": 0.9700415414759279, "learning_rate": 1.3363470624977221e-06, "loss": 0.5269, "step": 4772 }, { "epoch": 0.77, "grad_norm": 1.0629540853488928, "learning_rate": 1.3345715913776575e-06, "loss": 0.529, "step": 4773 }, { "epoch": 0.77, "grad_norm": 1.1110386214513914, "learning_rate": 1.3327971188213317e-06, "loss": 0.52, "step": 4774 }, { "epoch": 0.77, "grad_norm": 1.0340044516619724, "learning_rate": 1.3310236453121562e-06, "loss": 0.5697, "step": 4775 }, { "epoch": 0.77, "grad_norm": 1.1617930378848875, "learning_rate": 1.3292511713332767e-06, "loss": 0.6597, "step": 4776 }, { "epoch": 0.77, "grad_norm": 1.0218908291623725, "learning_rate": 1.3274796973675629e-06, "loss": 0.5914, "step": 4777 }, { "epoch": 0.77, "grad_norm": 0.9801214601429244, "learning_rate": 1.3257092238976122e-06, "loss": 0.5198, "step": 4778 }, { "epoch": 0.77, "grad_norm": 1.0268192281898316, "learning_rate": 1.3239397514057523e-06, "loss": 0.5969, "step": 4779 }, { "epoch": 0.77, "grad_norm": 1.0939345103262594, "learning_rate": 1.3221712803740356e-06, "loss": 0.5738, "step": 4780 }, { "epoch": 0.77, "grad_norm": 0.9043580143327139, "learning_rate": 1.3204038112842404e-06, "loss": 0.5433, "step": 4781 }, { "epoch": 0.77, "grad_norm": 1.0705442305321387, "learning_rate": 1.3186373446178757e-06, "loss": 0.6263, "step": 4782 }, { "epoch": 0.77, "grad_norm": 0.9372205255549791, "learning_rate": 1.3168718808561793e-06, "loss": 0.5278, "step": 4783 }, { "epoch": 0.77, "grad_norm": 1.0727860235877, "learning_rate": 1.3151074204801046e-06, "loss": 0.5905, "step": 4784 }, { "epoch": 0.77, "grad_norm": 0.9957230085026105, "learning_rate": 1.313343963970344e-06, "loss": 0.4986, "step": 4785 }, { "epoch": 0.77, "grad_norm": 1.021198510233983, "learning_rate": 1.3115815118073116e-06, "loss": 0.5299, "step": 4786 }, { "epoch": 0.77, "grad_norm": 1.0195905084845667, "learning_rate": 1.3098200644711478e-06, "loss": 0.6474, "step": 4787 }, { "epoch": 0.77, "grad_norm": 1.0291298947187988, "learning_rate": 1.3080596224417174e-06, "loss": 0.5757, "step": 4788 }, { "epoch": 0.77, "grad_norm": 1.0861047473881578, "learning_rate": 1.3063001861986162e-06, "loss": 0.5584, "step": 4789 }, { "epoch": 0.77, "grad_norm": 1.0322124145542402, "learning_rate": 1.3045417562211616e-06, "loss": 0.6033, "step": 4790 }, { "epoch": 0.77, "grad_norm": 0.9895760867156055, "learning_rate": 1.3027843329883972e-06, "loss": 0.5136, "step": 4791 }, { "epoch": 0.77, "grad_norm": 1.0368715567197082, "learning_rate": 1.3010279169790947e-06, "loss": 0.5749, "step": 4792 }, { "epoch": 0.77, "grad_norm": 0.9821782597806226, "learning_rate": 1.2992725086717518e-06, "loss": 0.5896, "step": 4793 }, { "epoch": 0.77, "grad_norm": 0.9734868166777456, "learning_rate": 1.2975181085445887e-06, "loss": 0.5706, "step": 4794 }, { "epoch": 0.77, "grad_norm": 1.0566996243198126, "learning_rate": 1.2957647170755504e-06, "loss": 0.6214, "step": 4795 }, { "epoch": 0.77, "grad_norm": 1.0052147943887189, "learning_rate": 1.2940123347423133e-06, "loss": 0.5403, "step": 4796 }, { "epoch": 0.77, "grad_norm": 1.0247135774526586, "learning_rate": 1.292260962022272e-06, "loss": 0.6127, "step": 4797 }, { "epoch": 0.77, "grad_norm": 0.9880507581468722, "learning_rate": 1.2905105993925477e-06, "loss": 0.5573, "step": 4798 }, { "epoch": 0.77, "grad_norm": 1.0055752594790681, "learning_rate": 1.2887612473299905e-06, "loss": 0.6452, "step": 4799 }, { "epoch": 0.77, "grad_norm": 0.9570762270499491, "learning_rate": 1.2870129063111685e-06, "loss": 0.5762, "step": 4800 }, { "epoch": 0.77, "grad_norm": 1.1612245717039151, "learning_rate": 1.2852655768123811e-06, "loss": 0.5618, "step": 4801 }, { "epoch": 0.77, "grad_norm": 1.0780271235733663, "learning_rate": 1.2835192593096485e-06, "loss": 0.6211, "step": 4802 }, { "epoch": 0.77, "grad_norm": 1.0272610365790378, "learning_rate": 1.2817739542787134e-06, "loss": 0.6028, "step": 4803 }, { "epoch": 0.77, "grad_norm": 0.9308020187175112, "learning_rate": 1.2800296621950463e-06, "loss": 0.5242, "step": 4804 }, { "epoch": 0.77, "grad_norm": 0.9996328125949665, "learning_rate": 1.2782863835338444e-06, "loss": 0.646, "step": 4805 }, { "epoch": 0.77, "grad_norm": 1.0582797614044683, "learning_rate": 1.2765441187700179e-06, "loss": 0.6205, "step": 4806 }, { "epoch": 0.77, "grad_norm": 0.9939757887695724, "learning_rate": 1.2748028683782115e-06, "loss": 0.5454, "step": 4807 }, { "epoch": 0.77, "grad_norm": 1.0809741304944729, "learning_rate": 1.2730626328327906e-06, "loss": 0.5349, "step": 4808 }, { "epoch": 0.77, "grad_norm": 1.016857539652991, "learning_rate": 1.2713234126078423e-06, "loss": 0.6498, "step": 4809 }, { "epoch": 0.77, "grad_norm": 0.9763743942256143, "learning_rate": 1.2695852081771758e-06, "loss": 0.5509, "step": 4810 }, { "epoch": 0.78, "grad_norm": 0.897441652388817, "learning_rate": 1.267848020014329e-06, "loss": 0.4787, "step": 4811 }, { "epoch": 0.78, "grad_norm": 1.0361103215170073, "learning_rate": 1.266111848592561e-06, "loss": 0.5771, "step": 4812 }, { "epoch": 0.78, "grad_norm": 0.9979941579886407, "learning_rate": 1.2643766943848484e-06, "loss": 0.6079, "step": 4813 }, { "epoch": 0.78, "grad_norm": 1.1683656780290095, "learning_rate": 1.2626425578638973e-06, "loss": 0.6393, "step": 4814 }, { "epoch": 0.78, "grad_norm": 1.164056577321666, "learning_rate": 1.2609094395021354e-06, "loss": 0.5224, "step": 4815 }, { "epoch": 0.78, "grad_norm": 0.9992703714677759, "learning_rate": 1.259177339771711e-06, "loss": 0.5495, "step": 4816 }, { "epoch": 0.78, "grad_norm": 0.6615808699624711, "learning_rate": 1.257446259144494e-06, "loss": 0.4415, "step": 4817 }, { "epoch": 0.78, "grad_norm": 1.1307706345043902, "learning_rate": 1.2557161980920824e-06, "loss": 0.6388, "step": 4818 }, { "epoch": 0.78, "grad_norm": 1.0865392636521165, "learning_rate": 1.2539871570857893e-06, "loss": 0.614, "step": 4819 }, { "epoch": 0.78, "grad_norm": 1.0310294755539249, "learning_rate": 1.252259136596653e-06, "loss": 0.5445, "step": 4820 }, { "epoch": 0.78, "grad_norm": 1.003256213122981, "learning_rate": 1.250532137095436e-06, "loss": 0.5397, "step": 4821 }, { "epoch": 0.78, "grad_norm": 1.0131353005072248, "learning_rate": 1.2488061590526185e-06, "loss": 0.5955, "step": 4822 }, { "epoch": 0.78, "grad_norm": 1.1267250074824153, "learning_rate": 1.2470812029384071e-06, "loss": 0.571, "step": 4823 }, { "epoch": 0.78, "grad_norm": 1.0569334713592216, "learning_rate": 1.2453572692227257e-06, "loss": 0.5973, "step": 4824 }, { "epoch": 0.78, "grad_norm": 1.090326288810078, "learning_rate": 1.2436343583752197e-06, "loss": 0.5725, "step": 4825 }, { "epoch": 0.78, "grad_norm": 1.0397862221096719, "learning_rate": 1.2419124708652607e-06, "loss": 0.5649, "step": 4826 }, { "epoch": 0.78, "grad_norm": 1.013949833800736, "learning_rate": 1.2401916071619374e-06, "loss": 0.5968, "step": 4827 }, { "epoch": 0.78, "grad_norm": 0.7003614548727249, "learning_rate": 1.2384717677340585e-06, "loss": 0.4832, "step": 4828 }, { "epoch": 0.78, "grad_norm": 1.1993203312056389, "learning_rate": 1.2367529530501571e-06, "loss": 0.6199, "step": 4829 }, { "epoch": 0.78, "grad_norm": 1.0570309506820976, "learning_rate": 1.2350351635784875e-06, "loss": 0.6219, "step": 4830 }, { "epoch": 0.78, "grad_norm": 0.9740990567289188, "learning_rate": 1.2333183997870207e-06, "loss": 0.6379, "step": 4831 }, { "epoch": 0.78, "grad_norm": 0.9126363079168041, "learning_rate": 1.2316026621434502e-06, "loss": 0.494, "step": 4832 }, { "epoch": 0.78, "grad_norm": 1.0477745684724922, "learning_rate": 1.2298879511151906e-06, "loss": 0.5694, "step": 4833 }, { "epoch": 0.78, "grad_norm": 1.0782862611213115, "learning_rate": 1.2281742671693798e-06, "loss": 0.6169, "step": 4834 }, { "epoch": 0.78, "grad_norm": 1.0301941379476045, "learning_rate": 1.2264616107728666e-06, "loss": 0.5389, "step": 4835 }, { "epoch": 0.78, "grad_norm": 0.9593152225838427, "learning_rate": 1.2247499823922287e-06, "loss": 0.5486, "step": 4836 }, { "epoch": 0.78, "grad_norm": 1.0404751963798367, "learning_rate": 1.2230393824937631e-06, "loss": 0.5849, "step": 4837 }, { "epoch": 0.78, "grad_norm": 1.041562605162502, "learning_rate": 1.2213298115434812e-06, "loss": 0.5269, "step": 4838 }, { "epoch": 0.78, "grad_norm": 1.0538988111924779, "learning_rate": 1.219621270007118e-06, "loss": 0.6464, "step": 4839 }, { "epoch": 0.78, "grad_norm": 1.0371658609559797, "learning_rate": 1.2179137583501282e-06, "loss": 0.623, "step": 4840 }, { "epoch": 0.78, "grad_norm": 0.9447912804203711, "learning_rate": 1.2162072770376848e-06, "loss": 0.4805, "step": 4841 }, { "epoch": 0.78, "grad_norm": 1.111514848970632, "learning_rate": 1.2145018265346786e-06, "loss": 0.5777, "step": 4842 }, { "epoch": 0.78, "grad_norm": 0.9769974053682706, "learning_rate": 1.2127974073057241e-06, "loss": 0.5377, "step": 4843 }, { "epoch": 0.78, "grad_norm": 0.9862941487838945, "learning_rate": 1.2110940198151489e-06, "loss": 0.603, "step": 4844 }, { "epoch": 0.78, "grad_norm": 1.0130055728344844, "learning_rate": 1.2093916645270066e-06, "loss": 0.5406, "step": 4845 }, { "epoch": 0.78, "grad_norm": 1.0765615085256808, "learning_rate": 1.2076903419050629e-06, "loss": 0.6312, "step": 4846 }, { "epoch": 0.78, "grad_norm": 1.0232266963208256, "learning_rate": 1.2059900524128048e-06, "loss": 0.5312, "step": 4847 }, { "epoch": 0.78, "grad_norm": 0.9982036253533321, "learning_rate": 1.2042907965134404e-06, "loss": 0.6082, "step": 4848 }, { "epoch": 0.78, "grad_norm": 1.0435942369917213, "learning_rate": 1.2025925746698918e-06, "loss": 0.6235, "step": 4849 }, { "epoch": 0.78, "grad_norm": 0.9442277551631802, "learning_rate": 1.200895387344801e-06, "loss": 0.493, "step": 4850 }, { "epoch": 0.78, "grad_norm": 1.0198683266561288, "learning_rate": 1.1991992350005294e-06, "loss": 0.6288, "step": 4851 }, { "epoch": 0.78, "grad_norm": 1.0030860410892632, "learning_rate": 1.1975041180991576e-06, "loss": 0.5933, "step": 4852 }, { "epoch": 0.78, "grad_norm": 0.9540299322996658, "learning_rate": 1.19581003710248e-06, "loss": 0.5504, "step": 4853 }, { "epoch": 0.78, "grad_norm": 0.9785886419739728, "learning_rate": 1.1941169924720103e-06, "loss": 0.518, "step": 4854 }, { "epoch": 0.78, "grad_norm": 1.0224131136273988, "learning_rate": 1.1924249846689835e-06, "loss": 0.5245, "step": 4855 }, { "epoch": 0.78, "grad_norm": 0.942766505277974, "learning_rate": 1.1907340141543466e-06, "loss": 0.5372, "step": 4856 }, { "epoch": 0.78, "grad_norm": 1.137324298188351, "learning_rate": 1.189044081388766e-06, "loss": 0.6422, "step": 4857 }, { "epoch": 0.78, "grad_norm": 0.6170500544403583, "learning_rate": 1.1873551868326272e-06, "loss": 0.4597, "step": 4858 }, { "epoch": 0.78, "grad_norm": 0.970180802878914, "learning_rate": 1.185667330946033e-06, "loss": 0.5872, "step": 4859 }, { "epoch": 0.78, "grad_norm": 0.9432730718589339, "learning_rate": 1.1839805141888012e-06, "loss": 0.507, "step": 4860 }, { "epoch": 0.78, "grad_norm": 0.998595924766067, "learning_rate": 1.1822947370204647e-06, "loss": 0.5216, "step": 4861 }, { "epoch": 0.78, "grad_norm": 1.0692182074604981, "learning_rate": 1.180609999900279e-06, "loss": 0.5224, "step": 4862 }, { "epoch": 0.78, "grad_norm": 1.1028190461750578, "learning_rate": 1.1789263032872112e-06, "loss": 0.596, "step": 4863 }, { "epoch": 0.78, "grad_norm": 0.972959880949075, "learning_rate": 1.1772436476399456e-06, "loss": 0.5082, "step": 4864 }, { "epoch": 0.78, "grad_norm": 1.035319628683588, "learning_rate": 1.1755620334168866e-06, "loss": 0.6499, "step": 4865 }, { "epoch": 0.78, "grad_norm": 1.0631279209034292, "learning_rate": 1.1738814610761496e-06, "loss": 0.6126, "step": 4866 }, { "epoch": 0.78, "grad_norm": 1.0435218505550952, "learning_rate": 1.1722019310755717e-06, "loss": 0.6054, "step": 4867 }, { "epoch": 0.78, "grad_norm": 0.9892401393099272, "learning_rate": 1.1705234438727015e-06, "loss": 0.5468, "step": 4868 }, { "epoch": 0.78, "grad_norm": 1.087570974149394, "learning_rate": 1.1688459999248042e-06, "loss": 0.6364, "step": 4869 }, { "epoch": 0.78, "grad_norm": 1.0356677646484551, "learning_rate": 1.1671695996888637e-06, "loss": 0.551, "step": 4870 }, { "epoch": 0.78, "grad_norm": 0.9773303193903512, "learning_rate": 1.165494243621576e-06, "loss": 0.4945, "step": 4871 }, { "epoch": 0.78, "grad_norm": 1.0469000872910896, "learning_rate": 1.1638199321793563e-06, "loss": 0.6011, "step": 4872 }, { "epoch": 0.79, "grad_norm": 0.6385754600794116, "learning_rate": 1.1621466658183306e-06, "loss": 0.4718, "step": 4873 }, { "epoch": 0.79, "grad_norm": 0.9796142423707253, "learning_rate": 1.1604744449943455e-06, "loss": 0.6125, "step": 4874 }, { "epoch": 0.79, "grad_norm": 0.9449258643922108, "learning_rate": 1.1588032701629592e-06, "loss": 0.4749, "step": 4875 }, { "epoch": 0.79, "grad_norm": 0.9889890888196375, "learning_rate": 1.1571331417794435e-06, "loss": 0.6487, "step": 4876 }, { "epoch": 0.79, "grad_norm": 1.0570777451080675, "learning_rate": 1.1554640602987905e-06, "loss": 0.587, "step": 4877 }, { "epoch": 0.79, "grad_norm": 0.9952627243540655, "learning_rate": 1.153796026175703e-06, "loss": 0.5791, "step": 4878 }, { "epoch": 0.79, "grad_norm": 0.9907503735854721, "learning_rate": 1.1521290398645978e-06, "loss": 0.5526, "step": 4879 }, { "epoch": 0.79, "grad_norm": 1.0404907860076669, "learning_rate": 1.150463101819609e-06, "loss": 0.6021, "step": 4880 }, { "epoch": 0.79, "grad_norm": 0.9774002395645606, "learning_rate": 1.1487982124945861e-06, "loss": 0.4974, "step": 4881 }, { "epoch": 0.79, "grad_norm": 1.0297807045282714, "learning_rate": 1.147134372343089e-06, "loss": 0.5757, "step": 4882 }, { "epoch": 0.79, "grad_norm": 1.1197887877110793, "learning_rate": 1.1454715818183927e-06, "loss": 0.6369, "step": 4883 }, { "epoch": 0.79, "grad_norm": 0.9772213488602425, "learning_rate": 1.1438098413734888e-06, "loss": 0.4955, "step": 4884 }, { "epoch": 0.79, "grad_norm": 1.0546868775547618, "learning_rate": 1.142149151461081e-06, "loss": 0.5916, "step": 4885 }, { "epoch": 0.79, "grad_norm": 1.0184133691652397, "learning_rate": 1.1404895125335859e-06, "loss": 0.5497, "step": 4886 }, { "epoch": 0.79, "grad_norm": 1.0336824100110868, "learning_rate": 1.1388309250431363e-06, "loss": 0.614, "step": 4887 }, { "epoch": 0.79, "grad_norm": 0.9875244583500011, "learning_rate": 1.1371733894415748e-06, "loss": 0.5408, "step": 4888 }, { "epoch": 0.79, "grad_norm": 1.1588956728125823, "learning_rate": 1.1355169061804632e-06, "loss": 0.6184, "step": 4889 }, { "epoch": 0.79, "grad_norm": 0.994247924798844, "learning_rate": 1.1338614757110706e-06, "loss": 0.6087, "step": 4890 }, { "epoch": 0.79, "grad_norm": 1.047226355969621, "learning_rate": 1.1322070984843837e-06, "loss": 0.5653, "step": 4891 }, { "epoch": 0.79, "grad_norm": 1.0332391159342798, "learning_rate": 1.1305537749510993e-06, "loss": 0.6379, "step": 4892 }, { "epoch": 0.79, "grad_norm": 0.9083993973695406, "learning_rate": 1.128901505561627e-06, "loss": 0.5347, "step": 4893 }, { "epoch": 0.79, "grad_norm": 1.0579780289118221, "learning_rate": 1.1272502907660937e-06, "loss": 0.5138, "step": 4894 }, { "epoch": 0.79, "grad_norm": 0.9773930014324707, "learning_rate": 1.1256001310143327e-06, "loss": 0.5644, "step": 4895 }, { "epoch": 0.79, "grad_norm": 0.6356582770548203, "learning_rate": 1.1239510267558962e-06, "loss": 0.4536, "step": 4896 }, { "epoch": 0.79, "grad_norm": 0.9905671215844932, "learning_rate": 1.1223029784400436e-06, "loss": 0.5845, "step": 4897 }, { "epoch": 0.79, "grad_norm": 1.0013842924128515, "learning_rate": 1.120655986515748e-06, "loss": 0.5283, "step": 4898 }, { "epoch": 0.79, "grad_norm": 0.9990999749162804, "learning_rate": 1.1190100514316977e-06, "loss": 0.5798, "step": 4899 }, { "epoch": 0.79, "grad_norm": 0.9457899880933173, "learning_rate": 1.1173651736362889e-06, "loss": 0.5252, "step": 4900 }, { "epoch": 0.79, "grad_norm": 0.9810971111077363, "learning_rate": 1.1157213535776312e-06, "loss": 0.5067, "step": 4901 }, { "epoch": 0.79, "grad_norm": 0.9905835598361677, "learning_rate": 1.114078591703548e-06, "loss": 0.5389, "step": 4902 }, { "epoch": 0.79, "grad_norm": 0.9524603670153281, "learning_rate": 1.1124368884615748e-06, "loss": 0.5458, "step": 4903 }, { "epoch": 0.79, "grad_norm": 1.0509611355104258, "learning_rate": 1.1107962442989518e-06, "loss": 0.5741, "step": 4904 }, { "epoch": 0.79, "grad_norm": 1.0205756768538135, "learning_rate": 1.1091566596626384e-06, "loss": 0.5731, "step": 4905 }, { "epoch": 0.79, "grad_norm": 0.9603002272771002, "learning_rate": 1.1075181349993042e-06, "loss": 0.583, "step": 4906 }, { "epoch": 0.79, "grad_norm": 0.9621613892360988, "learning_rate": 1.1058806707553266e-06, "loss": 0.5466, "step": 4907 }, { "epoch": 0.79, "grad_norm": 0.9918935232987662, "learning_rate": 1.104244267376795e-06, "loss": 0.6225, "step": 4908 }, { "epoch": 0.79, "grad_norm": 1.0730732088409445, "learning_rate": 1.1026089253095134e-06, "loss": 0.6161, "step": 4909 }, { "epoch": 0.79, "grad_norm": 1.037998289469748, "learning_rate": 1.1009746449989916e-06, "loss": 0.6627, "step": 4910 }, { "epoch": 0.79, "grad_norm": 1.0111287066663015, "learning_rate": 1.0993414268904552e-06, "loss": 0.5532, "step": 4911 }, { "epoch": 0.79, "grad_norm": 1.0122731097177788, "learning_rate": 1.0977092714288345e-06, "loss": 0.5719, "step": 4912 }, { "epoch": 0.79, "grad_norm": 0.9441848115210496, "learning_rate": 1.0960781790587776e-06, "loss": 0.6116, "step": 4913 }, { "epoch": 0.79, "grad_norm": 0.9630579437179908, "learning_rate": 1.0944481502246368e-06, "loss": 0.5264, "step": 4914 }, { "epoch": 0.79, "grad_norm": 1.118526695402412, "learning_rate": 1.0928191853704757e-06, "loss": 0.5988, "step": 4915 }, { "epoch": 0.79, "grad_norm": 1.0739731711401403, "learning_rate": 1.0911912849400712e-06, "loss": 0.6152, "step": 4916 }, { "epoch": 0.79, "grad_norm": 0.9748309955896621, "learning_rate": 1.089564449376907e-06, "loss": 0.5271, "step": 4917 }, { "epoch": 0.79, "grad_norm": 1.0871552072922026, "learning_rate": 1.0879386791241797e-06, "loss": 0.6041, "step": 4918 }, { "epoch": 0.79, "grad_norm": 0.9902936428489488, "learning_rate": 1.086313974624793e-06, "loss": 0.5552, "step": 4919 }, { "epoch": 0.79, "grad_norm": 0.9883066875537733, "learning_rate": 1.0846903363213595e-06, "loss": 0.5691, "step": 4920 }, { "epoch": 0.79, "grad_norm": 0.9851862367287978, "learning_rate": 1.083067764656206e-06, "loss": 0.5583, "step": 4921 }, { "epoch": 0.79, "grad_norm": 0.6792032706664342, "learning_rate": 1.0814462600713642e-06, "loss": 0.4752, "step": 4922 }, { "epoch": 0.79, "grad_norm": 1.0477393794242218, "learning_rate": 1.0798258230085756e-06, "loss": 0.6159, "step": 4923 }, { "epoch": 0.79, "grad_norm": 1.090374123848842, "learning_rate": 1.078206453909293e-06, "loss": 0.6091, "step": 4924 }, { "epoch": 0.79, "grad_norm": 1.082930731788072, "learning_rate": 1.0765881532146793e-06, "loss": 0.5768, "step": 4925 }, { "epoch": 0.79, "grad_norm": 1.011558509102789, "learning_rate": 1.0749709213656001e-06, "loss": 0.6002, "step": 4926 }, { "epoch": 0.79, "grad_norm": 1.0534283111384604, "learning_rate": 1.0733547588026355e-06, "loss": 0.6246, "step": 4927 }, { "epoch": 0.79, "grad_norm": 1.0503463487545455, "learning_rate": 1.071739665966075e-06, "loss": 0.5964, "step": 4928 }, { "epoch": 0.79, "grad_norm": 0.952565066555294, "learning_rate": 1.0701256432959123e-06, "loss": 0.4964, "step": 4929 }, { "epoch": 0.79, "grad_norm": 1.0351319118407494, "learning_rate": 1.0685126912318511e-06, "loss": 0.5809, "step": 4930 }, { "epoch": 0.79, "grad_norm": 1.0375116794214496, "learning_rate": 1.0669008102133044e-06, "loss": 0.6148, "step": 4931 }, { "epoch": 0.79, "grad_norm": 1.0185609557380317, "learning_rate": 1.0652900006793953e-06, "loss": 0.5386, "step": 4932 }, { "epoch": 0.79, "grad_norm": 1.0583794530259187, "learning_rate": 1.0636802630689508e-06, "loss": 0.6591, "step": 4933 }, { "epoch": 0.79, "grad_norm": 0.9618537872001663, "learning_rate": 1.0620715978205066e-06, "loss": 0.4807, "step": 4934 }, { "epoch": 0.8, "grad_norm": 1.0793447640657676, "learning_rate": 1.0604640053723098e-06, "loss": 0.5949, "step": 4935 }, { "epoch": 0.8, "grad_norm": 0.9692603450797759, "learning_rate": 1.058857486162312e-06, "loss": 0.4843, "step": 4936 }, { "epoch": 0.8, "grad_norm": 0.9515855143046564, "learning_rate": 1.0572520406281716e-06, "loss": 0.4982, "step": 4937 }, { "epoch": 0.8, "grad_norm": 0.8428090222497984, "learning_rate": 1.0556476692072598e-06, "loss": 0.4593, "step": 4938 }, { "epoch": 0.8, "grad_norm": 1.1489433698340852, "learning_rate": 1.0540443723366478e-06, "loss": 0.6145, "step": 4939 }, { "epoch": 0.8, "grad_norm": 1.040249326547989, "learning_rate": 1.0524421504531212e-06, "loss": 0.5862, "step": 4940 }, { "epoch": 0.8, "grad_norm": 1.1446455674362987, "learning_rate": 1.0508410039931683e-06, "loss": 0.5761, "step": 4941 }, { "epoch": 0.8, "grad_norm": 1.0084965514507758, "learning_rate": 1.0492409333929833e-06, "loss": 0.5588, "step": 4942 }, { "epoch": 0.8, "grad_norm": 1.0031684686432332, "learning_rate": 1.0476419390884723e-06, "loss": 0.5603, "step": 4943 }, { "epoch": 0.8, "grad_norm": 0.9861083791968547, "learning_rate": 1.046044021515245e-06, "loss": 0.5194, "step": 4944 }, { "epoch": 0.8, "grad_norm": 0.9840869949261994, "learning_rate": 1.0444471811086166e-06, "loss": 0.5077, "step": 4945 }, { "epoch": 0.8, "grad_norm": 1.0438772357655342, "learning_rate": 1.0428514183036109e-06, "loss": 0.5302, "step": 4946 }, { "epoch": 0.8, "grad_norm": 0.6761246331243677, "learning_rate": 1.0412567335349616e-06, "loss": 0.5009, "step": 4947 }, { "epoch": 0.8, "grad_norm": 1.0899582308742042, "learning_rate": 1.0396631272370982e-06, "loss": 0.5148, "step": 4948 }, { "epoch": 0.8, "grad_norm": 0.9175558586235654, "learning_rate": 1.0380705998441664e-06, "loss": 0.4998, "step": 4949 }, { "epoch": 0.8, "grad_norm": 1.0027869980696893, "learning_rate": 1.0364791517900164e-06, "loss": 0.5917, "step": 4950 }, { "epoch": 0.8, "grad_norm": 1.0236817363530604, "learning_rate": 1.0348887835082e-06, "loss": 0.5375, "step": 4951 }, { "epoch": 0.8, "grad_norm": 1.1092562323388573, "learning_rate": 1.0332994954319763e-06, "loss": 0.6763, "step": 4952 }, { "epoch": 0.8, "grad_norm": 1.096851544727364, "learning_rate": 1.031711287994313e-06, "loss": 0.6057, "step": 4953 }, { "epoch": 0.8, "grad_norm": 1.0081914274933619, "learning_rate": 1.0301241616278845e-06, "loss": 0.5634, "step": 4954 }, { "epoch": 0.8, "grad_norm": 0.9401353287228967, "learning_rate": 1.0285381167650615e-06, "loss": 0.5424, "step": 4955 }, { "epoch": 0.8, "grad_norm": 1.081096666706904, "learning_rate": 1.0269531538379295e-06, "loss": 0.6202, "step": 4956 }, { "epoch": 0.8, "grad_norm": 1.0396890556432319, "learning_rate": 1.0253692732782778e-06, "loss": 0.5984, "step": 4957 }, { "epoch": 0.8, "grad_norm": 1.036942196192119, "learning_rate": 1.0237864755175969e-06, "loss": 0.6014, "step": 4958 }, { "epoch": 0.8, "grad_norm": 0.9657725184447055, "learning_rate": 1.022204760987084e-06, "loss": 0.531, "step": 4959 }, { "epoch": 0.8, "grad_norm": 0.6186170350549213, "learning_rate": 1.0206241301176439e-06, "loss": 0.5075, "step": 4960 }, { "epoch": 0.8, "grad_norm": 0.9992049887019192, "learning_rate": 1.0190445833398814e-06, "loss": 0.6213, "step": 4961 }, { "epoch": 0.8, "grad_norm": 1.0269592697199725, "learning_rate": 1.0174661210841119e-06, "loss": 0.6366, "step": 4962 }, { "epoch": 0.8, "grad_norm": 1.0058328402061356, "learning_rate": 1.0158887437803499e-06, "loss": 0.6275, "step": 4963 }, { "epoch": 0.8, "grad_norm": 1.0966748109436368, "learning_rate": 1.0143124518583158e-06, "loss": 0.6268, "step": 4964 }, { "epoch": 0.8, "grad_norm": 1.1438911659012767, "learning_rate": 1.012737245747437e-06, "loss": 0.631, "step": 4965 }, { "epoch": 0.8, "grad_norm": 1.1132784331554506, "learning_rate": 1.0111631258768416e-06, "loss": 0.6037, "step": 4966 }, { "epoch": 0.8, "grad_norm": 0.995544545072958, "learning_rate": 1.0095900926753632e-06, "loss": 0.5652, "step": 4967 }, { "epoch": 0.8, "grad_norm": 1.0700174365312556, "learning_rate": 1.0080181465715394e-06, "loss": 0.5622, "step": 4968 }, { "epoch": 0.8, "grad_norm": 1.0168888905909792, "learning_rate": 1.0064472879936132e-06, "loss": 0.5884, "step": 4969 }, { "epoch": 0.8, "grad_norm": 0.9641316570333548, "learning_rate": 1.0048775173695285e-06, "loss": 0.4881, "step": 4970 }, { "epoch": 0.8, "grad_norm": 0.9930317705232964, "learning_rate": 1.0033088351269338e-06, "loss": 0.583, "step": 4971 }, { "epoch": 0.8, "grad_norm": 0.9227064522184814, "learning_rate": 1.0017412416931826e-06, "loss": 0.5044, "step": 4972 }, { "epoch": 0.8, "grad_norm": 0.9721127347044944, "learning_rate": 1.0001747374953297e-06, "loss": 0.5121, "step": 4973 }, { "epoch": 0.8, "grad_norm": 0.9301788476217522, "learning_rate": 9.986093229601328e-07, "loss": 0.463, "step": 4974 }, { "epoch": 0.8, "grad_norm": 1.0340793639657742, "learning_rate": 9.970449985140557e-07, "loss": 0.5298, "step": 4975 }, { "epoch": 0.8, "grad_norm": 1.0368418654616984, "learning_rate": 9.954817645832654e-07, "loss": 0.6128, "step": 4976 }, { "epoch": 0.8, "grad_norm": 1.1220250053136023, "learning_rate": 9.939196215936253e-07, "loss": 0.6609, "step": 4977 }, { "epoch": 0.8, "grad_norm": 0.8750649879507977, "learning_rate": 9.923585699707084e-07, "loss": 0.534, "step": 4978 }, { "epoch": 0.8, "grad_norm": 1.0870802803605275, "learning_rate": 9.907986101397898e-07, "loss": 0.6635, "step": 4979 }, { "epoch": 0.8, "grad_norm": 1.0473413486167582, "learning_rate": 9.892397425258437e-07, "loss": 0.6698, "step": 4980 }, { "epoch": 0.8, "grad_norm": 1.0635177373556555, "learning_rate": 9.876819675535477e-07, "loss": 0.6301, "step": 4981 }, { "epoch": 0.8, "grad_norm": 0.9979297387600963, "learning_rate": 9.861252856472857e-07, "loss": 0.5618, "step": 4982 }, { "epoch": 0.8, "grad_norm": 0.6278419211672942, "learning_rate": 9.845696972311385e-07, "loss": 0.4588, "step": 4983 }, { "epoch": 0.8, "grad_norm": 0.9704510060528942, "learning_rate": 9.830152027288907e-07, "loss": 0.5604, "step": 4984 }, { "epoch": 0.8, "grad_norm": 0.9323322040649142, "learning_rate": 9.81461802564032e-07, "loss": 0.4842, "step": 4985 }, { "epoch": 0.8, "grad_norm": 0.9540889196475997, "learning_rate": 9.799094971597483e-07, "loss": 0.5627, "step": 4986 }, { "epoch": 0.8, "grad_norm": 0.9578975960902553, "learning_rate": 9.783582869389336e-07, "loss": 0.5684, "step": 4987 }, { "epoch": 0.8, "grad_norm": 0.9808323851785565, "learning_rate": 9.768081723241785e-07, "loss": 0.5474, "step": 4988 }, { "epoch": 0.8, "grad_norm": 1.030864255329231, "learning_rate": 9.752591537377758e-07, "loss": 0.5895, "step": 4989 }, { "epoch": 0.8, "grad_norm": 0.9439998640126077, "learning_rate": 9.737112316017221e-07, "loss": 0.5285, "step": 4990 }, { "epoch": 0.8, "grad_norm": 0.9901448231053444, "learning_rate": 9.72164406337716e-07, "loss": 0.5821, "step": 4991 }, { "epoch": 0.8, "grad_norm": 1.011354174543654, "learning_rate": 9.706186783671535e-07, "loss": 0.6013, "step": 4992 }, { "epoch": 0.8, "grad_norm": 0.9880411345959761, "learning_rate": 9.69074048111132e-07, "loss": 0.5288, "step": 4993 }, { "epoch": 0.8, "grad_norm": 0.9702355136333534, "learning_rate": 9.675305159904546e-07, "loss": 0.5088, "step": 4994 }, { "epoch": 0.8, "grad_norm": 1.0492935030882344, "learning_rate": 9.659880824256202e-07, "loss": 0.6256, "step": 4995 }, { "epoch": 0.8, "grad_norm": 1.1357593399319108, "learning_rate": 9.644467478368286e-07, "loss": 0.6289, "step": 4996 }, { "epoch": 0.81, "grad_norm": 1.1038472585163972, "learning_rate": 9.629065126439842e-07, "loss": 0.6172, "step": 4997 }, { "epoch": 0.81, "grad_norm": 1.054392538073765, "learning_rate": 9.61367377266691e-07, "loss": 0.6636, "step": 4998 }, { "epoch": 0.81, "grad_norm": 0.9564277456754303, "learning_rate": 9.59829342124247e-07, "loss": 0.5632, "step": 4999 }, { "epoch": 0.81, "grad_norm": 1.0717058676526376, "learning_rate": 9.582924076356587e-07, "loss": 0.6295, "step": 5000 }, { "epoch": 0.81, "grad_norm": 1.0453968510410017, "learning_rate": 9.5675657421963e-07, "loss": 0.5734, "step": 5001 }, { "epoch": 0.81, "grad_norm": 1.0753582593412454, "learning_rate": 9.552218422945636e-07, "loss": 0.6152, "step": 5002 }, { "epoch": 0.81, "grad_norm": 1.0153285747586902, "learning_rate": 9.536882122785602e-07, "loss": 0.5109, "step": 5003 }, { "epoch": 0.81, "grad_norm": 1.0645105974326663, "learning_rate": 9.52155684589427e-07, "loss": 0.5695, "step": 5004 }, { "epoch": 0.81, "grad_norm": 1.083354189531401, "learning_rate": 9.506242596446641e-07, "loss": 0.5537, "step": 5005 }, { "epoch": 0.81, "grad_norm": 1.0327720156730036, "learning_rate": 9.490939378614739e-07, "loss": 0.6127, "step": 5006 }, { "epoch": 0.81, "grad_norm": 0.9695052713525548, "learning_rate": 9.4756471965676e-07, "loss": 0.5779, "step": 5007 }, { "epoch": 0.81, "grad_norm": 0.6269649875165874, "learning_rate": 9.46036605447121e-07, "loss": 0.4677, "step": 5008 }, { "epoch": 0.81, "grad_norm": 0.9839377671206231, "learning_rate": 9.445095956488604e-07, "loss": 0.6018, "step": 5009 }, { "epoch": 0.81, "grad_norm": 1.0219459590451694, "learning_rate": 9.42983690677975e-07, "loss": 0.6332, "step": 5010 }, { "epoch": 0.81, "grad_norm": 0.9418588904057287, "learning_rate": 9.414588909501654e-07, "loss": 0.5481, "step": 5011 }, { "epoch": 0.81, "grad_norm": 1.0355557497987793, "learning_rate": 9.399351968808285e-07, "loss": 0.6182, "step": 5012 }, { "epoch": 0.81, "grad_norm": 1.0228155541236919, "learning_rate": 9.384126088850592e-07, "loss": 0.6801, "step": 5013 }, { "epoch": 0.81, "grad_norm": 1.0183066234612663, "learning_rate": 9.368911273776543e-07, "loss": 0.539, "step": 5014 }, { "epoch": 0.81, "grad_norm": 1.055635608981327, "learning_rate": 9.35370752773106e-07, "loss": 0.6091, "step": 5015 }, { "epoch": 0.81, "grad_norm": 1.068706245982093, "learning_rate": 9.338514854856073e-07, "loss": 0.5586, "step": 5016 }, { "epoch": 0.81, "grad_norm": 1.03503789870056, "learning_rate": 9.323333259290484e-07, "loss": 0.6088, "step": 5017 }, { "epoch": 0.81, "grad_norm": 1.0312428312048019, "learning_rate": 9.308162745170163e-07, "loss": 0.5213, "step": 5018 }, { "epoch": 0.81, "grad_norm": 1.0199959250533983, "learning_rate": 9.293003316627985e-07, "loss": 0.51, "step": 5019 }, { "epoch": 0.81, "grad_norm": 1.07047894711648, "learning_rate": 9.277854977793827e-07, "loss": 0.5867, "step": 5020 }, { "epoch": 0.81, "grad_norm": 1.0438064132575007, "learning_rate": 9.262717732794457e-07, "loss": 0.5915, "step": 5021 }, { "epoch": 0.81, "grad_norm": 1.1378786486287622, "learning_rate": 9.247591585753707e-07, "loss": 0.6518, "step": 5022 }, { "epoch": 0.81, "grad_norm": 1.0239993100400713, "learning_rate": 9.232476540792367e-07, "loss": 0.578, "step": 5023 }, { "epoch": 0.81, "grad_norm": 1.0379823234146377, "learning_rate": 9.217372602028185e-07, "loss": 0.5991, "step": 5024 }, { "epoch": 0.81, "grad_norm": 1.0185060104501789, "learning_rate": 9.202279773575873e-07, "loss": 0.5003, "step": 5025 }, { "epoch": 0.81, "grad_norm": 1.025473491498839, "learning_rate": 9.187198059547153e-07, "loss": 0.5897, "step": 5026 }, { "epoch": 0.81, "grad_norm": 1.0339441141660415, "learning_rate": 9.172127464050701e-07, "loss": 0.5715, "step": 5027 }, { "epoch": 0.81, "grad_norm": 1.0518444237270355, "learning_rate": 9.157067991192137e-07, "loss": 0.6409, "step": 5028 }, { "epoch": 0.81, "grad_norm": 1.111267927420634, "learning_rate": 9.14201964507409e-07, "loss": 0.5596, "step": 5029 }, { "epoch": 0.81, "grad_norm": 1.1076686365906552, "learning_rate": 9.126982429796172e-07, "loss": 0.607, "step": 5030 }, { "epoch": 0.81, "grad_norm": 1.0227393228347113, "learning_rate": 9.111956349454904e-07, "loss": 0.5185, "step": 5031 }, { "epoch": 0.81, "grad_norm": 1.0110920105080827, "learning_rate": 9.0969414081438e-07, "loss": 0.5662, "step": 5032 }, { "epoch": 0.81, "grad_norm": 0.9992000953081548, "learning_rate": 9.081937609953367e-07, "loss": 0.5491, "step": 5033 }, { "epoch": 0.81, "grad_norm": 1.0841741332390904, "learning_rate": 9.066944958971046e-07, "loss": 0.613, "step": 5034 }, { "epoch": 0.81, "grad_norm": 0.919752911436365, "learning_rate": 9.051963459281232e-07, "loss": 0.5204, "step": 5035 }, { "epoch": 0.81, "grad_norm": 1.0950299932441798, "learning_rate": 9.03699311496532e-07, "loss": 0.6478, "step": 5036 }, { "epoch": 0.81, "grad_norm": 0.9942210681481561, "learning_rate": 9.022033930101625e-07, "loss": 0.594, "step": 5037 }, { "epoch": 0.81, "grad_norm": 1.0162820108552946, "learning_rate": 9.007085908765467e-07, "loss": 0.5698, "step": 5038 }, { "epoch": 0.81, "grad_norm": 1.0505083274964164, "learning_rate": 8.992149055029081e-07, "loss": 0.5232, "step": 5039 }, { "epoch": 0.81, "grad_norm": 0.9402461886859322, "learning_rate": 8.977223372961663e-07, "loss": 0.4646, "step": 5040 }, { "epoch": 0.81, "grad_norm": 1.0404755761060818, "learning_rate": 8.962308866629416e-07, "loss": 0.6223, "step": 5041 }, { "epoch": 0.81, "grad_norm": 1.017697041412758, "learning_rate": 8.947405540095444e-07, "loss": 0.567, "step": 5042 }, { "epoch": 0.81, "grad_norm": 1.0305988427828752, "learning_rate": 8.932513397419812e-07, "loss": 0.5982, "step": 5043 }, { "epoch": 0.81, "grad_norm": 1.0136835639863133, "learning_rate": 8.917632442659563e-07, "loss": 0.4931, "step": 5044 }, { "epoch": 0.81, "grad_norm": 0.9680439121932444, "learning_rate": 8.902762679868687e-07, "loss": 0.5431, "step": 5045 }, { "epoch": 0.81, "grad_norm": 0.7138367926788204, "learning_rate": 8.887904113098111e-07, "loss": 0.5283, "step": 5046 }, { "epoch": 0.81, "grad_norm": 1.0111051074043325, "learning_rate": 8.873056746395703e-07, "loss": 0.658, "step": 5047 }, { "epoch": 0.81, "grad_norm": 1.136741552687729, "learning_rate": 8.858220583806309e-07, "loss": 0.7093, "step": 5048 }, { "epoch": 0.81, "grad_norm": 1.0078540802622933, "learning_rate": 8.843395629371738e-07, "loss": 0.6422, "step": 5049 }, { "epoch": 0.81, "grad_norm": 1.0496875565093102, "learning_rate": 8.828581887130655e-07, "loss": 0.5896, "step": 5050 }, { "epoch": 0.81, "grad_norm": 1.0155197565061074, "learning_rate": 8.813779361118763e-07, "loss": 0.5786, "step": 5051 }, { "epoch": 0.81, "grad_norm": 1.1143675949384952, "learning_rate": 8.79898805536869e-07, "loss": 0.5729, "step": 5052 }, { "epoch": 0.81, "grad_norm": 1.0613994944710665, "learning_rate": 8.784207973909986e-07, "loss": 0.5842, "step": 5053 }, { "epoch": 0.81, "grad_norm": 1.0137764686236503, "learning_rate": 8.769439120769135e-07, "loss": 0.6061, "step": 5054 }, { "epoch": 0.81, "grad_norm": 1.027095287741836, "learning_rate": 8.754681499969608e-07, "loss": 0.5739, "step": 5055 }, { "epoch": 0.81, "grad_norm": 1.0379607098038166, "learning_rate": 8.739935115531772e-07, "loss": 0.6461, "step": 5056 }, { "epoch": 0.81, "grad_norm": 0.886098059639951, "learning_rate": 8.725199971472942e-07, "loss": 0.4745, "step": 5057 }, { "epoch": 0.81, "grad_norm": 1.0076398706435228, "learning_rate": 8.7104760718074e-07, "loss": 0.4842, "step": 5058 }, { "epoch": 0.82, "grad_norm": 1.0517277608006768, "learning_rate": 8.69576342054631e-07, "loss": 0.5611, "step": 5059 }, { "epoch": 0.82, "grad_norm": 1.0821786247663827, "learning_rate": 8.681062021697839e-07, "loss": 0.6009, "step": 5060 }, { "epoch": 0.82, "grad_norm": 1.0478631117812514, "learning_rate": 8.666371879267038e-07, "loss": 0.5638, "step": 5061 }, { "epoch": 0.82, "grad_norm": 1.075180113684228, "learning_rate": 8.65169299725589e-07, "loss": 0.6464, "step": 5062 }, { "epoch": 0.82, "grad_norm": 1.085654015984366, "learning_rate": 8.637025379663355e-07, "loss": 0.6513, "step": 5063 }, { "epoch": 0.82, "grad_norm": 1.0095910583274752, "learning_rate": 8.622369030485283e-07, "loss": 0.5331, "step": 5064 }, { "epoch": 0.82, "grad_norm": 0.9785614591180458, "learning_rate": 8.607723953714453e-07, "loss": 0.615, "step": 5065 }, { "epoch": 0.82, "grad_norm": 0.984089999876762, "learning_rate": 8.593090153340611e-07, "loss": 0.5328, "step": 5066 }, { "epoch": 0.82, "grad_norm": 0.9109170058534404, "learning_rate": 8.578467633350407e-07, "loss": 0.547, "step": 5067 }, { "epoch": 0.82, "grad_norm": 1.067273200051999, "learning_rate": 8.56385639772741e-07, "loss": 0.4914, "step": 5068 }, { "epoch": 0.82, "grad_norm": 0.9935404959291039, "learning_rate": 8.549256450452109e-07, "loss": 0.5732, "step": 5069 }, { "epoch": 0.82, "grad_norm": 1.0340418370817044, "learning_rate": 8.534667795501955e-07, "loss": 0.5325, "step": 5070 }, { "epoch": 0.82, "grad_norm": 0.9849397104151747, "learning_rate": 8.520090436851314e-07, "loss": 0.5574, "step": 5071 }, { "epoch": 0.82, "grad_norm": 1.0453074640754032, "learning_rate": 8.505524378471408e-07, "loss": 0.6129, "step": 5072 }, { "epoch": 0.82, "grad_norm": 0.9978611782867323, "learning_rate": 8.490969624330469e-07, "loss": 0.5705, "step": 5073 }, { "epoch": 0.82, "grad_norm": 0.9486073749000541, "learning_rate": 8.476426178393621e-07, "loss": 0.4886, "step": 5074 }, { "epoch": 0.82, "grad_norm": 1.039823271279133, "learning_rate": 8.461894044622882e-07, "loss": 0.5588, "step": 5075 }, { "epoch": 0.82, "grad_norm": 0.7071239991668153, "learning_rate": 8.447373226977201e-07, "loss": 0.4618, "step": 5076 }, { "epoch": 0.82, "grad_norm": 1.0809769701938214, "learning_rate": 8.432863729412466e-07, "loss": 0.5829, "step": 5077 }, { "epoch": 0.82, "grad_norm": 1.0280928437236523, "learning_rate": 8.418365555881458e-07, "loss": 0.5774, "step": 5078 }, { "epoch": 0.82, "grad_norm": 1.1143425231876194, "learning_rate": 8.403878710333868e-07, "loss": 0.5906, "step": 5079 }, { "epoch": 0.82, "grad_norm": 1.02656261952526, "learning_rate": 8.389403196716328e-07, "loss": 0.5758, "step": 5080 }, { "epoch": 0.82, "grad_norm": 1.1115519067503328, "learning_rate": 8.37493901897235e-07, "loss": 0.59, "step": 5081 }, { "epoch": 0.82, "grad_norm": 1.012005791942428, "learning_rate": 8.360486181042399e-07, "loss": 0.5591, "step": 5082 }, { "epoch": 0.82, "grad_norm": 1.1303983119964944, "learning_rate": 8.346044686863808e-07, "loss": 0.587, "step": 5083 }, { "epoch": 0.82, "grad_norm": 0.6726855383353931, "learning_rate": 8.331614540370836e-07, "loss": 0.4923, "step": 5084 }, { "epoch": 0.82, "grad_norm": 1.0479890656908446, "learning_rate": 8.317195745494666e-07, "loss": 0.6057, "step": 5085 }, { "epoch": 0.82, "grad_norm": 1.0277858972007798, "learning_rate": 8.302788306163373e-07, "loss": 0.6242, "step": 5086 }, { "epoch": 0.82, "grad_norm": 1.0169531995334915, "learning_rate": 8.288392226301917e-07, "loss": 0.6007, "step": 5087 }, { "epoch": 0.82, "grad_norm": 1.0837306325390166, "learning_rate": 8.27400750983221e-07, "loss": 0.5331, "step": 5088 }, { "epoch": 0.82, "grad_norm": 0.9746158882202549, "learning_rate": 8.259634160673052e-07, "loss": 0.556, "step": 5089 }, { "epoch": 0.82, "grad_norm": 1.0441735335881588, "learning_rate": 8.245272182740133e-07, "loss": 0.6728, "step": 5090 }, { "epoch": 0.82, "grad_norm": 1.0151290827942323, "learning_rate": 8.230921579946027e-07, "loss": 0.5634, "step": 5091 }, { "epoch": 0.82, "grad_norm": 1.0652104511619458, "learning_rate": 8.21658235620027e-07, "loss": 0.4907, "step": 5092 }, { "epoch": 0.82, "grad_norm": 1.1011730919467875, "learning_rate": 8.202254515409247e-07, "loss": 0.5704, "step": 5093 }, { "epoch": 0.82, "grad_norm": 0.9569429331107602, "learning_rate": 8.187938061476242e-07, "loss": 0.5973, "step": 5094 }, { "epoch": 0.82, "grad_norm": 1.0668874503317447, "learning_rate": 8.173632998301468e-07, "loss": 0.591, "step": 5095 }, { "epoch": 0.82, "grad_norm": 1.027311198385021, "learning_rate": 8.159339329782023e-07, "loss": 0.556, "step": 5096 }, { "epoch": 0.82, "grad_norm": 1.0591581798481031, "learning_rate": 8.145057059811895e-07, "loss": 0.603, "step": 5097 }, { "epoch": 0.82, "grad_norm": 1.050515987303814, "learning_rate": 8.130786192281947e-07, "loss": 0.5685, "step": 5098 }, { "epoch": 0.82, "grad_norm": 1.0928953415925775, "learning_rate": 8.116526731079982e-07, "loss": 0.6424, "step": 5099 }, { "epoch": 0.82, "grad_norm": 0.9548030843388295, "learning_rate": 8.102278680090664e-07, "loss": 0.5206, "step": 5100 }, { "epoch": 0.82, "grad_norm": 0.6348145776017352, "learning_rate": 8.088042043195538e-07, "loss": 0.461, "step": 5101 }, { "epoch": 0.82, "grad_norm": 1.0000483019142208, "learning_rate": 8.073816824273078e-07, "loss": 0.551, "step": 5102 }, { "epoch": 0.82, "grad_norm": 1.0372890415216258, "learning_rate": 8.059603027198609e-07, "loss": 0.5141, "step": 5103 }, { "epoch": 0.82, "grad_norm": 0.9271157683425791, "learning_rate": 8.045400655844382e-07, "loss": 0.4809, "step": 5104 }, { "epoch": 0.82, "grad_norm": 1.008115362066479, "learning_rate": 8.031209714079496e-07, "loss": 0.5326, "step": 5105 }, { "epoch": 0.82, "grad_norm": 0.9205227161731733, "learning_rate": 8.017030205769955e-07, "loss": 0.5881, "step": 5106 }, { "epoch": 0.82, "grad_norm": 0.977175772526508, "learning_rate": 8.002862134778661e-07, "loss": 0.5581, "step": 5107 }, { "epoch": 0.82, "grad_norm": 0.9667871281550026, "learning_rate": 7.988705504965372e-07, "loss": 0.6322, "step": 5108 }, { "epoch": 0.82, "grad_norm": 1.0106334631779426, "learning_rate": 7.974560320186759e-07, "loss": 0.5755, "step": 5109 }, { "epoch": 0.82, "grad_norm": 1.0057387657596193, "learning_rate": 7.960426584296338e-07, "loss": 0.541, "step": 5110 }, { "epoch": 0.82, "grad_norm": 0.9860136698805336, "learning_rate": 7.946304301144564e-07, "loss": 0.5262, "step": 5111 }, { "epoch": 0.82, "grad_norm": 1.0051271369597181, "learning_rate": 7.932193474578709e-07, "loss": 0.6064, "step": 5112 }, { "epoch": 0.82, "grad_norm": 1.0697768423670966, "learning_rate": 7.918094108442947e-07, "loss": 0.6358, "step": 5113 }, { "epoch": 0.82, "grad_norm": 1.039532736858145, "learning_rate": 7.904006206578358e-07, "loss": 0.5825, "step": 5114 }, { "epoch": 0.82, "grad_norm": 0.9846617040753357, "learning_rate": 7.889929772822857e-07, "loss": 0.5077, "step": 5115 }, { "epoch": 0.82, "grad_norm": 1.0837520996474137, "learning_rate": 7.875864811011247e-07, "loss": 0.6116, "step": 5116 }, { "epoch": 0.82, "grad_norm": 1.0520145630122697, "learning_rate": 7.861811324975221e-07, "loss": 0.6123, "step": 5117 }, { "epoch": 0.82, "grad_norm": 1.1221305956199863, "learning_rate": 7.847769318543346e-07, "loss": 0.6041, "step": 5118 }, { "epoch": 0.82, "grad_norm": 1.0052952155167758, "learning_rate": 7.833738795541046e-07, "loss": 0.5577, "step": 5119 }, { "epoch": 0.82, "grad_norm": 1.028942333888086, "learning_rate": 7.819719759790606e-07, "loss": 0.6149, "step": 5120 }, { "epoch": 0.83, "grad_norm": 1.0188849906539101, "learning_rate": 7.805712215111216e-07, "loss": 0.5782, "step": 5121 }, { "epoch": 0.83, "grad_norm": 1.0703155929777164, "learning_rate": 7.791716165318913e-07, "loss": 0.5724, "step": 5122 }, { "epoch": 0.83, "grad_norm": 1.0205935358746852, "learning_rate": 7.777731614226596e-07, "loss": 0.5549, "step": 5123 }, { "epoch": 0.83, "grad_norm": 1.0734011355527322, "learning_rate": 7.763758565644064e-07, "loss": 0.5708, "step": 5124 }, { "epoch": 0.83, "grad_norm": 1.0720785869308467, "learning_rate": 7.749797023377931e-07, "loss": 0.5936, "step": 5125 }, { "epoch": 0.83, "grad_norm": 0.990457673213393, "learning_rate": 7.735846991231738e-07, "loss": 0.5729, "step": 5126 }, { "epoch": 0.83, "grad_norm": 1.0477546517006748, "learning_rate": 7.721908473005829e-07, "loss": 0.5654, "step": 5127 }, { "epoch": 0.83, "grad_norm": 1.051014318195096, "learning_rate": 7.707981472497467e-07, "loss": 0.5449, "step": 5128 }, { "epoch": 0.83, "grad_norm": 1.115453626012281, "learning_rate": 7.694065993500732e-07, "loss": 0.6238, "step": 5129 }, { "epoch": 0.83, "grad_norm": 0.9984220253090474, "learning_rate": 7.680162039806588e-07, "loss": 0.5176, "step": 5130 }, { "epoch": 0.83, "grad_norm": 1.016093153709853, "learning_rate": 7.666269615202865e-07, "loss": 0.5862, "step": 5131 }, { "epoch": 0.83, "grad_norm": 1.0499406862219314, "learning_rate": 7.652388723474224e-07, "loss": 0.6143, "step": 5132 }, { "epoch": 0.83, "grad_norm": 0.9165837686305148, "learning_rate": 7.638519368402225e-07, "loss": 0.451, "step": 5133 }, { "epoch": 0.83, "grad_norm": 1.078373030892962, "learning_rate": 7.62466155376525e-07, "loss": 0.5965, "step": 5134 }, { "epoch": 0.83, "grad_norm": 1.0323207238559315, "learning_rate": 7.61081528333854e-07, "loss": 0.6433, "step": 5135 }, { "epoch": 0.83, "grad_norm": 0.9599229560634489, "learning_rate": 7.596980560894224e-07, "loss": 0.6164, "step": 5136 }, { "epoch": 0.83, "grad_norm": 0.9541945120732931, "learning_rate": 7.583157390201246e-07, "loss": 0.5529, "step": 5137 }, { "epoch": 0.83, "grad_norm": 0.9416686763491351, "learning_rate": 7.56934577502541e-07, "loss": 0.4379, "step": 5138 }, { "epoch": 0.83, "grad_norm": 0.9883795472859372, "learning_rate": 7.555545719129398e-07, "loss": 0.4941, "step": 5139 }, { "epoch": 0.83, "grad_norm": 0.9695803875549589, "learning_rate": 7.541757226272744e-07, "loss": 0.5427, "step": 5140 }, { "epoch": 0.83, "grad_norm": 0.9660900829162621, "learning_rate": 7.527980300211762e-07, "loss": 0.5015, "step": 5141 }, { "epoch": 0.83, "grad_norm": 0.9979269322415791, "learning_rate": 7.514214944699694e-07, "loss": 0.5207, "step": 5142 }, { "epoch": 0.83, "grad_norm": 1.012012508514017, "learning_rate": 7.500461163486616e-07, "loss": 0.588, "step": 5143 }, { "epoch": 0.83, "grad_norm": 1.0523008410780177, "learning_rate": 7.486718960319428e-07, "loss": 0.6478, "step": 5144 }, { "epoch": 0.83, "grad_norm": 1.066941122629187, "learning_rate": 7.472988338941861e-07, "loss": 0.6042, "step": 5145 }, { "epoch": 0.83, "grad_norm": 0.6423651733070286, "learning_rate": 7.459269303094552e-07, "loss": 0.4425, "step": 5146 }, { "epoch": 0.83, "grad_norm": 1.0770133591551112, "learning_rate": 7.445561856514916e-07, "loss": 0.5971, "step": 5147 }, { "epoch": 0.83, "grad_norm": 1.0174484184860932, "learning_rate": 7.431866002937254e-07, "loss": 0.5339, "step": 5148 }, { "epoch": 0.83, "grad_norm": 1.0791158691756308, "learning_rate": 7.41818174609269e-07, "loss": 0.6028, "step": 5149 }, { "epoch": 0.83, "grad_norm": 1.0049929975606402, "learning_rate": 7.404509089709194e-07, "loss": 0.5719, "step": 5150 }, { "epoch": 0.83, "grad_norm": 0.9952842135292382, "learning_rate": 7.390848037511578e-07, "loss": 0.5508, "step": 5151 }, { "epoch": 0.83, "grad_norm": 1.0396021794573012, "learning_rate": 7.377198593221474e-07, "loss": 0.6277, "step": 5152 }, { "epoch": 0.83, "grad_norm": 1.0124824095086598, "learning_rate": 7.363560760557392e-07, "loss": 0.6066, "step": 5153 }, { "epoch": 0.83, "grad_norm": 1.14007864059285, "learning_rate": 7.349934543234621e-07, "loss": 0.6046, "step": 5154 }, { "epoch": 0.83, "grad_norm": 1.0547704247584988, "learning_rate": 7.336319944965353e-07, "loss": 0.6492, "step": 5155 }, { "epoch": 0.83, "grad_norm": 1.0498673742591713, "learning_rate": 7.32271696945856e-07, "loss": 0.6493, "step": 5156 }, { "epoch": 0.83, "grad_norm": 0.9812665219738321, "learning_rate": 7.30912562042006e-07, "loss": 0.613, "step": 5157 }, { "epoch": 0.83, "grad_norm": 1.0778590053183552, "learning_rate": 7.295545901552536e-07, "loss": 0.6437, "step": 5158 }, { "epoch": 0.83, "grad_norm": 1.1156138773829636, "learning_rate": 7.281977816555463e-07, "loss": 0.5987, "step": 5159 }, { "epoch": 0.83, "grad_norm": 1.0170515620023057, "learning_rate": 7.268421369125145e-07, "loss": 0.5749, "step": 5160 }, { "epoch": 0.83, "grad_norm": 1.0234315438274797, "learning_rate": 7.254876562954755e-07, "loss": 0.5794, "step": 5161 }, { "epoch": 0.83, "grad_norm": 1.01689867505469, "learning_rate": 7.241343401734285e-07, "loss": 0.5766, "step": 5162 }, { "epoch": 0.83, "grad_norm": 0.6565716326712019, "learning_rate": 7.2278218891505e-07, "loss": 0.4686, "step": 5163 }, { "epoch": 0.83, "grad_norm": 0.9792088246171785, "learning_rate": 7.214312028887055e-07, "loss": 0.5413, "step": 5164 }, { "epoch": 0.83, "grad_norm": 0.6593046810552637, "learning_rate": 7.20081382462442e-07, "loss": 0.4763, "step": 5165 }, { "epoch": 0.83, "grad_norm": 0.928707873666654, "learning_rate": 7.187327280039863e-07, "loss": 0.5257, "step": 5166 }, { "epoch": 0.83, "grad_norm": 0.9576603355790668, "learning_rate": 7.173852398807485e-07, "loss": 0.5072, "step": 5167 }, { "epoch": 0.83, "grad_norm": 1.0605474922685603, "learning_rate": 7.160389184598221e-07, "loss": 0.5857, "step": 5168 }, { "epoch": 0.83, "grad_norm": 1.0224333894207813, "learning_rate": 7.146937641079849e-07, "loss": 0.5833, "step": 5169 }, { "epoch": 0.83, "grad_norm": 0.6676944675232807, "learning_rate": 7.133497771916886e-07, "loss": 0.472, "step": 5170 }, { "epoch": 0.83, "grad_norm": 1.122445741067534, "learning_rate": 7.120069580770755e-07, "loss": 0.6159, "step": 5171 }, { "epoch": 0.83, "grad_norm": 1.0375104525408834, "learning_rate": 7.10665307129968e-07, "loss": 0.6538, "step": 5172 }, { "epoch": 0.83, "grad_norm": 0.9761671422683879, "learning_rate": 7.093248247158663e-07, "loss": 0.5181, "step": 5173 }, { "epoch": 0.83, "grad_norm": 0.9456267845536658, "learning_rate": 7.079855111999545e-07, "loss": 0.5611, "step": 5174 }, { "epoch": 0.83, "grad_norm": 1.0457549808870232, "learning_rate": 7.066473669471008e-07, "loss": 0.5433, "step": 5175 }, { "epoch": 0.83, "grad_norm": 1.095570837836015, "learning_rate": 7.053103923218501e-07, "loss": 0.6666, "step": 5176 }, { "epoch": 0.83, "grad_norm": 1.0542381801141385, "learning_rate": 7.039745876884335e-07, "loss": 0.547, "step": 5177 }, { "epoch": 0.83, "grad_norm": 1.1079500406046754, "learning_rate": 7.026399534107603e-07, "loss": 0.5841, "step": 5178 }, { "epoch": 0.83, "grad_norm": 0.9951796445141703, "learning_rate": 7.013064898524196e-07, "loss": 0.5908, "step": 5179 }, { "epoch": 0.83, "grad_norm": 0.9310716384104486, "learning_rate": 6.99974197376686e-07, "loss": 0.5281, "step": 5180 }, { "epoch": 0.83, "grad_norm": 1.0804487572968202, "learning_rate": 6.986430763465124e-07, "loss": 0.6431, "step": 5181 }, { "epoch": 0.83, "grad_norm": 1.050529711038866, "learning_rate": 6.973131271245315e-07, "loss": 0.5737, "step": 5182 }, { "epoch": 0.84, "grad_norm": 1.0721895807143778, "learning_rate": 6.95984350073059e-07, "loss": 0.5865, "step": 5183 }, { "epoch": 0.84, "grad_norm": 0.966865845983161, "learning_rate": 6.94656745554092e-07, "loss": 0.5353, "step": 5184 }, { "epoch": 0.84, "grad_norm": 0.9578115026971961, "learning_rate": 6.933303139293035e-07, "loss": 0.5209, "step": 5185 }, { "epoch": 0.84, "grad_norm": 1.044091653804256, "learning_rate": 6.92005055560051e-07, "loss": 0.61, "step": 5186 }, { "epoch": 0.84, "grad_norm": 0.9787382138261209, "learning_rate": 6.906809708073736e-07, "loss": 0.597, "step": 5187 }, { "epoch": 0.84, "grad_norm": 0.9779440823391227, "learning_rate": 6.893580600319865e-07, "loss": 0.581, "step": 5188 }, { "epoch": 0.84, "grad_norm": 1.0423914030143775, "learning_rate": 6.880363235942861e-07, "loss": 0.5778, "step": 5189 }, { "epoch": 0.84, "grad_norm": 1.132869353505299, "learning_rate": 6.867157618543513e-07, "loss": 0.6691, "step": 5190 }, { "epoch": 0.84, "grad_norm": 1.0642442805174035, "learning_rate": 6.853963751719417e-07, "loss": 0.6326, "step": 5191 }, { "epoch": 0.84, "grad_norm": 1.0168163899897797, "learning_rate": 6.840781639064897e-07, "loss": 0.5739, "step": 5192 }, { "epoch": 0.84, "grad_norm": 0.9972374342589777, "learning_rate": 6.827611284171154e-07, "loss": 0.6182, "step": 5193 }, { "epoch": 0.84, "grad_norm": 0.9677317311000039, "learning_rate": 6.814452690626161e-07, "loss": 0.5708, "step": 5194 }, { "epoch": 0.84, "grad_norm": 1.0617202236943117, "learning_rate": 6.801305862014667e-07, "loss": 0.6479, "step": 5195 }, { "epoch": 0.84, "grad_norm": 1.107700966398236, "learning_rate": 6.788170801918231e-07, "loss": 0.5896, "step": 5196 }, { "epoch": 0.84, "grad_norm": 0.678818516525114, "learning_rate": 6.775047513915218e-07, "loss": 0.4881, "step": 5197 }, { "epoch": 0.84, "grad_norm": 0.9851041418772838, "learning_rate": 6.76193600158076e-07, "loss": 0.5657, "step": 5198 }, { "epoch": 0.84, "grad_norm": 1.0029237829470858, "learning_rate": 6.748836268486797e-07, "loss": 0.5668, "step": 5199 }, { "epoch": 0.84, "grad_norm": 1.0000137716349071, "learning_rate": 6.735748318202062e-07, "loss": 0.5519, "step": 5200 }, { "epoch": 0.84, "grad_norm": 0.9298971378669835, "learning_rate": 6.722672154292065e-07, "loss": 0.491, "step": 5201 }, { "epoch": 0.84, "grad_norm": 0.6469655861912761, "learning_rate": 6.709607780319133e-07, "loss": 0.4532, "step": 5202 }, { "epoch": 0.84, "grad_norm": 1.0692189987071181, "learning_rate": 6.696555199842347e-07, "loss": 0.6318, "step": 5203 }, { "epoch": 0.84, "grad_norm": 0.9564436171121385, "learning_rate": 6.683514416417574e-07, "loss": 0.5563, "step": 5204 }, { "epoch": 0.84, "grad_norm": 0.9672300721502858, "learning_rate": 6.670485433597507e-07, "loss": 0.5317, "step": 5205 }, { "epoch": 0.84, "grad_norm": 0.9490091749015241, "learning_rate": 6.657468254931615e-07, "loss": 0.5369, "step": 5206 }, { "epoch": 0.84, "grad_norm": 1.0270188757465508, "learning_rate": 6.644462883966085e-07, "loss": 0.5934, "step": 5207 }, { "epoch": 0.84, "grad_norm": 1.001116697757867, "learning_rate": 6.631469324243978e-07, "loss": 0.5315, "step": 5208 }, { "epoch": 0.84, "grad_norm": 1.1399380939896522, "learning_rate": 6.618487579305089e-07, "loss": 0.6084, "step": 5209 }, { "epoch": 0.84, "grad_norm": 0.9697330846666572, "learning_rate": 6.605517652686005e-07, "loss": 0.4909, "step": 5210 }, { "epoch": 0.84, "grad_norm": 1.022122055597291, "learning_rate": 6.59255954792008e-07, "loss": 0.5588, "step": 5211 }, { "epoch": 0.84, "grad_norm": 0.96744048994743, "learning_rate": 6.579613268537466e-07, "loss": 0.5232, "step": 5212 }, { "epoch": 0.84, "grad_norm": 0.983173415945006, "learning_rate": 6.566678818065108e-07, "loss": 0.6211, "step": 5213 }, { "epoch": 0.84, "grad_norm": 0.9599810272101134, "learning_rate": 6.553756200026668e-07, "loss": 0.6128, "step": 5214 }, { "epoch": 0.84, "grad_norm": 1.0293366246887152, "learning_rate": 6.540845417942637e-07, "loss": 0.5605, "step": 5215 }, { "epoch": 0.84, "grad_norm": 0.9203235635111477, "learning_rate": 6.527946475330288e-07, "loss": 0.5024, "step": 5216 }, { "epoch": 0.84, "grad_norm": 1.0714088413915348, "learning_rate": 6.51505937570363e-07, "loss": 0.5708, "step": 5217 }, { "epoch": 0.84, "grad_norm": 1.0160707888060683, "learning_rate": 6.502184122573457e-07, "loss": 0.5779, "step": 5218 }, { "epoch": 0.84, "grad_norm": 1.149816131632796, "learning_rate": 6.489320719447367e-07, "loss": 0.6189, "step": 5219 }, { "epoch": 0.84, "grad_norm": 0.6408134234686275, "learning_rate": 6.476469169829691e-07, "loss": 0.4619, "step": 5220 }, { "epoch": 0.84, "grad_norm": 0.6106888116828263, "learning_rate": 6.463629477221533e-07, "loss": 0.4358, "step": 5221 }, { "epoch": 0.84, "grad_norm": 0.9728220809545649, "learning_rate": 6.450801645120808e-07, "loss": 0.5557, "step": 5222 }, { "epoch": 0.84, "grad_norm": 1.0566893530617847, "learning_rate": 6.43798567702214e-07, "loss": 0.6574, "step": 5223 }, { "epoch": 0.84, "grad_norm": 0.9658884551846721, "learning_rate": 6.425181576416978e-07, "loss": 0.5301, "step": 5224 }, { "epoch": 0.84, "grad_norm": 1.0676052046611186, "learning_rate": 6.412389346793507e-07, "loss": 0.5757, "step": 5225 }, { "epoch": 0.84, "grad_norm": 0.9394752600821392, "learning_rate": 6.399608991636663e-07, "loss": 0.5734, "step": 5226 }, { "epoch": 0.84, "grad_norm": 1.032603789988249, "learning_rate": 6.386840514428183e-07, "loss": 0.564, "step": 5227 }, { "epoch": 0.84, "grad_norm": 0.9877613769050175, "learning_rate": 6.374083918646557e-07, "loss": 0.5242, "step": 5228 }, { "epoch": 0.84, "grad_norm": 0.9692263331623233, "learning_rate": 6.361339207767031e-07, "loss": 0.5867, "step": 5229 }, { "epoch": 0.84, "grad_norm": 0.9198409120777696, "learning_rate": 6.348606385261602e-07, "loss": 0.5133, "step": 5230 }, { "epoch": 0.84, "grad_norm": 1.0632677504898758, "learning_rate": 6.335885454599061e-07, "loss": 0.6763, "step": 5231 }, { "epoch": 0.84, "grad_norm": 1.0128961745916334, "learning_rate": 6.323176419244925e-07, "loss": 0.6009, "step": 5232 }, { "epoch": 0.84, "grad_norm": 1.0621316668681846, "learning_rate": 6.310479282661485e-07, "loss": 0.5704, "step": 5233 }, { "epoch": 0.84, "grad_norm": 1.0288003693743362, "learning_rate": 6.297794048307798e-07, "loss": 0.6068, "step": 5234 }, { "epoch": 0.84, "grad_norm": 1.0464694220683792, "learning_rate": 6.285120719639693e-07, "loss": 0.6224, "step": 5235 }, { "epoch": 0.84, "grad_norm": 0.991584675037723, "learning_rate": 6.27245930010969e-07, "loss": 0.6026, "step": 5236 }, { "epoch": 0.84, "grad_norm": 0.9413290003372221, "learning_rate": 6.259809793167127e-07, "loss": 0.5643, "step": 5237 }, { "epoch": 0.84, "grad_norm": 0.952635429305962, "learning_rate": 6.247172202258095e-07, "loss": 0.5474, "step": 5238 }, { "epoch": 0.84, "grad_norm": 1.036603394967367, "learning_rate": 6.234546530825408e-07, "loss": 0.6389, "step": 5239 }, { "epoch": 0.84, "grad_norm": 1.0002023264889008, "learning_rate": 6.221932782308637e-07, "loss": 0.4919, "step": 5240 }, { "epoch": 0.84, "grad_norm": 0.995700157944254, "learning_rate": 6.209330960144139e-07, "loss": 0.6141, "step": 5241 }, { "epoch": 0.84, "grad_norm": 1.0086452883489023, "learning_rate": 6.196741067764977e-07, "loss": 0.5505, "step": 5242 }, { "epoch": 0.84, "grad_norm": 0.9757833782736011, "learning_rate": 6.184163108600988e-07, "loss": 0.5076, "step": 5243 }, { "epoch": 0.84, "grad_norm": 0.9632799634860469, "learning_rate": 6.171597086078767e-07, "loss": 0.5165, "step": 5244 }, { "epoch": 0.85, "grad_norm": 1.0346272079190675, "learning_rate": 6.159043003621628e-07, "loss": 0.5843, "step": 5245 }, { "epoch": 0.85, "grad_norm": 1.0415911331082455, "learning_rate": 6.146500864649668e-07, "loss": 0.5898, "step": 5246 }, { "epoch": 0.85, "grad_norm": 1.007810722457339, "learning_rate": 6.133970672579693e-07, "loss": 0.6512, "step": 5247 }, { "epoch": 0.85, "grad_norm": 1.058573892657414, "learning_rate": 6.121452430825287e-07, "loss": 0.5704, "step": 5248 }, { "epoch": 0.85, "grad_norm": 1.1039467023089087, "learning_rate": 6.108946142796757e-07, "loss": 0.5817, "step": 5249 }, { "epoch": 0.85, "grad_norm": 1.0738270460719461, "learning_rate": 6.096451811901155e-07, "loss": 0.5812, "step": 5250 }, { "epoch": 0.85, "grad_norm": 1.0503873475282457, "learning_rate": 6.083969441542298e-07, "loss": 0.5889, "step": 5251 }, { "epoch": 0.85, "grad_norm": 0.9341395579951143, "learning_rate": 6.071499035120703e-07, "loss": 0.4751, "step": 5252 }, { "epoch": 0.85, "grad_norm": 1.0049265052526277, "learning_rate": 6.059040596033682e-07, "loss": 0.6007, "step": 5253 }, { "epoch": 0.85, "grad_norm": 1.0368483016671652, "learning_rate": 6.046594127675242e-07, "loss": 0.5871, "step": 5254 }, { "epoch": 0.85, "grad_norm": 1.0767091744444386, "learning_rate": 6.034159633436132e-07, "loss": 0.5543, "step": 5255 }, { "epoch": 0.85, "grad_norm": 1.0497149171843612, "learning_rate": 6.021737116703868e-07, "loss": 0.5938, "step": 5256 }, { "epoch": 0.85, "grad_norm": 1.0049565644471161, "learning_rate": 6.009326580862696e-07, "loss": 0.5503, "step": 5257 }, { "epoch": 0.85, "grad_norm": 1.060304302174428, "learning_rate": 5.996928029293559e-07, "loss": 0.5424, "step": 5258 }, { "epoch": 0.85, "grad_norm": 0.9904335420074606, "learning_rate": 5.984541465374172e-07, "loss": 0.5788, "step": 5259 }, { "epoch": 0.85, "grad_norm": 1.0404857441271587, "learning_rate": 5.972166892478998e-07, "loss": 0.5688, "step": 5260 }, { "epoch": 0.85, "grad_norm": 1.0255195350761621, "learning_rate": 5.959804313979195e-07, "loss": 0.4614, "step": 5261 }, { "epoch": 0.85, "grad_norm": 1.056828412829864, "learning_rate": 5.947453733242659e-07, "loss": 0.5373, "step": 5262 }, { "epoch": 0.85, "grad_norm": 1.0243987665164414, "learning_rate": 5.935115153634058e-07, "loss": 0.5476, "step": 5263 }, { "epoch": 0.85, "grad_norm": 1.01334532216102, "learning_rate": 5.922788578514737e-07, "loss": 0.5651, "step": 5264 }, { "epoch": 0.85, "grad_norm": 0.9994849118720266, "learning_rate": 5.910474011242801e-07, "loss": 0.5809, "step": 5265 }, { "epoch": 0.85, "grad_norm": 1.0000600061611418, "learning_rate": 5.898171455173074e-07, "loss": 0.5356, "step": 5266 }, { "epoch": 0.85, "grad_norm": 1.0362272088701074, "learning_rate": 5.885880913657127e-07, "loss": 0.5122, "step": 5267 }, { "epoch": 0.85, "grad_norm": 1.0763910483118724, "learning_rate": 5.873602390043231e-07, "loss": 0.5989, "step": 5268 }, { "epoch": 0.85, "grad_norm": 0.9344310290225571, "learning_rate": 5.861335887676389e-07, "loss": 0.5737, "step": 5269 }, { "epoch": 0.85, "grad_norm": 1.0824231357459353, "learning_rate": 5.84908140989835e-07, "loss": 0.5606, "step": 5270 }, { "epoch": 0.85, "grad_norm": 1.0275564466581721, "learning_rate": 5.836838960047558e-07, "loss": 0.6004, "step": 5271 }, { "epoch": 0.85, "grad_norm": 0.9437249392021032, "learning_rate": 5.824608541459192e-07, "loss": 0.5983, "step": 5272 }, { "epoch": 0.85, "grad_norm": 1.037458432864212, "learning_rate": 5.812390157465169e-07, "loss": 0.6007, "step": 5273 }, { "epoch": 0.85, "grad_norm": 0.976492695191397, "learning_rate": 5.8001838113941e-07, "loss": 0.5427, "step": 5274 }, { "epoch": 0.85, "grad_norm": 0.9688886953161026, "learning_rate": 5.78798950657134e-07, "loss": 0.5625, "step": 5275 }, { "epoch": 0.85, "grad_norm": 0.9946625119359499, "learning_rate": 5.775807246318954e-07, "loss": 0.5059, "step": 5276 }, { "epoch": 0.85, "grad_norm": 0.9868702518376952, "learning_rate": 5.76363703395571e-07, "loss": 0.5697, "step": 5277 }, { "epoch": 0.85, "grad_norm": 1.067052010447291, "learning_rate": 5.751478872797128e-07, "loss": 0.5878, "step": 5278 }, { "epoch": 0.85, "grad_norm": 0.9366615099851899, "learning_rate": 5.739332766155419e-07, "loss": 0.5348, "step": 5279 }, { "epoch": 0.85, "grad_norm": 1.0780493273729084, "learning_rate": 5.727198717339511e-07, "loss": 0.6063, "step": 5280 }, { "epoch": 0.85, "grad_norm": 1.0689519579248485, "learning_rate": 5.715076729655056e-07, "loss": 0.622, "step": 5281 }, { "epoch": 0.85, "grad_norm": 1.0147821950795468, "learning_rate": 5.702966806404431e-07, "loss": 0.592, "step": 5282 }, { "epoch": 0.85, "grad_norm": 1.048749534251478, "learning_rate": 5.690868950886702e-07, "loss": 0.553, "step": 5283 }, { "epoch": 0.85, "grad_norm": 0.6582095868611852, "learning_rate": 5.67878316639765e-07, "loss": 0.469, "step": 5284 }, { "epoch": 0.85, "grad_norm": 1.0903511460549942, "learning_rate": 5.666709456229797e-07, "loss": 0.6111, "step": 5285 }, { "epoch": 0.85, "grad_norm": 1.069378836190153, "learning_rate": 5.654647823672337e-07, "loss": 0.6025, "step": 5286 }, { "epoch": 0.85, "grad_norm": 1.0498498453145473, "learning_rate": 5.642598272011196e-07, "loss": 0.6452, "step": 5287 }, { "epoch": 0.85, "grad_norm": 1.0204681124217996, "learning_rate": 5.630560804528995e-07, "loss": 0.5912, "step": 5288 }, { "epoch": 0.85, "grad_norm": 1.0768615642887107, "learning_rate": 5.6185354245051e-07, "loss": 0.6014, "step": 5289 }, { "epoch": 0.85, "grad_norm": 1.0105549987276876, "learning_rate": 5.606522135215531e-07, "loss": 0.5796, "step": 5290 }, { "epoch": 0.85, "grad_norm": 1.060503094680933, "learning_rate": 5.594520939933041e-07, "loss": 0.6425, "step": 5291 }, { "epoch": 0.85, "grad_norm": 1.0585423246379893, "learning_rate": 5.582531841927097e-07, "loss": 0.569, "step": 5292 }, { "epoch": 0.85, "grad_norm": 0.9898097891778114, "learning_rate": 5.570554844463854e-07, "loss": 0.5129, "step": 5293 }, { "epoch": 0.85, "grad_norm": 0.96660176974015, "learning_rate": 5.558589950806164e-07, "loss": 0.4866, "step": 5294 }, { "epoch": 0.85, "grad_norm": 1.110907239102545, "learning_rate": 5.546637164213625e-07, "loss": 0.6565, "step": 5295 }, { "epoch": 0.85, "grad_norm": 1.0715278006922526, "learning_rate": 5.53469648794247e-07, "loss": 0.5875, "step": 5296 }, { "epoch": 0.85, "grad_norm": 0.9606097335561491, "learning_rate": 5.522767925245698e-07, "loss": 0.541, "step": 5297 }, { "epoch": 0.85, "grad_norm": 0.9436580348269376, "learning_rate": 5.510851479372959e-07, "loss": 0.4874, "step": 5298 }, { "epoch": 0.85, "grad_norm": 0.960843959065725, "learning_rate": 5.498947153570622e-07, "loss": 0.4786, "step": 5299 }, { "epoch": 0.85, "grad_norm": 1.134654081402097, "learning_rate": 5.487054951081772e-07, "loss": 0.5898, "step": 5300 }, { "epoch": 0.85, "grad_norm": 0.9818707185808371, "learning_rate": 5.475174875146156e-07, "loss": 0.5372, "step": 5301 }, { "epoch": 0.85, "grad_norm": 1.0922576665655455, "learning_rate": 5.463306929000228e-07, "loss": 0.606, "step": 5302 }, { "epoch": 0.85, "grad_norm": 1.0305282400865756, "learning_rate": 5.451451115877154e-07, "loss": 0.5586, "step": 5303 }, { "epoch": 0.85, "grad_norm": 1.0829463897755023, "learning_rate": 5.439607439006795e-07, "loss": 0.5664, "step": 5304 }, { "epoch": 0.85, "grad_norm": 1.0396069258770884, "learning_rate": 5.427775901615684e-07, "loss": 0.6382, "step": 5305 }, { "epoch": 0.85, "grad_norm": 1.0192800705378473, "learning_rate": 5.415956506927051e-07, "loss": 0.5376, "step": 5306 }, { "epoch": 0.86, "grad_norm": 1.1565156550638231, "learning_rate": 5.404149258160835e-07, "loss": 0.6994, "step": 5307 }, { "epoch": 0.86, "grad_norm": 1.0339829958337519, "learning_rate": 5.392354158533658e-07, "loss": 0.581, "step": 5308 }, { "epoch": 0.86, "grad_norm": 1.0403587546908875, "learning_rate": 5.380571211258811e-07, "loss": 0.5856, "step": 5309 }, { "epoch": 0.86, "grad_norm": 0.6954657542963856, "learning_rate": 5.36880041954631e-07, "loss": 0.5104, "step": 5310 }, { "epoch": 0.86, "grad_norm": 1.121336678174201, "learning_rate": 5.357041786602851e-07, "loss": 0.6519, "step": 5311 }, { "epoch": 0.86, "grad_norm": 1.0613368977206599, "learning_rate": 5.345295315631805e-07, "loss": 0.5453, "step": 5312 }, { "epoch": 0.86, "grad_norm": 1.0162393875640843, "learning_rate": 5.33356100983321e-07, "loss": 0.533, "step": 5313 }, { "epoch": 0.86, "grad_norm": 1.0612525436499354, "learning_rate": 5.32183887240385e-07, "loss": 0.6138, "step": 5314 }, { "epoch": 0.86, "grad_norm": 1.0005071071691518, "learning_rate": 5.310128906537137e-07, "loss": 0.5782, "step": 5315 }, { "epoch": 0.86, "grad_norm": 0.9657993983141224, "learning_rate": 5.298431115423186e-07, "loss": 0.531, "step": 5316 }, { "epoch": 0.86, "grad_norm": 0.9996200117211376, "learning_rate": 5.286745502248819e-07, "loss": 0.6109, "step": 5317 }, { "epoch": 0.86, "grad_norm": 1.1776338300650258, "learning_rate": 5.27507207019749e-07, "loss": 0.5746, "step": 5318 }, { "epoch": 0.86, "grad_norm": 0.9429207567510018, "learning_rate": 5.263410822449388e-07, "loss": 0.5042, "step": 5319 }, { "epoch": 0.86, "grad_norm": 1.0915199418442072, "learning_rate": 5.251761762181351e-07, "loss": 0.5903, "step": 5320 }, { "epoch": 0.86, "grad_norm": 1.038977501783133, "learning_rate": 5.240124892566895e-07, "loss": 0.5511, "step": 5321 }, { "epoch": 0.86, "grad_norm": 1.0594678368787669, "learning_rate": 5.228500216776239e-07, "loss": 0.5265, "step": 5322 }, { "epoch": 0.86, "grad_norm": 1.0666379055470785, "learning_rate": 5.216887737976256e-07, "loss": 0.6083, "step": 5323 }, { "epoch": 0.86, "grad_norm": 1.1512303845500023, "learning_rate": 5.205287459330499e-07, "loss": 0.6671, "step": 5324 }, { "epoch": 0.86, "grad_norm": 0.9340583537148618, "learning_rate": 5.193699383999213e-07, "loss": 0.577, "step": 5325 }, { "epoch": 0.86, "grad_norm": 1.2154686856510797, "learning_rate": 5.182123515139315e-07, "loss": 0.5092, "step": 5326 }, { "epoch": 0.86, "grad_norm": 1.0532878006381232, "learning_rate": 5.170559855904389e-07, "loss": 0.5884, "step": 5327 }, { "epoch": 0.86, "grad_norm": 1.0177303561681834, "learning_rate": 5.159008409444671e-07, "loss": 0.6046, "step": 5328 }, { "epoch": 0.86, "grad_norm": 1.0061290575507669, "learning_rate": 5.147469178907127e-07, "loss": 0.594, "step": 5329 }, { "epoch": 0.86, "grad_norm": 1.0150493950139718, "learning_rate": 5.135942167435342e-07, "loss": 0.5643, "step": 5330 }, { "epoch": 0.86, "grad_norm": 0.9799531989094968, "learning_rate": 5.124427378169588e-07, "loss": 0.566, "step": 5331 }, { "epoch": 0.86, "grad_norm": 1.0320827301044417, "learning_rate": 5.112924814246817e-07, "loss": 0.5371, "step": 5332 }, { "epoch": 0.86, "grad_norm": 1.0033996768121038, "learning_rate": 5.10143447880066e-07, "loss": 0.5938, "step": 5333 }, { "epoch": 0.86, "grad_norm": 1.0985550857635513, "learning_rate": 5.089956374961386e-07, "loss": 0.6197, "step": 5334 }, { "epoch": 0.86, "grad_norm": 0.9874049904785749, "learning_rate": 5.078490505855938e-07, "loss": 0.5124, "step": 5335 }, { "epoch": 0.86, "grad_norm": 1.0251815929947037, "learning_rate": 5.06703687460795e-07, "loss": 0.6097, "step": 5336 }, { "epoch": 0.86, "grad_norm": 1.0316808884098747, "learning_rate": 5.055595484337705e-07, "loss": 0.597, "step": 5337 }, { "epoch": 0.86, "grad_norm": 0.9861587380281057, "learning_rate": 5.044166338162143e-07, "loss": 0.5183, "step": 5338 }, { "epoch": 0.86, "grad_norm": 1.0567983884085228, "learning_rate": 5.03274943919489e-07, "loss": 0.6364, "step": 5339 }, { "epoch": 0.86, "grad_norm": 1.0491668352240564, "learning_rate": 5.021344790546212e-07, "loss": 0.5986, "step": 5340 }, { "epoch": 0.86, "grad_norm": 1.0756755698209064, "learning_rate": 5.009952395323065e-07, "loss": 0.6098, "step": 5341 }, { "epoch": 0.86, "grad_norm": 1.0201352465743787, "learning_rate": 4.998572256629047e-07, "loss": 0.6325, "step": 5342 }, { "epoch": 0.86, "grad_norm": 0.9624850896089041, "learning_rate": 4.987204377564409e-07, "loss": 0.5613, "step": 5343 }, { "epoch": 0.86, "grad_norm": 1.065915688816882, "learning_rate": 4.975848761226088e-07, "loss": 0.6015, "step": 5344 }, { "epoch": 0.86, "grad_norm": 1.0281805141824745, "learning_rate": 4.964505410707655e-07, "loss": 0.4828, "step": 5345 }, { "epoch": 0.86, "grad_norm": 1.0205613111336664, "learning_rate": 4.95317432909937e-07, "loss": 0.5506, "step": 5346 }, { "epoch": 0.86, "grad_norm": 0.9234754546685896, "learning_rate": 4.941855519488109e-07, "loss": 0.488, "step": 5347 }, { "epoch": 0.86, "grad_norm": 1.010656699995779, "learning_rate": 4.930548984957451e-07, "loss": 0.5806, "step": 5348 }, { "epoch": 0.86, "grad_norm": 1.0775370134740074, "learning_rate": 4.919254728587591e-07, "loss": 0.6418, "step": 5349 }, { "epoch": 0.86, "grad_norm": 0.9905056816255239, "learning_rate": 4.907972753455398e-07, "loss": 0.4806, "step": 5350 }, { "epoch": 0.86, "grad_norm": 1.046818992747354, "learning_rate": 4.896703062634401e-07, "loss": 0.5669, "step": 5351 }, { "epoch": 0.86, "grad_norm": 1.0742812252935492, "learning_rate": 4.885445659194771e-07, "loss": 0.7122, "step": 5352 }, { "epoch": 0.86, "grad_norm": 1.0439717535472564, "learning_rate": 4.874200546203328e-07, "loss": 0.608, "step": 5353 }, { "epoch": 0.86, "grad_norm": 0.9994051914718778, "learning_rate": 4.86296772672355e-07, "loss": 0.6534, "step": 5354 }, { "epoch": 0.86, "grad_norm": 0.9724560046110385, "learning_rate": 4.851747203815588e-07, "loss": 0.5549, "step": 5355 }, { "epoch": 0.86, "grad_norm": 1.0702135479088555, "learning_rate": 4.840538980536203e-07, "loss": 0.5732, "step": 5356 }, { "epoch": 0.86, "grad_norm": 1.0320588451695234, "learning_rate": 4.829343059938818e-07, "loss": 0.5805, "step": 5357 }, { "epoch": 0.86, "grad_norm": 1.0605178500973564, "learning_rate": 4.818159445073528e-07, "loss": 0.5014, "step": 5358 }, { "epoch": 0.86, "grad_norm": 1.0981733393435582, "learning_rate": 4.806988138987051e-07, "loss": 0.5717, "step": 5359 }, { "epoch": 0.86, "grad_norm": 0.9879712150863855, "learning_rate": 4.795829144722752e-07, "loss": 0.5166, "step": 5360 }, { "epoch": 0.86, "grad_norm": 0.9558676793186869, "learning_rate": 4.784682465320661e-07, "loss": 0.5766, "step": 5361 }, { "epoch": 0.86, "grad_norm": 0.6665559118702703, "learning_rate": 4.773548103817421e-07, "loss": 0.4912, "step": 5362 }, { "epoch": 0.86, "grad_norm": 1.0936849109209077, "learning_rate": 4.762426063246367e-07, "loss": 0.6284, "step": 5363 }, { "epoch": 0.86, "grad_norm": 1.0379521015925315, "learning_rate": 4.75131634663743e-07, "loss": 0.6005, "step": 5364 }, { "epoch": 0.86, "grad_norm": 1.0525528268967095, "learning_rate": 4.740218957017201e-07, "loss": 0.6249, "step": 5365 }, { "epoch": 0.86, "grad_norm": 0.9808550876906315, "learning_rate": 4.729133897408933e-07, "loss": 0.5716, "step": 5366 }, { "epoch": 0.86, "grad_norm": 1.075871209040523, "learning_rate": 4.7180611708324776e-07, "loss": 0.5349, "step": 5367 }, { "epoch": 0.86, "grad_norm": 1.1066723711838085, "learning_rate": 4.7070007803043714e-07, "loss": 0.6134, "step": 5368 }, { "epoch": 0.87, "grad_norm": 1.1210136016862404, "learning_rate": 4.6959527288377493e-07, "loss": 0.6187, "step": 5369 }, { "epoch": 0.87, "grad_norm": 1.0763661144842214, "learning_rate": 4.684917019442431e-07, "loss": 0.6473, "step": 5370 }, { "epoch": 0.87, "grad_norm": 1.1173034323796098, "learning_rate": 4.673893655124834e-07, "loss": 0.5817, "step": 5371 }, { "epoch": 0.87, "grad_norm": 1.0263195965203538, "learning_rate": 4.6628826388880165e-07, "loss": 0.4982, "step": 5372 }, { "epoch": 0.87, "grad_norm": 1.0620555346660505, "learning_rate": 4.651883973731708e-07, "loss": 0.6626, "step": 5373 }, { "epoch": 0.87, "grad_norm": 0.9040278249830518, "learning_rate": 4.640897662652227e-07, "loss": 0.4685, "step": 5374 }, { "epoch": 0.87, "grad_norm": 1.0041914536372203, "learning_rate": 4.629923708642542e-07, "loss": 0.5703, "step": 5375 }, { "epoch": 0.87, "grad_norm": 0.9535087821063442, "learning_rate": 4.618962114692277e-07, "loss": 0.5183, "step": 5376 }, { "epoch": 0.87, "grad_norm": 0.9820220585612087, "learning_rate": 4.608012883787688e-07, "loss": 0.514, "step": 5377 }, { "epoch": 0.87, "grad_norm": 1.0431003673985013, "learning_rate": 4.5970760189116057e-07, "loss": 0.5968, "step": 5378 }, { "epoch": 0.87, "grad_norm": 1.0299143341371995, "learning_rate": 4.5861515230435525e-07, "loss": 0.5257, "step": 5379 }, { "epoch": 0.87, "grad_norm": 1.0764525170430264, "learning_rate": 4.5752393991596754e-07, "loss": 0.6474, "step": 5380 }, { "epoch": 0.87, "grad_norm": 1.0602059020295858, "learning_rate": 4.5643396502327297e-07, "loss": 0.5415, "step": 5381 }, { "epoch": 0.87, "grad_norm": 0.9318694833006669, "learning_rate": 4.55345227923209e-07, "loss": 0.5686, "step": 5382 }, { "epoch": 0.87, "grad_norm": 1.0741222165021485, "learning_rate": 4.5425772891238065e-07, "loss": 0.6259, "step": 5383 }, { "epoch": 0.87, "grad_norm": 1.0363539626683014, "learning_rate": 4.5317146828704973e-07, "loss": 0.5537, "step": 5384 }, { "epoch": 0.87, "grad_norm": 1.0953554734378081, "learning_rate": 4.520864463431457e-07, "loss": 0.5768, "step": 5385 }, { "epoch": 0.87, "grad_norm": 0.6166165739699614, "learning_rate": 4.510026633762571e-07, "loss": 0.4494, "step": 5386 }, { "epoch": 0.87, "grad_norm": 1.0425424044417757, "learning_rate": 4.4992011968163775e-07, "loss": 0.6175, "step": 5387 }, { "epoch": 0.87, "grad_norm": 1.0230523111976728, "learning_rate": 4.488388155542012e-07, "loss": 0.6302, "step": 5388 }, { "epoch": 0.87, "grad_norm": 1.04279325447922, "learning_rate": 4.47758751288524e-07, "loss": 0.5369, "step": 5389 }, { "epoch": 0.87, "grad_norm": 1.02269064713844, "learning_rate": 4.466799271788469e-07, "loss": 0.5892, "step": 5390 }, { "epoch": 0.87, "grad_norm": 1.0196858339935873, "learning_rate": 4.4560234351906983e-07, "loss": 0.4966, "step": 5391 }, { "epoch": 0.87, "grad_norm": 0.9865520573336268, "learning_rate": 4.445260006027585e-07, "loss": 0.5919, "step": 5392 }, { "epoch": 0.87, "grad_norm": 0.9842238208294244, "learning_rate": 4.4345089872313674e-07, "loss": 0.5686, "step": 5393 }, { "epoch": 0.87, "grad_norm": 1.0293195850256684, "learning_rate": 4.4237703817309073e-07, "loss": 0.6088, "step": 5394 }, { "epoch": 0.87, "grad_norm": 0.9206359490846122, "learning_rate": 4.4130441924517263e-07, "loss": 0.45, "step": 5395 }, { "epoch": 0.87, "grad_norm": 0.667148183892131, "learning_rate": 4.4023304223159203e-07, "loss": 0.5098, "step": 5396 }, { "epoch": 0.87, "grad_norm": 1.067534368564209, "learning_rate": 4.3916290742421986e-07, "loss": 0.5546, "step": 5397 }, { "epoch": 0.87, "grad_norm": 1.0754095454079409, "learning_rate": 4.3809401511459237e-07, "loss": 0.5953, "step": 5398 }, { "epoch": 0.87, "grad_norm": 1.0153823284468526, "learning_rate": 4.3702636559390667e-07, "loss": 0.5817, "step": 5399 }, { "epoch": 0.87, "grad_norm": 1.0315366538737971, "learning_rate": 4.3595995915301614e-07, "loss": 0.5703, "step": 5400 }, { "epoch": 0.87, "grad_norm": 0.9291506792248045, "learning_rate": 4.348947960824412e-07, "loss": 0.5027, "step": 5401 }, { "epoch": 0.87, "grad_norm": 1.0274025123180561, "learning_rate": 4.3383087667236254e-07, "loss": 0.5889, "step": 5402 }, { "epoch": 0.87, "grad_norm": 0.9817287516962871, "learning_rate": 4.3276820121262053e-07, "loss": 0.5119, "step": 5403 }, { "epoch": 0.87, "grad_norm": 1.02159256337688, "learning_rate": 4.3170676999271576e-07, "loss": 0.5819, "step": 5404 }, { "epoch": 0.87, "grad_norm": 0.9773692672498304, "learning_rate": 4.306465833018131e-07, "loss": 0.5016, "step": 5405 }, { "epoch": 0.87, "grad_norm": 1.0412070262846036, "learning_rate": 4.295876414287375e-07, "loss": 0.5804, "step": 5406 }, { "epoch": 0.87, "grad_norm": 1.0186674071232007, "learning_rate": 4.2852994466197064e-07, "loss": 0.5259, "step": 5407 }, { "epoch": 0.87, "grad_norm": 1.033605603962162, "learning_rate": 4.274734932896607e-07, "loss": 0.5167, "step": 5408 }, { "epoch": 0.87, "grad_norm": 0.9921239516227279, "learning_rate": 4.264182875996142e-07, "loss": 0.5732, "step": 5409 }, { "epoch": 0.87, "grad_norm": 0.9394645921800621, "learning_rate": 4.253643278792974e-07, "loss": 0.5296, "step": 5410 }, { "epoch": 0.87, "grad_norm": 1.0792412043157142, "learning_rate": 4.243116144158377e-07, "loss": 0.5524, "step": 5411 }, { "epoch": 0.87, "grad_norm": 1.0621776967854895, "learning_rate": 4.232601474960246e-07, "loss": 0.5913, "step": 5412 }, { "epoch": 0.87, "grad_norm": 1.107477053972103, "learning_rate": 4.2220992740630515e-07, "loss": 0.5944, "step": 5413 }, { "epoch": 0.87, "grad_norm": 1.072704024203222, "learning_rate": 4.211609544327894e-07, "loss": 0.6419, "step": 5414 }, { "epoch": 0.87, "grad_norm": 1.0013870364173174, "learning_rate": 4.201132288612464e-07, "loss": 0.5553, "step": 5415 }, { "epoch": 0.87, "grad_norm": 1.0372138811562466, "learning_rate": 4.1906675097710457e-07, "loss": 0.5864, "step": 5416 }, { "epoch": 0.87, "grad_norm": 1.0014938965224214, "learning_rate": 4.180215210654548e-07, "loss": 0.6091, "step": 5417 }, { "epoch": 0.87, "grad_norm": 1.0344869093116396, "learning_rate": 4.1697753941104533e-07, "loss": 0.5855, "step": 5418 }, { "epoch": 0.87, "grad_norm": 0.9045712564435315, "learning_rate": 4.159348062982849e-07, "loss": 0.4658, "step": 5419 }, { "epoch": 0.87, "grad_norm": 0.9516960288834059, "learning_rate": 4.1489332201124443e-07, "loss": 0.5366, "step": 5420 }, { "epoch": 0.87, "grad_norm": 1.1475407599759215, "learning_rate": 4.138530868336532e-07, "loss": 0.5363, "step": 5421 }, { "epoch": 0.87, "grad_norm": 0.9804195425721148, "learning_rate": 4.1281410104889784e-07, "loss": 0.4947, "step": 5422 }, { "epoch": 0.87, "grad_norm": 1.0544655948697874, "learning_rate": 4.1177636494002747e-07, "loss": 0.6083, "step": 5423 }, { "epoch": 0.87, "grad_norm": 1.0599906459289223, "learning_rate": 4.107398787897515e-07, "loss": 0.5801, "step": 5424 }, { "epoch": 0.87, "grad_norm": 0.9417158938737237, "learning_rate": 4.097046428804363e-07, "loss": 0.5596, "step": 5425 }, { "epoch": 0.87, "grad_norm": 1.0926068634715345, "learning_rate": 4.0867065749410737e-07, "loss": 0.6613, "step": 5426 }, { "epoch": 0.87, "grad_norm": 0.9719320657022369, "learning_rate": 4.0763792291245274e-07, "loss": 0.5411, "step": 5427 }, { "epoch": 0.87, "grad_norm": 0.9530201623130976, "learning_rate": 4.066064394168184e-07, "loss": 0.5356, "step": 5428 }, { "epoch": 0.87, "grad_norm": 0.9727297474249349, "learning_rate": 4.055762072882058e-07, "loss": 0.4628, "step": 5429 }, { "epoch": 0.87, "grad_norm": 0.9847177137561993, "learning_rate": 4.045472268072803e-07, "loss": 0.6488, "step": 5430 }, { "epoch": 0.88, "grad_norm": 1.0834700565963222, "learning_rate": 4.0351949825436556e-07, "loss": 0.5347, "step": 5431 }, { "epoch": 0.88, "grad_norm": 1.0765626708944795, "learning_rate": 4.024930219094425e-07, "loss": 0.5857, "step": 5432 }, { "epoch": 0.88, "grad_norm": 1.0393289289976886, "learning_rate": 4.014677980521503e-07, "loss": 0.5744, "step": 5433 }, { "epoch": 0.88, "grad_norm": 0.9837184951009063, "learning_rate": 4.0044382696178994e-07, "loss": 0.5114, "step": 5434 }, { "epoch": 0.88, "grad_norm": 1.016605576594332, "learning_rate": 3.994211089173189e-07, "loss": 0.5254, "step": 5435 }, { "epoch": 0.88, "grad_norm": 0.9228662738271468, "learning_rate": 3.9839964419735254e-07, "loss": 0.548, "step": 5436 }, { "epoch": 0.88, "grad_norm": 1.0090413072087565, "learning_rate": 3.9737943308016726e-07, "loss": 0.6058, "step": 5437 }, { "epoch": 0.88, "grad_norm": 1.0497552798598961, "learning_rate": 3.963604758436962e-07, "loss": 0.5549, "step": 5438 }, { "epoch": 0.88, "grad_norm": 0.946658448727259, "learning_rate": 3.953427727655323e-07, "loss": 0.5162, "step": 5439 }, { "epoch": 0.88, "grad_norm": 1.076480248934819, "learning_rate": 3.9432632412292504e-07, "loss": 0.6071, "step": 5440 }, { "epoch": 0.88, "grad_norm": 1.04834389727141, "learning_rate": 3.933111301927828e-07, "loss": 0.49, "step": 5441 }, { "epoch": 0.88, "grad_norm": 1.0848161143569053, "learning_rate": 3.922971912516721e-07, "loss": 0.5789, "step": 5442 }, { "epoch": 0.88, "grad_norm": 1.0891721996182941, "learning_rate": 3.912845075758209e-07, "loss": 0.5624, "step": 5443 }, { "epoch": 0.88, "grad_norm": 1.039746527048185, "learning_rate": 3.9027307944110804e-07, "loss": 0.6073, "step": 5444 }, { "epoch": 0.88, "grad_norm": 0.9698055791727057, "learning_rate": 3.892629071230758e-07, "loss": 0.5732, "step": 5445 }, { "epoch": 0.88, "grad_norm": 1.1164340624317148, "learning_rate": 3.8825399089692464e-07, "loss": 0.5743, "step": 5446 }, { "epoch": 0.88, "grad_norm": 1.045446006316855, "learning_rate": 3.8724633103750964e-07, "loss": 0.6404, "step": 5447 }, { "epoch": 0.88, "grad_norm": 1.0923161128356194, "learning_rate": 3.862399278193446e-07, "loss": 0.672, "step": 5448 }, { "epoch": 0.88, "grad_norm": 1.0319511468126787, "learning_rate": 3.852347815166024e-07, "loss": 0.5949, "step": 5449 }, { "epoch": 0.88, "grad_norm": 1.014788457900176, "learning_rate": 3.8423089240311404e-07, "loss": 0.502, "step": 5450 }, { "epoch": 0.88, "grad_norm": 0.9723411469674623, "learning_rate": 3.8322826075236295e-07, "loss": 0.5632, "step": 5451 }, { "epoch": 0.88, "grad_norm": 0.6112810697978414, "learning_rate": 3.822268868374962e-07, "loss": 0.4914, "step": 5452 }, { "epoch": 0.88, "grad_norm": 1.0534832844434676, "learning_rate": 3.8122677093131613e-07, "loss": 0.5993, "step": 5453 }, { "epoch": 0.88, "grad_norm": 1.0440273469845522, "learning_rate": 3.802279133062803e-07, "loss": 0.6233, "step": 5454 }, { "epoch": 0.88, "grad_norm": 0.9708111601216068, "learning_rate": 3.7923031423450495e-07, "loss": 0.5327, "step": 5455 }, { "epoch": 0.88, "grad_norm": 0.6143990052691758, "learning_rate": 3.782339739877655e-07, "loss": 0.4312, "step": 5456 }, { "epoch": 0.88, "grad_norm": 1.0049799616533042, "learning_rate": 3.772388928374909e-07, "loss": 0.5211, "step": 5457 }, { "epoch": 0.88, "grad_norm": 1.0328432953355067, "learning_rate": 3.7624507105476826e-07, "loss": 0.5667, "step": 5458 }, { "epoch": 0.88, "grad_norm": 1.0325620225997374, "learning_rate": 3.752525089103437e-07, "loss": 0.5312, "step": 5459 }, { "epoch": 0.88, "grad_norm": 0.9562727790235105, "learning_rate": 3.7426120667461706e-07, "loss": 0.5234, "step": 5460 }, { "epoch": 0.88, "grad_norm": 0.9864133114132222, "learning_rate": 3.7327116461764737e-07, "loss": 0.6322, "step": 5461 }, { "epoch": 0.88, "grad_norm": 0.6583603252530985, "learning_rate": 3.7228238300914934e-07, "loss": 0.4555, "step": 5462 }, { "epoch": 0.88, "grad_norm": 0.960195546219715, "learning_rate": 3.7129486211849307e-07, "loss": 0.5506, "step": 5463 }, { "epoch": 0.88, "grad_norm": 1.189643247130239, "learning_rate": 3.7030860221470787e-07, "loss": 0.6136, "step": 5464 }, { "epoch": 0.88, "grad_norm": 1.01859904537151, "learning_rate": 3.6932360356647713e-07, "loss": 0.5096, "step": 5465 }, { "epoch": 0.88, "grad_norm": 1.015535350943715, "learning_rate": 3.683398664421428e-07, "loss": 0.565, "step": 5466 }, { "epoch": 0.88, "grad_norm": 0.9517095635129854, "learning_rate": 3.673573911097006e-07, "loss": 0.5365, "step": 5467 }, { "epoch": 0.88, "grad_norm": 1.0241250877283186, "learning_rate": 3.663761778368052e-07, "loss": 0.5486, "step": 5468 }, { "epoch": 0.88, "grad_norm": 0.7023895722821127, "learning_rate": 3.653962268907657e-07, "loss": 0.4853, "step": 5469 }, { "epoch": 0.88, "grad_norm": 1.044042763307488, "learning_rate": 3.6441753853854677e-07, "loss": 0.4482, "step": 5470 }, { "epoch": 0.88, "grad_norm": 1.0104596524046299, "learning_rate": 3.6344011304677074e-07, "loss": 0.5838, "step": 5471 }, { "epoch": 0.88, "grad_norm": 1.0166255412514318, "learning_rate": 3.624639506817174e-07, "loss": 0.5795, "step": 5472 }, { "epoch": 0.88, "grad_norm": 1.0335294877439671, "learning_rate": 3.614890517093161e-07, "loss": 0.6123, "step": 5473 }, { "epoch": 0.88, "grad_norm": 1.0554255229470013, "learning_rate": 3.605154163951591e-07, "loss": 0.5251, "step": 5474 }, { "epoch": 0.88, "grad_norm": 0.9840462754492645, "learning_rate": 3.595430450044912e-07, "loss": 0.5683, "step": 5475 }, { "epoch": 0.88, "grad_norm": 1.029108872851345, "learning_rate": 3.585719378022129e-07, "loss": 0.5348, "step": 5476 }, { "epoch": 0.88, "grad_norm": 1.0053202782628397, "learning_rate": 3.576020950528797e-07, "loss": 0.5901, "step": 5477 }, { "epoch": 0.88, "grad_norm": 1.0730291520335882, "learning_rate": 3.566335170207053e-07, "loss": 0.5362, "step": 5478 }, { "epoch": 0.88, "grad_norm": 1.0823990179378389, "learning_rate": 3.5566620396955573e-07, "loss": 0.5647, "step": 5479 }, { "epoch": 0.88, "grad_norm": 1.0347269930065042, "learning_rate": 3.5470015616295315e-07, "loss": 0.492, "step": 5480 }, { "epoch": 0.88, "grad_norm": 0.6824849473063792, "learning_rate": 3.537353738640775e-07, "loss": 0.4767, "step": 5481 }, { "epoch": 0.88, "grad_norm": 0.6831128263896733, "learning_rate": 3.527718573357597e-07, "loss": 0.4596, "step": 5482 }, { "epoch": 0.88, "grad_norm": 0.9580094604741383, "learning_rate": 3.518096068404908e-07, "loss": 0.5817, "step": 5483 }, { "epoch": 0.88, "grad_norm": 1.1230928024331281, "learning_rate": 3.508486226404123e-07, "loss": 0.6442, "step": 5484 }, { "epoch": 0.88, "grad_norm": 1.0968922492014628, "learning_rate": 3.4988890499732477e-07, "loss": 0.6178, "step": 5485 }, { "epoch": 0.88, "grad_norm": 1.0698282548137314, "learning_rate": 3.489304541726801e-07, "loss": 0.5978, "step": 5486 }, { "epoch": 0.88, "grad_norm": 1.0803133582998532, "learning_rate": 3.479732704275862e-07, "loss": 0.5861, "step": 5487 }, { "epoch": 0.88, "grad_norm": 1.0588609594924376, "learning_rate": 3.470173540228089e-07, "loss": 0.547, "step": 5488 }, { "epoch": 0.88, "grad_norm": 1.0497306275020217, "learning_rate": 3.460627052187632e-07, "loss": 0.5671, "step": 5489 }, { "epoch": 0.88, "grad_norm": 0.6605524077733674, "learning_rate": 3.451093242755238e-07, "loss": 0.4743, "step": 5490 }, { "epoch": 0.88, "grad_norm": 1.1049236856108615, "learning_rate": 3.4415721145281743e-07, "loss": 0.5355, "step": 5491 }, { "epoch": 0.88, "grad_norm": 1.0411107866516922, "learning_rate": 3.4320636701002497e-07, "loss": 0.5872, "step": 5492 }, { "epoch": 0.89, "grad_norm": 1.0495261128448135, "learning_rate": 3.422567912061836e-07, "loss": 0.5362, "step": 5493 }, { "epoch": 0.89, "grad_norm": 1.042951842294696, "learning_rate": 3.4130848429998533e-07, "loss": 0.6364, "step": 5494 }, { "epoch": 0.89, "grad_norm": 0.9684863664012521, "learning_rate": 3.403614465497718e-07, "loss": 0.5036, "step": 5495 }, { "epoch": 0.89, "grad_norm": 1.0407275384047345, "learning_rate": 3.3941567821354383e-07, "loss": 0.5326, "step": 5496 }, { "epoch": 0.89, "grad_norm": 0.9500087554589208, "learning_rate": 3.3847117954895536e-07, "loss": 0.5202, "step": 5497 }, { "epoch": 0.89, "grad_norm": 1.1121829882372587, "learning_rate": 3.375279508133139e-07, "loss": 0.6297, "step": 5498 }, { "epoch": 0.89, "grad_norm": 1.0294244345093888, "learning_rate": 3.3658599226357944e-07, "loss": 0.6812, "step": 5499 }, { "epoch": 0.89, "grad_norm": 0.6472027109529903, "learning_rate": 3.356453041563695e-07, "loss": 0.4464, "step": 5500 }, { "epoch": 0.89, "grad_norm": 1.0173436133692797, "learning_rate": 3.347058867479519e-07, "loss": 0.5746, "step": 5501 }, { "epoch": 0.89, "grad_norm": 1.0371058906915347, "learning_rate": 3.337677402942502e-07, "loss": 0.506, "step": 5502 }, { "epoch": 0.89, "grad_norm": 1.0806197988929243, "learning_rate": 3.3283086505084163e-07, "loss": 0.6032, "step": 5503 }, { "epoch": 0.89, "grad_norm": 1.116689260823897, "learning_rate": 3.3189526127295644e-07, "loss": 0.6575, "step": 5504 }, { "epoch": 0.89, "grad_norm": 1.0214300518691564, "learning_rate": 3.3096092921547976e-07, "loss": 0.5944, "step": 5505 }, { "epoch": 0.89, "grad_norm": 1.1038338677836759, "learning_rate": 3.300278691329478e-07, "loss": 0.6476, "step": 5506 }, { "epoch": 0.89, "grad_norm": 1.0483367780975363, "learning_rate": 3.290960812795535e-07, "loss": 0.5709, "step": 5507 }, { "epoch": 0.89, "grad_norm": 1.0956658373423223, "learning_rate": 3.2816556590914096e-07, "loss": 0.5764, "step": 5508 }, { "epoch": 0.89, "grad_norm": 1.0122203818208813, "learning_rate": 3.2723632327520795e-07, "loss": 0.5847, "step": 5509 }, { "epoch": 0.89, "grad_norm": 1.1698777849723354, "learning_rate": 3.2630835363090584e-07, "loss": 0.6682, "step": 5510 }, { "epoch": 0.89, "grad_norm": 1.0689562929407017, "learning_rate": 3.2538165722903913e-07, "loss": 0.5433, "step": 5511 }, { "epoch": 0.89, "grad_norm": 0.9444796568777285, "learning_rate": 3.244562343220664e-07, "loss": 0.573, "step": 5512 }, { "epoch": 0.89, "grad_norm": 1.0536833407411912, "learning_rate": 3.235320851620971e-07, "loss": 0.5729, "step": 5513 }, { "epoch": 0.89, "grad_norm": 1.0027094273926629, "learning_rate": 3.2260921000089483e-07, "loss": 0.5171, "step": 5514 }, { "epoch": 0.89, "grad_norm": 1.0894450690659234, "learning_rate": 3.216876090898774e-07, "loss": 0.5276, "step": 5515 }, { "epoch": 0.89, "grad_norm": 1.0695732898158838, "learning_rate": 3.207672826801139e-07, "loss": 0.6677, "step": 5516 }, { "epoch": 0.89, "grad_norm": 1.091673399881913, "learning_rate": 3.198482310223261e-07, "loss": 0.5787, "step": 5517 }, { "epoch": 0.89, "grad_norm": 1.0125337566118928, "learning_rate": 3.1893045436688863e-07, "loss": 0.5833, "step": 5518 }, { "epoch": 0.89, "grad_norm": 1.0889974030524212, "learning_rate": 3.1801395296383097e-07, "loss": 0.6043, "step": 5519 }, { "epoch": 0.89, "grad_norm": 1.1099818604557212, "learning_rate": 3.1709872706283173e-07, "loss": 0.6092, "step": 5520 }, { "epoch": 0.89, "grad_norm": 0.9557354878248199, "learning_rate": 3.161847769132237e-07, "loss": 0.4865, "step": 5521 }, { "epoch": 0.89, "grad_norm": 1.0861089240106898, "learning_rate": 3.152721027639932e-07, "loss": 0.5698, "step": 5522 }, { "epoch": 0.89, "grad_norm": 0.6722680754856171, "learning_rate": 3.1436070486377755e-07, "loss": 0.4502, "step": 5523 }, { "epoch": 0.89, "grad_norm": 0.9789450373599586, "learning_rate": 3.134505834608653e-07, "loss": 0.5599, "step": 5524 }, { "epoch": 0.89, "grad_norm": 1.0434220240551915, "learning_rate": 3.125417388031998e-07, "loss": 0.5947, "step": 5525 }, { "epoch": 0.89, "grad_norm": 1.0219079322471198, "learning_rate": 3.116341711383758e-07, "loss": 0.5705, "step": 5526 }, { "epoch": 0.89, "grad_norm": 1.102326432719913, "learning_rate": 3.1072788071363936e-07, "loss": 0.5829, "step": 5527 }, { "epoch": 0.89, "grad_norm": 1.0237232203906357, "learning_rate": 3.0982286777588745e-07, "loss": 0.6001, "step": 5528 }, { "epoch": 0.89, "grad_norm": 1.0660494984694744, "learning_rate": 3.089191325716728e-07, "loss": 0.5844, "step": 5529 }, { "epoch": 0.89, "grad_norm": 1.1341907065540477, "learning_rate": 3.0801667534719694e-07, "loss": 0.5658, "step": 5530 }, { "epoch": 0.89, "grad_norm": 1.0596006387971575, "learning_rate": 3.0711549634831364e-07, "loss": 0.6221, "step": 5531 }, { "epoch": 0.89, "grad_norm": 0.934146198553734, "learning_rate": 3.062155958205293e-07, "loss": 0.5726, "step": 5532 }, { "epoch": 0.89, "grad_norm": 0.6698126600777463, "learning_rate": 3.0531697400900117e-07, "loss": 0.4833, "step": 5533 }, { "epoch": 0.89, "grad_norm": 0.6466520817538444, "learning_rate": 3.044196311585401e-07, "loss": 0.4359, "step": 5534 }, { "epoch": 0.89, "grad_norm": 1.0625125926955123, "learning_rate": 3.0352356751360555e-07, "loss": 0.6247, "step": 5535 }, { "epoch": 0.89, "grad_norm": 0.9714020826596609, "learning_rate": 3.0262878331830946e-07, "loss": 0.543, "step": 5536 }, { "epoch": 0.89, "grad_norm": 1.0035036528019892, "learning_rate": 3.017352788164174e-07, "loss": 0.5282, "step": 5537 }, { "epoch": 0.89, "grad_norm": 1.0665749750397815, "learning_rate": 3.0084305425134406e-07, "loss": 0.6551, "step": 5538 }, { "epoch": 0.89, "grad_norm": 0.9332740941892883, "learning_rate": 2.999521098661551e-07, "loss": 0.4979, "step": 5539 }, { "epoch": 0.89, "grad_norm": 1.0170626745398943, "learning_rate": 2.990624459035685e-07, "loss": 0.5667, "step": 5540 }, { "epoch": 0.89, "grad_norm": 1.0543273011101513, "learning_rate": 2.981740626059548e-07, "loss": 0.5513, "step": 5541 }, { "epoch": 0.89, "grad_norm": 1.0200112625730604, "learning_rate": 2.972869602153333e-07, "loss": 0.581, "step": 5542 }, { "epoch": 0.89, "grad_norm": 1.0661442339344485, "learning_rate": 2.964011389733734e-07, "loss": 0.5549, "step": 5543 }, { "epoch": 0.89, "grad_norm": 0.9831828250171059, "learning_rate": 2.955165991214004e-07, "loss": 0.4838, "step": 5544 }, { "epoch": 0.89, "grad_norm": 1.0084705064476842, "learning_rate": 2.946333409003849e-07, "loss": 0.6019, "step": 5545 }, { "epoch": 0.89, "grad_norm": 1.0543011798622692, "learning_rate": 2.93751364550951e-07, "loss": 0.5785, "step": 5546 }, { "epoch": 0.89, "grad_norm": 1.0011303562065086, "learning_rate": 2.928706703133738e-07, "loss": 0.531, "step": 5547 }, { "epoch": 0.89, "grad_norm": 1.1228052869296419, "learning_rate": 2.919912584275797e-07, "loss": 0.5695, "step": 5548 }, { "epoch": 0.89, "grad_norm": 0.6572791098733273, "learning_rate": 2.911131291331437e-07, "loss": 0.4781, "step": 5549 }, { "epoch": 0.89, "grad_norm": 1.0779232607196123, "learning_rate": 2.902362826692923e-07, "loss": 0.6499, "step": 5550 }, { "epoch": 0.89, "grad_norm": 0.9756223479285945, "learning_rate": 2.8936071927490317e-07, "loss": 0.5049, "step": 5551 }, { "epoch": 0.89, "grad_norm": 0.9612104154297084, "learning_rate": 2.884864391885034e-07, "loss": 0.5759, "step": 5552 }, { "epoch": 0.89, "grad_norm": 1.0854430277396139, "learning_rate": 2.876134426482713e-07, "loss": 0.6429, "step": 5553 }, { "epoch": 0.89, "grad_norm": 1.0270442528963726, "learning_rate": 2.8674172989203497e-07, "loss": 0.5622, "step": 5554 }, { "epoch": 0.9, "grad_norm": 1.0382850024689978, "learning_rate": 2.858713011572728e-07, "loss": 0.6394, "step": 5555 }, { "epoch": 0.9, "grad_norm": 0.9819674903429195, "learning_rate": 2.850021566811145e-07, "loss": 0.561, "step": 5556 }, { "epoch": 0.9, "grad_norm": 0.6638302144387936, "learning_rate": 2.8413429670033855e-07, "loss": 0.4642, "step": 5557 }, { "epoch": 0.9, "grad_norm": 1.061859273924965, "learning_rate": 2.832677214513735e-07, "loss": 0.6811, "step": 5558 }, { "epoch": 0.9, "grad_norm": 0.9164145949177618, "learning_rate": 2.8240243117029885e-07, "loss": 0.4756, "step": 5559 }, { "epoch": 0.9, "grad_norm": 0.9408193412622898, "learning_rate": 2.815384260928433e-07, "loss": 0.5358, "step": 5560 }, { "epoch": 0.9, "grad_norm": 1.083725922040101, "learning_rate": 2.8067570645438467e-07, "loss": 0.575, "step": 5561 }, { "epoch": 0.9, "grad_norm": 1.0861298558444912, "learning_rate": 2.7981427248995276e-07, "loss": 0.5339, "step": 5562 }, { "epoch": 0.9, "grad_norm": 1.0973214650411918, "learning_rate": 2.7895412443422644e-07, "loss": 0.5715, "step": 5563 }, { "epoch": 0.9, "grad_norm": 1.0328810127482706, "learning_rate": 2.780952625215333e-07, "loss": 0.5915, "step": 5564 }, { "epoch": 0.9, "grad_norm": 1.0181382063910152, "learning_rate": 2.772376869858495e-07, "loss": 0.5834, "step": 5565 }, { "epoch": 0.9, "grad_norm": 1.063112432318683, "learning_rate": 2.7638139806080435e-07, "loss": 0.5821, "step": 5566 }, { "epoch": 0.9, "grad_norm": 1.1131037332401148, "learning_rate": 2.755263959796739e-07, "loss": 0.665, "step": 5567 }, { "epoch": 0.9, "grad_norm": 1.1665945481067514, "learning_rate": 2.746726809753836e-07, "loss": 0.7384, "step": 5568 }, { "epoch": 0.9, "grad_norm": 0.9609751344829549, "learning_rate": 2.73820253280509e-07, "loss": 0.5228, "step": 5569 }, { "epoch": 0.9, "grad_norm": 0.9613750889826922, "learning_rate": 2.72969113127276e-07, "loss": 0.4558, "step": 5570 }, { "epoch": 0.9, "grad_norm": 1.151759932071961, "learning_rate": 2.7211926074755755e-07, "loss": 0.7, "step": 5571 }, { "epoch": 0.9, "grad_norm": 0.9782703907592869, "learning_rate": 2.712706963728773e-07, "loss": 0.5761, "step": 5572 }, { "epoch": 0.9, "grad_norm": 0.9836208568725887, "learning_rate": 2.7042342023440803e-07, "loss": 0.5053, "step": 5573 }, { "epoch": 0.9, "grad_norm": 1.0118058314909428, "learning_rate": 2.6957743256297077e-07, "loss": 0.5513, "step": 5574 }, { "epoch": 0.9, "grad_norm": 0.9444850233991927, "learning_rate": 2.687327335890344e-07, "loss": 0.5098, "step": 5575 }, { "epoch": 0.9, "grad_norm": 0.6828821285754091, "learning_rate": 2.6788932354272104e-07, "loss": 0.4866, "step": 5576 }, { "epoch": 0.9, "grad_norm": 1.0841197724809626, "learning_rate": 2.6704720265379626e-07, "loss": 0.5151, "step": 5577 }, { "epoch": 0.9, "grad_norm": 1.0110367903493274, "learning_rate": 2.662063711516788e-07, "loss": 0.5921, "step": 5578 }, { "epoch": 0.9, "grad_norm": 1.0393891423510995, "learning_rate": 2.653668292654338e-07, "loss": 0.5062, "step": 5579 }, { "epoch": 0.9, "grad_norm": 0.9965735627587826, "learning_rate": 2.645285772237743e-07, "loss": 0.5224, "step": 5580 }, { "epoch": 0.9, "grad_norm": 0.9653412034617587, "learning_rate": 2.636916152550656e-07, "loss": 0.514, "step": 5581 }, { "epoch": 0.9, "grad_norm": 1.0418712480906847, "learning_rate": 2.6285594358731736e-07, "loss": 0.5581, "step": 5582 }, { "epoch": 0.9, "grad_norm": 1.0458123010412255, "learning_rate": 2.6202156244819035e-07, "loss": 0.5265, "step": 5583 }, { "epoch": 0.9, "grad_norm": 1.0082290737007817, "learning_rate": 2.611884720649921e-07, "loss": 0.6331, "step": 5584 }, { "epoch": 0.9, "grad_norm": 1.0674215917161387, "learning_rate": 2.6035667266468166e-07, "loss": 0.6253, "step": 5585 }, { "epoch": 0.9, "grad_norm": 1.0692431636148754, "learning_rate": 2.595261644738617e-07, "loss": 0.6049, "step": 5586 }, { "epoch": 0.9, "grad_norm": 0.9183243702528002, "learning_rate": 2.586969477187867e-07, "loss": 0.3919, "step": 5587 }, { "epoch": 0.9, "grad_norm": 0.9746585461481831, "learning_rate": 2.578690226253583e-07, "loss": 0.539, "step": 5588 }, { "epoch": 0.9, "grad_norm": 1.0121643076304234, "learning_rate": 2.570423894191254e-07, "loss": 0.482, "step": 5589 }, { "epoch": 0.9, "grad_norm": 0.9161540587789992, "learning_rate": 2.5621704832528573e-07, "loss": 0.595, "step": 5590 }, { "epoch": 0.9, "grad_norm": 1.0148002662535323, "learning_rate": 2.553929995686849e-07, "loss": 0.5615, "step": 5591 }, { "epoch": 0.9, "grad_norm": 1.0056509928856485, "learning_rate": 2.545702433738184e-07, "loss": 0.5691, "step": 5592 }, { "epoch": 0.9, "grad_norm": 1.0602073594496693, "learning_rate": 2.537487799648247e-07, "loss": 0.5574, "step": 5593 }, { "epoch": 0.9, "grad_norm": 1.0719631001202896, "learning_rate": 2.529286095654948e-07, "loss": 0.5199, "step": 5594 }, { "epoch": 0.9, "grad_norm": 1.0166341660136442, "learning_rate": 2.5210973239926553e-07, "loss": 0.5486, "step": 5595 }, { "epoch": 0.9, "grad_norm": 0.6271309046555144, "learning_rate": 2.5129214868922227e-07, "loss": 0.4733, "step": 5596 }, { "epoch": 0.9, "grad_norm": 0.9649522661143487, "learning_rate": 2.504758586580952e-07, "loss": 0.5531, "step": 5597 }, { "epoch": 0.9, "grad_norm": 1.0061987427835428, "learning_rate": 2.4966086252826706e-07, "loss": 0.578, "step": 5598 }, { "epoch": 0.9, "grad_norm": 1.029425221736268, "learning_rate": 2.4884716052176237e-07, "loss": 0.673, "step": 5599 }, { "epoch": 0.9, "grad_norm": 1.0507613788297858, "learning_rate": 2.480347528602589e-07, "loss": 0.5702, "step": 5600 }, { "epoch": 0.9, "grad_norm": 0.9368570068431507, "learning_rate": 2.4722363976507677e-07, "loss": 0.5648, "step": 5601 }, { "epoch": 0.9, "grad_norm": 0.9922137994193313, "learning_rate": 2.4641382145718597e-07, "loss": 0.5856, "step": 5602 }, { "epoch": 0.9, "grad_norm": 0.9799842390263321, "learning_rate": 2.456052981572038e-07, "loss": 0.6119, "step": 5603 }, { "epoch": 0.9, "grad_norm": 1.2005713443172104, "learning_rate": 2.447980700853941e-07, "loss": 0.6263, "step": 5604 }, { "epoch": 0.9, "grad_norm": 0.9525423913853822, "learning_rate": 2.439921374616683e-07, "loss": 0.5675, "step": 5605 }, { "epoch": 0.9, "grad_norm": 1.0353348777385614, "learning_rate": 2.4318750050558336e-07, "loss": 0.6561, "step": 5606 }, { "epoch": 0.9, "grad_norm": 1.0041666195653278, "learning_rate": 2.423841594363469e-07, "loss": 0.5906, "step": 5607 }, { "epoch": 0.9, "grad_norm": 0.9715725656550747, "learning_rate": 2.415821144728092e-07, "loss": 0.5057, "step": 5608 }, { "epoch": 0.9, "grad_norm": 1.1820797523350357, "learning_rate": 2.4078136583347014e-07, "loss": 0.5647, "step": 5609 }, { "epoch": 0.9, "grad_norm": 0.9796705537021708, "learning_rate": 2.399819137364756e-07, "loss": 0.4694, "step": 5610 }, { "epoch": 0.9, "grad_norm": 0.9347358315090242, "learning_rate": 2.3918375839961927e-07, "loss": 0.5299, "step": 5611 }, { "epoch": 0.9, "grad_norm": 0.9991540671700192, "learning_rate": 2.3838690004033826e-07, "loss": 0.5376, "step": 5612 }, { "epoch": 0.9, "grad_norm": 1.1310369867254366, "learning_rate": 2.3759133887572117e-07, "loss": 0.6034, "step": 5613 }, { "epoch": 0.9, "grad_norm": 0.9215603401557225, "learning_rate": 2.3679707512250072e-07, "loss": 0.5203, "step": 5614 }, { "epoch": 0.9, "grad_norm": 1.0514627069091567, "learning_rate": 2.3600410899705383e-07, "loss": 0.5541, "step": 5615 }, { "epoch": 0.9, "grad_norm": 1.0350499966833382, "learning_rate": 2.3521244071540883e-07, "loss": 0.5925, "step": 5616 }, { "epoch": 0.91, "grad_norm": 1.031460914229301, "learning_rate": 2.3442207049323707e-07, "loss": 0.5897, "step": 5617 }, { "epoch": 0.91, "grad_norm": 1.0078291330860885, "learning_rate": 2.3363299854585742e-07, "loss": 0.4935, "step": 5618 }, { "epoch": 0.91, "grad_norm": 1.0064122850718928, "learning_rate": 2.3284522508823403e-07, "loss": 0.5396, "step": 5619 }, { "epoch": 0.91, "grad_norm": 1.057485856689331, "learning_rate": 2.320587503349797e-07, "loss": 0.544, "step": 5620 }, { "epoch": 0.91, "grad_norm": 0.9600073434806452, "learning_rate": 2.3127357450035025e-07, "loss": 0.495, "step": 5621 }, { "epoch": 0.91, "grad_norm": 1.0419411379111172, "learning_rate": 2.304896977982496e-07, "loss": 0.5436, "step": 5622 }, { "epoch": 0.91, "grad_norm": 0.6266454025245999, "learning_rate": 2.2970712044222742e-07, "loss": 0.4467, "step": 5623 }, { "epoch": 0.91, "grad_norm": 1.0552041361041875, "learning_rate": 2.2892584264548046e-07, "loss": 0.6166, "step": 5624 }, { "epoch": 0.91, "grad_norm": 1.0578903791416672, "learning_rate": 2.2814586462084897e-07, "loss": 0.5426, "step": 5625 }, { "epoch": 0.91, "grad_norm": 0.6218668535050852, "learning_rate": 2.2736718658082025e-07, "loss": 0.4685, "step": 5626 }, { "epoch": 0.91, "grad_norm": 1.0308873685227118, "learning_rate": 2.2658980873752902e-07, "loss": 0.5072, "step": 5627 }, { "epoch": 0.91, "grad_norm": 0.9872595760687692, "learning_rate": 2.2581373130275307e-07, "loss": 0.6359, "step": 5628 }, { "epoch": 0.91, "grad_norm": 0.9461594560136964, "learning_rate": 2.250389544879189e-07, "loss": 0.5133, "step": 5629 }, { "epoch": 0.91, "grad_norm": 0.9512881904970983, "learning_rate": 2.2426547850409598e-07, "loss": 0.5308, "step": 5630 }, { "epoch": 0.91, "grad_norm": 1.0140743877186862, "learning_rate": 2.234933035619996e-07, "loss": 0.5793, "step": 5631 }, { "epoch": 0.91, "grad_norm": 0.9687308362978071, "learning_rate": 2.2272242987199379e-07, "loss": 0.5817, "step": 5632 }, { "epoch": 0.91, "grad_norm": 1.0397100908649268, "learning_rate": 2.219528576440838e-07, "loss": 0.5632, "step": 5633 }, { "epoch": 0.91, "grad_norm": 0.9770546176333865, "learning_rate": 2.2118458708792256e-07, "loss": 0.552, "step": 5634 }, { "epoch": 0.91, "grad_norm": 0.9738079319773267, "learning_rate": 2.2041761841280873e-07, "loss": 0.5078, "step": 5635 }, { "epoch": 0.91, "grad_norm": 0.7228201611518009, "learning_rate": 2.196519518276874e-07, "loss": 0.4828, "step": 5636 }, { "epoch": 0.91, "grad_norm": 1.0118924593312792, "learning_rate": 2.1888758754114336e-07, "loss": 0.5149, "step": 5637 }, { "epoch": 0.91, "grad_norm": 0.9808364047946521, "learning_rate": 2.1812452576141285e-07, "loss": 0.5455, "step": 5638 }, { "epoch": 0.91, "grad_norm": 1.1022571526277614, "learning_rate": 2.1736276669637568e-07, "loss": 0.6014, "step": 5639 }, { "epoch": 0.91, "grad_norm": 0.965735149413908, "learning_rate": 2.1660231055355473e-07, "loss": 0.5153, "step": 5640 }, { "epoch": 0.91, "grad_norm": 0.9606420238012073, "learning_rate": 2.1584315754011876e-07, "loss": 0.5643, "step": 5641 }, { "epoch": 0.91, "grad_norm": 1.0174527289956823, "learning_rate": 2.1508530786288284e-07, "loss": 0.5055, "step": 5642 }, { "epoch": 0.91, "grad_norm": 1.1231591946640616, "learning_rate": 2.143287617283074e-07, "loss": 0.6075, "step": 5643 }, { "epoch": 0.91, "grad_norm": 1.0155962549875126, "learning_rate": 2.135735193424937e-07, "loss": 0.4912, "step": 5644 }, { "epoch": 0.91, "grad_norm": 0.9496194154178266, "learning_rate": 2.1281958091119215e-07, "loss": 0.6203, "step": 5645 }, { "epoch": 0.91, "grad_norm": 1.0177730312752264, "learning_rate": 2.1206694663979676e-07, "loss": 0.566, "step": 5646 }, { "epoch": 0.91, "grad_norm": 0.9769314558605976, "learning_rate": 2.113156167333452e-07, "loss": 0.6019, "step": 5647 }, { "epoch": 0.91, "grad_norm": 1.043883333046884, "learning_rate": 2.105655913965199e-07, "loss": 0.5344, "step": 5648 }, { "epoch": 0.91, "grad_norm": 1.0401883514881405, "learning_rate": 2.0981687083364955e-07, "loss": 0.5224, "step": 5649 }, { "epoch": 0.91, "grad_norm": 1.0067410360268318, "learning_rate": 2.0906945524870493e-07, "loss": 0.5839, "step": 5650 }, { "epoch": 0.91, "grad_norm": 0.9395106253423855, "learning_rate": 2.0832334484530424e-07, "loss": 0.5796, "step": 5651 }, { "epoch": 0.91, "grad_norm": 1.1258581587172127, "learning_rate": 2.0757853982670773e-07, "loss": 0.6504, "step": 5652 }, { "epoch": 0.91, "grad_norm": 0.947602699409233, "learning_rate": 2.0683504039582024e-07, "loss": 0.5901, "step": 5653 }, { "epoch": 0.91, "grad_norm": 0.6669612913731927, "learning_rate": 2.0609284675519315e-07, "loss": 0.4684, "step": 5654 }, { "epoch": 0.91, "grad_norm": 1.0117106221943026, "learning_rate": 2.053519591070191e-07, "loss": 0.4657, "step": 5655 }, { "epoch": 0.91, "grad_norm": 1.0419734772538751, "learning_rate": 2.0461237765313612e-07, "loss": 0.5384, "step": 5656 }, { "epoch": 0.91, "grad_norm": 1.0364370514219166, "learning_rate": 2.0387410259502692e-07, "loss": 0.644, "step": 5657 }, { "epoch": 0.91, "grad_norm": 0.6109087541913896, "learning_rate": 2.0313713413381952e-07, "loss": 0.4458, "step": 5658 }, { "epoch": 0.91, "grad_norm": 1.0136842510244017, "learning_rate": 2.0240147247028164e-07, "loss": 0.5403, "step": 5659 }, { "epoch": 0.91, "grad_norm": 1.047935503805689, "learning_rate": 2.016671178048296e-07, "loss": 0.5809, "step": 5660 }, { "epoch": 0.91, "grad_norm": 1.0568607901464904, "learning_rate": 2.0093407033752178e-07, "loss": 0.5509, "step": 5661 }, { "epoch": 0.91, "grad_norm": 0.9536937772409713, "learning_rate": 2.0020233026806003e-07, "loss": 0.4873, "step": 5662 }, { "epoch": 0.91, "grad_norm": 1.016068484907409, "learning_rate": 1.9947189779579047e-07, "loss": 0.5177, "step": 5663 }, { "epoch": 0.91, "grad_norm": 1.01372866943647, "learning_rate": 1.9874277311970335e-07, "loss": 0.5705, "step": 5664 }, { "epoch": 0.91, "grad_norm": 1.0964453122596354, "learning_rate": 1.9801495643843316e-07, "loss": 0.6649, "step": 5665 }, { "epoch": 0.91, "grad_norm": 1.0507715524085735, "learning_rate": 1.972884479502557e-07, "loss": 0.494, "step": 5666 }, { "epoch": 0.91, "grad_norm": 0.9795147367244392, "learning_rate": 1.965632478530921e-07, "loss": 0.5049, "step": 5667 }, { "epoch": 0.91, "grad_norm": 0.918702237024255, "learning_rate": 1.9583935634450768e-07, "loss": 0.5642, "step": 5668 }, { "epoch": 0.91, "grad_norm": 1.108550353338122, "learning_rate": 1.9511677362171078e-07, "loss": 0.5915, "step": 5669 }, { "epoch": 0.91, "grad_norm": 1.0336367266425566, "learning_rate": 1.943954998815506e-07, "loss": 0.6107, "step": 5670 }, { "epoch": 0.91, "grad_norm": 1.0163066736216737, "learning_rate": 1.9367553532052497e-07, "loss": 0.5603, "step": 5671 }, { "epoch": 0.91, "grad_norm": 0.9958782952940909, "learning_rate": 1.9295688013477031e-07, "loss": 0.5335, "step": 5672 }, { "epoch": 0.91, "grad_norm": 1.0419059487236775, "learning_rate": 1.9223953452006784e-07, "loss": 0.6191, "step": 5673 }, { "epoch": 0.91, "grad_norm": 0.9867827408125353, "learning_rate": 1.915234986718434e-07, "loss": 0.583, "step": 5674 }, { "epoch": 0.91, "grad_norm": 1.1035805609924016, "learning_rate": 1.908087727851632e-07, "loss": 0.5909, "step": 5675 }, { "epoch": 0.91, "grad_norm": 1.0640638087239234, "learning_rate": 1.9009535705474091e-07, "loss": 0.6196, "step": 5676 }, { "epoch": 0.91, "grad_norm": 0.9856650935100841, "learning_rate": 1.8938325167492888e-07, "loss": 0.6122, "step": 5677 }, { "epoch": 0.91, "grad_norm": 1.0406641386203577, "learning_rate": 1.8867245683972358e-07, "loss": 0.5901, "step": 5678 }, { "epoch": 0.92, "grad_norm": 1.0553211433519176, "learning_rate": 1.8796297274276677e-07, "loss": 0.6245, "step": 5679 }, { "epoch": 0.92, "grad_norm": 0.9562490584410693, "learning_rate": 1.8725479957734105e-07, "loss": 0.5086, "step": 5680 }, { "epoch": 0.92, "grad_norm": 1.023846390618494, "learning_rate": 1.8654793753637156e-07, "loss": 0.5852, "step": 5681 }, { "epoch": 0.92, "grad_norm": 1.0790488532999576, "learning_rate": 1.8584238681242705e-07, "loss": 0.5812, "step": 5682 }, { "epoch": 0.92, "grad_norm": 1.0347801637233838, "learning_rate": 1.8513814759772042e-07, "loss": 0.5234, "step": 5683 }, { "epoch": 0.92, "grad_norm": 0.9608763991198356, "learning_rate": 1.844352200841043e-07, "loss": 0.5021, "step": 5684 }, { "epoch": 0.92, "grad_norm": 1.0555304915445853, "learning_rate": 1.8373360446307608e-07, "loss": 0.5191, "step": 5685 }, { "epoch": 0.92, "grad_norm": 0.9638197428882407, "learning_rate": 1.8303330092577453e-07, "loss": 0.5335, "step": 5686 }, { "epoch": 0.92, "grad_norm": 1.072581173056278, "learning_rate": 1.823343096629837e-07, "loss": 0.6369, "step": 5687 }, { "epoch": 0.92, "grad_norm": 1.0848625696674354, "learning_rate": 1.8163663086512574e-07, "loss": 0.4998, "step": 5688 }, { "epoch": 0.92, "grad_norm": 0.9879044467968758, "learning_rate": 1.80940264722268e-07, "loss": 0.6271, "step": 5689 }, { "epoch": 0.92, "grad_norm": 1.003648369682334, "learning_rate": 1.8024521142412155e-07, "loss": 0.6057, "step": 5690 }, { "epoch": 0.92, "grad_norm": 0.7008734723409707, "learning_rate": 1.7955147116003603e-07, "loss": 0.4891, "step": 5691 }, { "epoch": 0.92, "grad_norm": 0.942205448049128, "learning_rate": 1.7885904411900633e-07, "loss": 0.5698, "step": 5692 }, { "epoch": 0.92, "grad_norm": 0.9677739316429045, "learning_rate": 1.7816793048966884e-07, "loss": 0.6258, "step": 5693 }, { "epoch": 0.92, "grad_norm": 1.330364472853426, "learning_rate": 1.7747813046030184e-07, "loss": 0.5856, "step": 5694 }, { "epoch": 0.92, "grad_norm": 0.9871574962536415, "learning_rate": 1.7678964421882494e-07, "loss": 0.5688, "step": 5695 }, { "epoch": 0.92, "grad_norm": 1.0537375851310344, "learning_rate": 1.7610247195280207e-07, "loss": 0.5908, "step": 5696 }, { "epoch": 0.92, "grad_norm": 0.9527516972962567, "learning_rate": 1.7541661384943676e-07, "loss": 0.5401, "step": 5697 }, { "epoch": 0.92, "grad_norm": 0.9673482218804622, "learning_rate": 1.747320700955768e-07, "loss": 0.6761, "step": 5698 }, { "epoch": 0.92, "grad_norm": 1.0495548375639372, "learning_rate": 1.7404884087771023e-07, "loss": 0.5342, "step": 5699 }, { "epoch": 0.92, "grad_norm": 1.0395578120297386, "learning_rate": 1.7336692638196706e-07, "loss": 0.5758, "step": 5700 }, { "epoch": 0.92, "grad_norm": 1.0892405403178511, "learning_rate": 1.726863267941209e-07, "loss": 0.5799, "step": 5701 }, { "epoch": 0.92, "grad_norm": 0.9698192351580793, "learning_rate": 1.72007042299584e-07, "loss": 0.5248, "step": 5702 }, { "epoch": 0.92, "grad_norm": 1.0042716680228556, "learning_rate": 1.7132907308341383e-07, "loss": 0.626, "step": 5703 }, { "epoch": 0.92, "grad_norm": 1.0439854914496418, "learning_rate": 1.7065241933030664e-07, "loss": 0.6124, "step": 5704 }, { "epoch": 0.92, "grad_norm": 1.2328991660419708, "learning_rate": 1.6997708122460322e-07, "loss": 0.591, "step": 5705 }, { "epoch": 0.92, "grad_norm": 0.9988965526524847, "learning_rate": 1.6930305895028255e-07, "loss": 0.5668, "step": 5706 }, { "epoch": 0.92, "grad_norm": 0.9627770542481197, "learning_rate": 1.6863035269096774e-07, "loss": 0.5059, "step": 5707 }, { "epoch": 0.92, "grad_norm": 0.9788831261499522, "learning_rate": 1.6795896262992162e-07, "loss": 0.4812, "step": 5708 }, { "epoch": 0.92, "grad_norm": 0.9499693562799871, "learning_rate": 1.672888889500518e-07, "loss": 0.5326, "step": 5709 }, { "epoch": 0.92, "grad_norm": 1.0064958003630278, "learning_rate": 1.6662013183390112e-07, "loss": 0.5909, "step": 5710 }, { "epoch": 0.92, "grad_norm": 1.2256419873903477, "learning_rate": 1.6595269146366e-07, "loss": 0.5797, "step": 5711 }, { "epoch": 0.92, "grad_norm": 1.0126687153308378, "learning_rate": 1.6528656802115794e-07, "loss": 0.5904, "step": 5712 }, { "epoch": 0.92, "grad_norm": 1.063817697119845, "learning_rate": 1.6462176168786371e-07, "loss": 0.5932, "step": 5713 }, { "epoch": 0.92, "grad_norm": 0.9987999802790458, "learning_rate": 1.6395827264488962e-07, "loss": 0.4862, "step": 5714 }, { "epoch": 0.92, "grad_norm": 0.9052428848949725, "learning_rate": 1.6329610107298833e-07, "loss": 0.4977, "step": 5715 }, { "epoch": 0.92, "grad_norm": 1.0204585195475446, "learning_rate": 1.6263524715255442e-07, "loss": 0.5578, "step": 5716 }, { "epoch": 0.92, "grad_norm": 1.0455663224062182, "learning_rate": 1.6197571106362054e-07, "loss": 0.5564, "step": 5717 }, { "epoch": 0.92, "grad_norm": 1.036440472074687, "learning_rate": 1.6131749298586464e-07, "loss": 0.5709, "step": 5718 }, { "epoch": 0.92, "grad_norm": 1.0665461923133959, "learning_rate": 1.6066059309860273e-07, "loss": 0.639, "step": 5719 }, { "epoch": 0.92, "grad_norm": 0.9453739533683498, "learning_rate": 1.6000501158079217e-07, "loss": 0.5597, "step": 5720 }, { "epoch": 0.92, "grad_norm": 0.9294138299084963, "learning_rate": 1.5935074861103183e-07, "loss": 0.5368, "step": 5721 }, { "epoch": 0.92, "grad_norm": 1.0026262200191232, "learning_rate": 1.5869780436756078e-07, "loss": 0.6077, "step": 5722 }, { "epoch": 0.92, "grad_norm": 1.0199138356466153, "learning_rate": 1.5804617902825948e-07, "loss": 0.5314, "step": 5723 }, { "epoch": 0.92, "grad_norm": 0.6799222943976461, "learning_rate": 1.573958727706476e-07, "loss": 0.4525, "step": 5724 }, { "epoch": 0.92, "grad_norm": 0.9878787957496313, "learning_rate": 1.567468857718879e-07, "loss": 0.5243, "step": 5725 }, { "epoch": 0.92, "grad_norm": 1.0691929765081982, "learning_rate": 1.560992182087806e-07, "loss": 0.6273, "step": 5726 }, { "epoch": 0.92, "grad_norm": 0.9819726395135023, "learning_rate": 1.554528702577701e-07, "loss": 0.5903, "step": 5727 }, { "epoch": 0.92, "grad_norm": 0.9773675062929092, "learning_rate": 1.5480784209493837e-07, "loss": 0.5846, "step": 5728 }, { "epoch": 0.92, "grad_norm": 0.9414705551181769, "learning_rate": 1.5416413389600816e-07, "loss": 0.572, "step": 5729 }, { "epoch": 0.92, "grad_norm": 1.1266514391302263, "learning_rate": 1.5352174583634526e-07, "loss": 0.6172, "step": 5730 }, { "epoch": 0.92, "grad_norm": 1.0425727466367103, "learning_rate": 1.5288067809095196e-07, "loss": 0.6121, "step": 5731 }, { "epoch": 0.92, "grad_norm": 1.0771211729645591, "learning_rate": 1.5224093083447354e-07, "loss": 0.6217, "step": 5732 }, { "epoch": 0.92, "grad_norm": 0.9689854485630093, "learning_rate": 1.5160250424119505e-07, "loss": 0.4691, "step": 5733 }, { "epoch": 0.92, "grad_norm": 1.0183055889981414, "learning_rate": 1.509653984850412e-07, "loss": 0.5269, "step": 5734 }, { "epoch": 0.92, "grad_norm": 0.9650791639252029, "learning_rate": 1.5032961373957766e-07, "loss": 0.5759, "step": 5735 }, { "epoch": 0.92, "grad_norm": 0.5965172794468568, "learning_rate": 1.496951501780086e-07, "loss": 0.4555, "step": 5736 }, { "epoch": 0.92, "grad_norm": 1.142808534888976, "learning_rate": 1.490620079731808e-07, "loss": 0.5938, "step": 5737 }, { "epoch": 0.92, "grad_norm": 1.0493650741030391, "learning_rate": 1.4843018729757853e-07, "loss": 0.5589, "step": 5738 }, { "epoch": 0.92, "grad_norm": 1.0182513073668857, "learning_rate": 1.4779968832332737e-07, "loss": 0.615, "step": 5739 }, { "epoch": 0.92, "grad_norm": 0.9343969332887979, "learning_rate": 1.4717051122219272e-07, "loss": 0.506, "step": 5740 }, { "epoch": 0.92, "grad_norm": 1.016287246042637, "learning_rate": 1.4654265616557973e-07, "loss": 0.5642, "step": 5741 }, { "epoch": 0.93, "grad_norm": 0.972119054787757, "learning_rate": 1.459161233245343e-07, "loss": 0.52, "step": 5742 }, { "epoch": 0.93, "grad_norm": 0.9775662336005666, "learning_rate": 1.4529091286973994e-07, "loss": 0.5977, "step": 5743 }, { "epoch": 0.93, "grad_norm": 1.0513593450720808, "learning_rate": 1.4466702497152208e-07, "loss": 0.5526, "step": 5744 }, { "epoch": 0.93, "grad_norm": 1.061408736468417, "learning_rate": 1.4404445979984473e-07, "loss": 0.6584, "step": 5745 }, { "epoch": 0.93, "grad_norm": 0.9780178226874634, "learning_rate": 1.434232175243111e-07, "loss": 0.5526, "step": 5746 }, { "epoch": 0.93, "grad_norm": 0.9869974116198398, "learning_rate": 1.4280329831416585e-07, "loss": 0.5885, "step": 5747 }, { "epoch": 0.93, "grad_norm": 1.0554280892191013, "learning_rate": 1.4218470233829107e-07, "loss": 0.5779, "step": 5748 }, { "epoch": 0.93, "grad_norm": 1.0415155528439948, "learning_rate": 1.415674297652103e-07, "loss": 0.5735, "step": 5749 }, { "epoch": 0.93, "grad_norm": 1.0331095676887971, "learning_rate": 1.4095148076308518e-07, "loss": 0.6498, "step": 5750 }, { "epoch": 0.93, "grad_norm": 1.0421736968461854, "learning_rate": 1.4033685549971643e-07, "loss": 0.5419, "step": 5751 }, { "epoch": 0.93, "grad_norm": 1.0816728992584639, "learning_rate": 1.397235541425468e-07, "loss": 0.6044, "step": 5752 }, { "epoch": 0.93, "grad_norm": 1.122301880380358, "learning_rate": 1.3911157685865483e-07, "loss": 0.6248, "step": 5753 }, { "epoch": 0.93, "grad_norm": 1.1438208355283332, "learning_rate": 1.3850092381476e-07, "loss": 0.5874, "step": 5754 }, { "epoch": 0.93, "grad_norm": 0.9871263683905064, "learning_rate": 1.3789159517722138e-07, "loss": 0.589, "step": 5755 }, { "epoch": 0.93, "grad_norm": 1.0060943984793174, "learning_rate": 1.3728359111203792e-07, "loss": 0.5474, "step": 5756 }, { "epoch": 0.93, "grad_norm": 1.0107584831464644, "learning_rate": 1.3667691178484598e-07, "loss": 0.5677, "step": 5757 }, { "epoch": 0.93, "grad_norm": 1.0161817245342006, "learning_rate": 1.360715573609206e-07, "loss": 0.5572, "step": 5758 }, { "epoch": 0.93, "grad_norm": 1.0308128861082348, "learning_rate": 1.354675280051787e-07, "loss": 0.5347, "step": 5759 }, { "epoch": 0.93, "grad_norm": 0.9786172304288007, "learning_rate": 1.348648238821737e-07, "loss": 0.5249, "step": 5760 }, { "epoch": 0.93, "grad_norm": 0.6227954474031245, "learning_rate": 1.3426344515609813e-07, "loss": 0.4707, "step": 5761 }, { "epoch": 0.93, "grad_norm": 0.6588872747239863, "learning_rate": 1.3366339199078538e-07, "loss": 0.4359, "step": 5762 }, { "epoch": 0.93, "grad_norm": 1.0073973195451362, "learning_rate": 1.3306466454970634e-07, "loss": 0.5871, "step": 5763 }, { "epoch": 0.93, "grad_norm": 1.0397322987053335, "learning_rate": 1.3246726299597058e-07, "loss": 0.6385, "step": 5764 }, { "epoch": 0.93, "grad_norm": 1.063443920543598, "learning_rate": 1.318711874923262e-07, "loss": 0.6177, "step": 5765 }, { "epoch": 0.93, "grad_norm": 1.0609358052534776, "learning_rate": 1.312764382011611e-07, "loss": 0.6091, "step": 5766 }, { "epoch": 0.93, "grad_norm": 1.0591393382130974, "learning_rate": 1.3068301528450178e-07, "loss": 0.6093, "step": 5767 }, { "epoch": 0.93, "grad_norm": 0.9592990295281489, "learning_rate": 1.3009091890401226e-07, "loss": 0.4768, "step": 5768 }, { "epoch": 0.93, "grad_norm": 1.1286418862057355, "learning_rate": 1.2950014922099685e-07, "loss": 0.67, "step": 5769 }, { "epoch": 0.93, "grad_norm": 1.0664278554648636, "learning_rate": 1.2891070639639625e-07, "loss": 0.5342, "step": 5770 }, { "epoch": 0.93, "grad_norm": 0.9826537464362525, "learning_rate": 1.28322590590792e-07, "loss": 0.5697, "step": 5771 }, { "epoch": 0.93, "grad_norm": 0.9403362853099366, "learning_rate": 1.2773580196440262e-07, "loss": 0.571, "step": 5772 }, { "epoch": 0.93, "grad_norm": 0.9683736661096697, "learning_rate": 1.271503406770852e-07, "loss": 0.5607, "step": 5773 }, { "epoch": 0.93, "grad_norm": 1.0866259033120709, "learning_rate": 1.265662068883361e-07, "loss": 0.5242, "step": 5774 }, { "epoch": 0.93, "grad_norm": 0.652245077830759, "learning_rate": 1.2598340075728967e-07, "loss": 0.4459, "step": 5775 }, { "epoch": 0.93, "grad_norm": 0.9748266944869681, "learning_rate": 1.254019224427172e-07, "loss": 0.5137, "step": 5776 }, { "epoch": 0.93, "grad_norm": 1.0717814362971791, "learning_rate": 1.2482177210303036e-07, "loss": 0.6168, "step": 5777 }, { "epoch": 0.93, "grad_norm": 1.0889820686915248, "learning_rate": 1.242429498962783e-07, "loss": 0.5683, "step": 5778 }, { "epoch": 0.93, "grad_norm": 1.0859607130585227, "learning_rate": 1.23665455980147e-07, "loss": 0.5559, "step": 5779 }, { "epoch": 0.93, "grad_norm": 1.0624214267699803, "learning_rate": 1.2308929051196296e-07, "loss": 0.5611, "step": 5780 }, { "epoch": 0.93, "grad_norm": 1.0238316214607182, "learning_rate": 1.2251445364868886e-07, "loss": 0.5959, "step": 5781 }, { "epoch": 0.93, "grad_norm": 0.676344168231846, "learning_rate": 1.2194094554692614e-07, "loss": 0.4512, "step": 5782 }, { "epoch": 0.93, "grad_norm": 1.0026834136636622, "learning_rate": 1.213687663629143e-07, "loss": 0.5587, "step": 5783 }, { "epoch": 0.93, "grad_norm": 0.982843265233904, "learning_rate": 1.2079791625253078e-07, "loss": 0.5644, "step": 5784 }, { "epoch": 0.93, "grad_norm": 0.9754653861413627, "learning_rate": 1.2022839537129127e-07, "loss": 0.5387, "step": 5785 }, { "epoch": 0.93, "grad_norm": 1.0522169351760533, "learning_rate": 1.196602038743483e-07, "loss": 0.6152, "step": 5786 }, { "epoch": 0.93, "grad_norm": 0.9751132834068625, "learning_rate": 1.1909334191649302e-07, "loss": 0.5185, "step": 5787 }, { "epoch": 0.93, "grad_norm": 1.0013185249832652, "learning_rate": 1.1852780965215471e-07, "loss": 0.5584, "step": 5788 }, { "epoch": 0.93, "grad_norm": 1.0304719621313692, "learning_rate": 1.1796360723539956e-07, "loss": 0.5886, "step": 5789 }, { "epoch": 0.93, "grad_norm": 0.9788791357776215, "learning_rate": 1.174007348199313e-07, "loss": 0.4906, "step": 5790 }, { "epoch": 0.93, "grad_norm": 1.0450787272003623, "learning_rate": 1.1683919255909337e-07, "loss": 0.5662, "step": 5791 }, { "epoch": 0.93, "grad_norm": 1.0408351561393512, "learning_rate": 1.1627898060586395e-07, "loss": 0.5815, "step": 5792 }, { "epoch": 0.93, "grad_norm": 1.0884220928793564, "learning_rate": 1.1572009911286097e-07, "loss": 0.57, "step": 5793 }, { "epoch": 0.93, "grad_norm": 1.0713184634646309, "learning_rate": 1.151625482323393e-07, "loss": 0.6207, "step": 5794 }, { "epoch": 0.93, "grad_norm": 1.1160283150969414, "learning_rate": 1.1460632811619021e-07, "loss": 0.6513, "step": 5795 }, { "epoch": 0.93, "grad_norm": 0.9962178038413154, "learning_rate": 1.1405143891594417e-07, "loss": 0.6872, "step": 5796 }, { "epoch": 0.93, "grad_norm": 1.042475809805722, "learning_rate": 1.1349788078276857e-07, "loss": 0.5896, "step": 5797 }, { "epoch": 0.93, "grad_norm": 0.9681189102028814, "learning_rate": 1.129456538674667e-07, "loss": 0.5233, "step": 5798 }, { "epoch": 0.93, "grad_norm": 1.0004304131952244, "learning_rate": 1.1239475832048152e-07, "loss": 0.596, "step": 5799 }, { "epoch": 0.93, "grad_norm": 1.082046235907496, "learning_rate": 1.1184519429189245e-07, "loss": 0.6274, "step": 5800 }, { "epoch": 0.93, "grad_norm": 0.9925946848209398, "learning_rate": 1.1129696193141414e-07, "loss": 0.5737, "step": 5801 }, { "epoch": 0.93, "grad_norm": 1.1232155854424806, "learning_rate": 1.107500613884016e-07, "loss": 0.5956, "step": 5802 }, { "epoch": 0.93, "grad_norm": 0.9863741399379653, "learning_rate": 1.1020449281184565e-07, "loss": 0.5304, "step": 5803 }, { "epoch": 0.94, "grad_norm": 1.010276648973911, "learning_rate": 1.0966025635037403e-07, "loss": 0.5608, "step": 5804 }, { "epoch": 0.94, "grad_norm": 0.9572718717986605, "learning_rate": 1.0911735215225095e-07, "loss": 0.5191, "step": 5805 }, { "epoch": 0.94, "grad_norm": 0.9871357958617829, "learning_rate": 1.0857578036537919e-07, "loss": 0.5595, "step": 5806 }, { "epoch": 0.94, "grad_norm": 0.9691552688410355, "learning_rate": 1.0803554113729797e-07, "loss": 0.5508, "step": 5807 }, { "epoch": 0.94, "grad_norm": 0.9244410317940109, "learning_rate": 1.0749663461518344e-07, "loss": 0.4673, "step": 5808 }, { "epoch": 0.94, "grad_norm": 1.0252864380676119, "learning_rate": 1.0695906094584763e-07, "loss": 0.4839, "step": 5809 }, { "epoch": 0.94, "grad_norm": 1.0923606935668153, "learning_rate": 1.0642282027574168e-07, "loss": 0.6282, "step": 5810 }, { "epoch": 0.94, "grad_norm": 0.9959640840085532, "learning_rate": 1.0588791275095156e-07, "loss": 0.5691, "step": 5811 }, { "epoch": 0.94, "grad_norm": 1.0102238333022042, "learning_rate": 1.0535433851720067e-07, "loss": 0.6104, "step": 5812 }, { "epoch": 0.94, "grad_norm": 1.0374208702707612, "learning_rate": 1.0482209771985052e-07, "loss": 0.6268, "step": 5813 }, { "epoch": 0.94, "grad_norm": 1.094712556063163, "learning_rate": 1.0429119050389624e-07, "loss": 0.5892, "step": 5814 }, { "epoch": 0.94, "grad_norm": 1.0082232269653209, "learning_rate": 1.0376161701397325e-07, "loss": 0.5411, "step": 5815 }, { "epoch": 0.94, "grad_norm": 0.9365700852822209, "learning_rate": 1.0323337739435169e-07, "loss": 0.5505, "step": 5816 }, { "epoch": 0.94, "grad_norm": 1.0761601481580647, "learning_rate": 1.0270647178893756e-07, "loss": 0.5205, "step": 5817 }, { "epoch": 0.94, "grad_norm": 0.9556843770183094, "learning_rate": 1.0218090034127603e-07, "loss": 0.5018, "step": 5818 }, { "epoch": 0.94, "grad_norm": 0.7275192326864739, "learning_rate": 1.0165666319454592e-07, "loss": 0.4696, "step": 5819 }, { "epoch": 0.94, "grad_norm": 1.000482895881363, "learning_rate": 1.011337604915641e-07, "loss": 0.4971, "step": 5820 }, { "epoch": 0.94, "grad_norm": 0.9297404839360337, "learning_rate": 1.0061219237478382e-07, "loss": 0.4627, "step": 5821 }, { "epoch": 0.94, "grad_norm": 1.0507514866501615, "learning_rate": 1.0009195898629532e-07, "loss": 0.6638, "step": 5822 }, { "epoch": 0.94, "grad_norm": 0.9545611305913939, "learning_rate": 9.957306046782411e-08, "loss": 0.5016, "step": 5823 }, { "epoch": 0.94, "grad_norm": 1.0432008010995888, "learning_rate": 9.905549696073213e-08, "loss": 0.505, "step": 5824 }, { "epoch": 0.94, "grad_norm": 1.189490967055027, "learning_rate": 9.85392686060177e-08, "loss": 0.6521, "step": 5825 }, { "epoch": 0.94, "grad_norm": 0.9706825595759822, "learning_rate": 9.802437554431665e-08, "loss": 0.5421, "step": 5826 }, { "epoch": 0.94, "grad_norm": 0.9949154007473566, "learning_rate": 9.7510817915899e-08, "loss": 0.6245, "step": 5827 }, { "epoch": 0.94, "grad_norm": 1.0633350709549745, "learning_rate": 9.699859586067229e-08, "loss": 0.5608, "step": 5828 }, { "epoch": 0.94, "grad_norm": 1.165489803115361, "learning_rate": 9.648770951818098e-08, "loss": 0.6725, "step": 5829 }, { "epoch": 0.94, "grad_norm": 0.9568595411791763, "learning_rate": 9.597815902760321e-08, "loss": 0.5796, "step": 5830 }, { "epoch": 0.94, "grad_norm": 1.158010647017492, "learning_rate": 9.54699445277546e-08, "loss": 0.5982, "step": 5831 }, { "epoch": 0.94, "grad_norm": 1.0634975790965278, "learning_rate": 9.496306615708773e-08, "loss": 0.5952, "step": 5832 }, { "epoch": 0.94, "grad_norm": 0.9860782899282723, "learning_rate": 9.44575240536899e-08, "loss": 0.5089, "step": 5833 }, { "epoch": 0.94, "grad_norm": 0.989654658190415, "learning_rate": 9.395331835528431e-08, "loss": 0.5988, "step": 5834 }, { "epoch": 0.94, "grad_norm": 0.9790488626467019, "learning_rate": 9.345044919923108e-08, "loss": 0.5587, "step": 5835 }, { "epoch": 0.94, "grad_norm": 1.1034460187704282, "learning_rate": 9.294891672252448e-08, "loss": 0.5795, "step": 5836 }, { "epoch": 0.94, "grad_norm": 1.0095153520749391, "learning_rate": 9.244872106179748e-08, "loss": 0.5327, "step": 5837 }, { "epoch": 0.94, "grad_norm": 1.0255386318725004, "learning_rate": 9.194986235331604e-08, "loss": 0.6032, "step": 5838 }, { "epoch": 0.94, "grad_norm": 1.0842431932188958, "learning_rate": 9.145234073298314e-08, "loss": 0.5516, "step": 5839 }, { "epoch": 0.94, "grad_norm": 0.9539224188618249, "learning_rate": 9.095615633633814e-08, "loss": 0.469, "step": 5840 }, { "epoch": 0.94, "grad_norm": 1.1255803693516824, "learning_rate": 9.046130929855401e-08, "loss": 0.6669, "step": 5841 }, { "epoch": 0.94, "grad_norm": 1.1008710248296885, "learning_rate": 8.99677997544418e-08, "loss": 0.6232, "step": 5842 }, { "epoch": 0.94, "grad_norm": 1.0536205724554721, "learning_rate": 8.947562783844677e-08, "loss": 0.5702, "step": 5843 }, { "epoch": 0.94, "grad_norm": 1.0785764160775642, "learning_rate": 8.898479368464996e-08, "loss": 0.6063, "step": 5844 }, { "epoch": 0.94, "grad_norm": 1.0971332266116183, "learning_rate": 8.849529742676887e-08, "loss": 0.5751, "step": 5845 }, { "epoch": 0.94, "grad_norm": 1.0409547002377724, "learning_rate": 8.800713919815407e-08, "loss": 0.6088, "step": 5846 }, { "epoch": 0.94, "grad_norm": 0.9776980058593518, "learning_rate": 8.752031913179527e-08, "loss": 0.5463, "step": 5847 }, { "epoch": 0.94, "grad_norm": 0.947627119924481, "learning_rate": 8.70348373603147e-08, "loss": 0.5821, "step": 5848 }, { "epoch": 0.94, "grad_norm": 1.0558757987215825, "learning_rate": 8.655069401597105e-08, "loss": 0.5534, "step": 5849 }, { "epoch": 0.94, "grad_norm": 0.97947724745948, "learning_rate": 8.606788923065824e-08, "loss": 0.5325, "step": 5850 }, { "epoch": 0.94, "grad_norm": 1.01175414455452, "learning_rate": 8.558642313590603e-08, "loss": 0.5486, "step": 5851 }, { "epoch": 0.94, "grad_norm": 1.0468003526242884, "learning_rate": 8.510629586287844e-08, "loss": 0.5141, "step": 5852 }, { "epoch": 0.94, "grad_norm": 0.9846697701846534, "learning_rate": 8.462750754237581e-08, "loss": 0.5417, "step": 5853 }, { "epoch": 0.94, "grad_norm": 0.9896046682575587, "learning_rate": 8.415005830483324e-08, "loss": 0.5053, "step": 5854 }, { "epoch": 0.94, "grad_norm": 1.0539101287136707, "learning_rate": 8.367394828032116e-08, "loss": 0.619, "step": 5855 }, { "epoch": 0.94, "grad_norm": 0.9329023943333974, "learning_rate": 8.319917759854413e-08, "loss": 0.5034, "step": 5856 }, { "epoch": 0.94, "grad_norm": 1.0244137264455406, "learning_rate": 8.272574638884423e-08, "loss": 0.6315, "step": 5857 }, { "epoch": 0.94, "grad_norm": 1.0128514210422985, "learning_rate": 8.225365478019664e-08, "loss": 0.617, "step": 5858 }, { "epoch": 0.94, "grad_norm": 0.911444459882442, "learning_rate": 8.178290290121127e-08, "loss": 0.5044, "step": 5859 }, { "epoch": 0.94, "grad_norm": 1.0068624399303743, "learning_rate": 8.131349088013495e-08, "loss": 0.6689, "step": 5860 }, { "epoch": 0.94, "grad_norm": 1.2237416737767142, "learning_rate": 8.08454188448482e-08, "loss": 0.6577, "step": 5861 }, { "epoch": 0.94, "grad_norm": 1.104144846841128, "learning_rate": 8.037868692286677e-08, "loss": 0.5315, "step": 5862 }, { "epoch": 0.94, "grad_norm": 1.1301577793979476, "learning_rate": 7.991329524134062e-08, "loss": 0.5844, "step": 5863 }, { "epoch": 0.94, "grad_norm": 1.0017748082345865, "learning_rate": 7.944924392705666e-08, "loss": 0.5658, "step": 5864 }, { "epoch": 0.94, "grad_norm": 1.0181163207448154, "learning_rate": 7.898653310643378e-08, "loss": 0.5521, "step": 5865 }, { "epoch": 0.95, "grad_norm": 1.1151265860275503, "learning_rate": 7.852516290552781e-08, "loss": 0.5873, "step": 5866 }, { "epoch": 0.95, "grad_norm": 0.9228355974580896, "learning_rate": 7.80651334500293e-08, "loss": 0.5088, "step": 5867 }, { "epoch": 0.95, "grad_norm": 0.9449438838911601, "learning_rate": 7.760644486526137e-08, "loss": 0.4749, "step": 5868 }, { "epoch": 0.95, "grad_norm": 1.1008864766550193, "learning_rate": 7.714909727618569e-08, "loss": 0.6327, "step": 5869 }, { "epoch": 0.95, "grad_norm": 0.9455451802672485, "learning_rate": 7.669309080739429e-08, "loss": 0.581, "step": 5870 }, { "epoch": 0.95, "grad_norm": 0.9312526334078194, "learning_rate": 7.623842558311668e-08, "loss": 0.5786, "step": 5871 }, { "epoch": 0.95, "grad_norm": 1.0410155610198526, "learning_rate": 7.578510172721598e-08, "loss": 0.5632, "step": 5872 }, { "epoch": 0.95, "grad_norm": 0.979888148188077, "learning_rate": 7.533311936319121e-08, "loss": 0.5616, "step": 5873 }, { "epoch": 0.95, "grad_norm": 1.0364545794796396, "learning_rate": 7.48824786141733e-08, "loss": 0.5908, "step": 5874 }, { "epoch": 0.95, "grad_norm": 1.0378036120632044, "learning_rate": 7.443317960293018e-08, "loss": 0.5411, "step": 5875 }, { "epoch": 0.95, "grad_norm": 1.0180290679657185, "learning_rate": 7.39852224518628e-08, "loss": 0.5724, "step": 5876 }, { "epoch": 0.95, "grad_norm": 1.0102988192271567, "learning_rate": 7.353860728300743e-08, "loss": 0.5622, "step": 5877 }, { "epoch": 0.95, "grad_norm": 1.0540422263947613, "learning_rate": 7.3093334218034e-08, "loss": 0.5991, "step": 5878 }, { "epoch": 0.95, "grad_norm": 1.0745185538079773, "learning_rate": 7.264940337824767e-08, "loss": 0.6116, "step": 5879 }, { "epoch": 0.95, "grad_norm": 0.9289926911556593, "learning_rate": 7.22068148845867e-08, "loss": 0.5246, "step": 5880 }, { "epoch": 0.95, "grad_norm": 1.0698376288189828, "learning_rate": 7.176556885762465e-08, "loss": 0.6335, "step": 5881 }, { "epoch": 0.95, "grad_norm": 1.0860983073356667, "learning_rate": 7.132566541756925e-08, "loss": 0.6587, "step": 5882 }, { "epoch": 0.95, "grad_norm": 1.0037205561518752, "learning_rate": 7.088710468426241e-08, "loss": 0.6316, "step": 5883 }, { "epoch": 0.95, "grad_norm": 0.9654361445621602, "learning_rate": 7.044988677718023e-08, "loss": 0.4967, "step": 5884 }, { "epoch": 0.95, "grad_norm": 1.0913921664766342, "learning_rate": 7.001401181543243e-08, "loss": 0.6184, "step": 5885 }, { "epoch": 0.95, "grad_norm": 1.04391949061577, "learning_rate": 6.957947991776403e-08, "loss": 0.592, "step": 5886 }, { "epoch": 0.95, "grad_norm": 1.0419761614941379, "learning_rate": 6.914629120255312e-08, "loss": 0.5841, "step": 5887 }, { "epoch": 0.95, "grad_norm": 1.083068346517522, "learning_rate": 6.87144457878114e-08, "loss": 0.6112, "step": 5888 }, { "epoch": 0.95, "grad_norm": 1.0691754992437996, "learning_rate": 6.828394379118752e-08, "loss": 0.6316, "step": 5889 }, { "epoch": 0.95, "grad_norm": 0.988836853077234, "learning_rate": 6.785478532995993e-08, "loss": 0.5444, "step": 5890 }, { "epoch": 0.95, "grad_norm": 1.0109803459973528, "learning_rate": 6.742697052104507e-08, "loss": 0.5392, "step": 5891 }, { "epoch": 0.95, "grad_norm": 1.069822207359979, "learning_rate": 6.70004994809903e-08, "loss": 0.6435, "step": 5892 }, { "epoch": 0.95, "grad_norm": 1.0340221010661947, "learning_rate": 6.657537232597766e-08, "loss": 0.5679, "step": 5893 }, { "epoch": 0.95, "grad_norm": 1.0627810358568945, "learning_rate": 6.615158917182507e-08, "loss": 0.5926, "step": 5894 }, { "epoch": 0.95, "grad_norm": 0.994793382533743, "learning_rate": 6.572915013398184e-08, "loss": 0.6034, "step": 5895 }, { "epoch": 0.95, "grad_norm": 0.995469060372368, "learning_rate": 6.530805532753204e-08, "loss": 0.5311, "step": 5896 }, { "epoch": 0.95, "grad_norm": 1.0215230361837382, "learning_rate": 6.488830486719333e-08, "loss": 0.5905, "step": 5897 }, { "epoch": 0.95, "grad_norm": 1.033712323412795, "learning_rate": 6.446989886731758e-08, "loss": 0.587, "step": 5898 }, { "epoch": 0.95, "grad_norm": 1.0223376808357088, "learning_rate": 6.405283744189027e-08, "loss": 0.5936, "step": 5899 }, { "epoch": 0.95, "grad_norm": 1.00281986016423, "learning_rate": 6.363712070452999e-08, "loss": 0.5675, "step": 5900 }, { "epoch": 0.95, "grad_norm": 0.9823714504948695, "learning_rate": 6.322274876848944e-08, "loss": 0.5739, "step": 5901 }, { "epoch": 0.95, "grad_norm": 0.9993089425030285, "learning_rate": 6.280972174665611e-08, "loss": 0.5891, "step": 5902 }, { "epoch": 0.95, "grad_norm": 1.1230968393242087, "learning_rate": 6.239803975154835e-08, "loss": 0.5563, "step": 5903 }, { "epoch": 0.95, "grad_norm": 0.9949818208983318, "learning_rate": 6.19877028953203e-08, "loss": 0.534, "step": 5904 }, { "epoch": 0.95, "grad_norm": 1.0319019341731246, "learning_rate": 6.157871128975923e-08, "loss": 0.5755, "step": 5905 }, { "epoch": 0.95, "grad_norm": 1.0068243931849128, "learning_rate": 6.1171065046286e-08, "loss": 0.6079, "step": 5906 }, { "epoch": 0.95, "grad_norm": 1.018423274934718, "learning_rate": 6.076476427595345e-08, "loss": 0.5996, "step": 5907 }, { "epoch": 0.95, "grad_norm": 1.0262417943226345, "learning_rate": 6.035980908945027e-08, "loss": 0.6006, "step": 5908 }, { "epoch": 0.95, "grad_norm": 1.0570430283449896, "learning_rate": 5.995619959709764e-08, "loss": 0.5896, "step": 5909 }, { "epoch": 0.95, "grad_norm": 1.0761374532356365, "learning_rate": 5.9553935908848724e-08, "loss": 0.5642, "step": 5910 }, { "epoch": 0.95, "grad_norm": 1.122054330656811, "learning_rate": 5.915301813429197e-08, "loss": 0.5387, "step": 5911 }, { "epoch": 0.95, "grad_norm": 1.042170259835966, "learning_rate": 5.875344638264835e-08, "loss": 0.6282, "step": 5912 }, { "epoch": 0.95, "grad_norm": 1.0077494408115886, "learning_rate": 5.835522076277189e-08, "loss": 0.5424, "step": 5913 }, { "epoch": 0.95, "grad_norm": 1.0230562817392699, "learning_rate": 5.795834138315137e-08, "loss": 0.603, "step": 5914 }, { "epoch": 0.95, "grad_norm": 1.0696472628994662, "learning_rate": 5.756280835190586e-08, "loss": 0.6047, "step": 5915 }, { "epoch": 0.95, "grad_norm": 1.0999515788597323, "learning_rate": 5.7168621776790836e-08, "loss": 0.5857, "step": 5916 }, { "epoch": 0.95, "grad_norm": 0.6678274730734242, "learning_rate": 5.6775781765193714e-08, "loss": 0.4339, "step": 5917 }, { "epoch": 0.95, "grad_norm": 0.9288694672933757, "learning_rate": 5.638428842413335e-08, "loss": 0.5441, "step": 5918 }, { "epoch": 0.95, "grad_norm": 0.9546129438615574, "learning_rate": 5.599414186026497e-08, "loss": 0.6183, "step": 5919 }, { "epoch": 0.95, "grad_norm": 1.1239097448911732, "learning_rate": 5.5605342179874676e-08, "loss": 0.6101, "step": 5920 }, { "epoch": 0.95, "grad_norm": 1.0612734427377728, "learning_rate": 5.5217889488882734e-08, "loss": 0.6476, "step": 5921 }, { "epoch": 0.95, "grad_norm": 1.0986691469737098, "learning_rate": 5.4831783892840275e-08, "loss": 0.535, "step": 5922 }, { "epoch": 0.95, "grad_norm": 1.0486178644805468, "learning_rate": 5.444702549693481e-08, "loss": 0.5622, "step": 5923 }, { "epoch": 0.95, "grad_norm": 1.0515931427585945, "learning_rate": 5.406361440598529e-08, "loss": 0.5598, "step": 5924 }, { "epoch": 0.95, "grad_norm": 1.0528764721211823, "learning_rate": 5.368155072444148e-08, "loss": 0.525, "step": 5925 }, { "epoch": 0.95, "grad_norm": 1.2108866243703074, "learning_rate": 5.330083455638957e-08, "loss": 0.5574, "step": 5926 }, { "epoch": 0.95, "grad_norm": 0.9822537781329186, "learning_rate": 5.2921466005547706e-08, "loss": 0.5721, "step": 5927 }, { "epoch": 0.96, "grad_norm": 1.1114821790273113, "learning_rate": 5.2543445175264883e-08, "loss": 0.5788, "step": 5928 }, { "epoch": 0.96, "grad_norm": 1.0349070822170474, "learning_rate": 5.216677216852539e-08, "loss": 0.5853, "step": 5929 }, { "epoch": 0.96, "grad_norm": 1.0309374259114716, "learning_rate": 5.179144708794437e-08, "loss": 0.5539, "step": 5930 }, { "epoch": 0.96, "grad_norm": 1.0121527434692474, "learning_rate": 5.141747003577224e-08, "loss": 0.5274, "step": 5931 }, { "epoch": 0.96, "grad_norm": 1.036241680769594, "learning_rate": 5.104484111388919e-08, "loss": 0.6459, "step": 5932 }, { "epoch": 0.96, "grad_norm": 1.0473748186048766, "learning_rate": 5.067356042381011e-08, "loss": 0.5392, "step": 5933 }, { "epoch": 0.96, "grad_norm": 1.0136360800078754, "learning_rate": 5.0303628066681874e-08, "loss": 0.522, "step": 5934 }, { "epoch": 0.96, "grad_norm": 1.012518778975016, "learning_rate": 4.9935044143284984e-08, "loss": 0.5662, "step": 5935 }, { "epoch": 0.96, "grad_norm": 0.9789977408359223, "learning_rate": 4.956780875403189e-08, "loss": 0.5089, "step": 5936 }, { "epoch": 0.96, "grad_norm": 1.0064219777351655, "learning_rate": 4.920192199896645e-08, "loss": 0.5407, "step": 5937 }, { "epoch": 0.96, "grad_norm": 1.0533164127089127, "learning_rate": 4.883738397776727e-08, "loss": 0.6073, "step": 5938 }, { "epoch": 0.96, "grad_norm": 1.0516117085533667, "learning_rate": 4.84741947897438e-08, "loss": 0.5379, "step": 5939 }, { "epoch": 0.96, "grad_norm": 0.9722539234449139, "learning_rate": 4.8112354533839664e-08, "loss": 0.559, "step": 5940 }, { "epoch": 0.96, "grad_norm": 1.0359129173947965, "learning_rate": 4.775186330862991e-08, "loss": 0.6012, "step": 5941 }, { "epoch": 0.96, "grad_norm": 1.1427769649015262, "learning_rate": 4.7392721212322076e-08, "loss": 0.6105, "step": 5942 }, { "epoch": 0.96, "grad_norm": 0.9837993358718313, "learning_rate": 4.703492834275625e-08, "loss": 0.5505, "step": 5943 }, { "epoch": 0.96, "grad_norm": 1.0015468148964117, "learning_rate": 4.6678484797405e-08, "loss": 0.5739, "step": 5944 }, { "epoch": 0.96, "grad_norm": 1.0974162753954928, "learning_rate": 4.6323390673373434e-08, "loss": 0.7025, "step": 5945 }, { "epoch": 0.96, "grad_norm": 0.9968159389320547, "learning_rate": 4.5969646067400285e-08, "loss": 0.5641, "step": 5946 }, { "epoch": 0.96, "grad_norm": 1.035479829165455, "learning_rate": 4.561725107585346e-08, "loss": 0.5722, "step": 5947 }, { "epoch": 0.96, "grad_norm": 1.0281188604673126, "learning_rate": 4.5266205794735617e-08, "loss": 0.6042, "step": 5948 }, { "epoch": 0.96, "grad_norm": 1.013322386425791, "learning_rate": 4.491651031968136e-08, "loss": 0.6053, "step": 5949 }, { "epoch": 0.96, "grad_norm": 0.6667967103538348, "learning_rate": 4.456816474595782e-08, "loss": 0.4382, "step": 5950 }, { "epoch": 0.96, "grad_norm": 1.0123668350722073, "learning_rate": 4.4221169168462975e-08, "loss": 0.4831, "step": 5951 }, { "epoch": 0.96, "grad_norm": 1.102265518270378, "learning_rate": 4.387552368172898e-08, "loss": 0.5949, "step": 5952 }, { "epoch": 0.96, "grad_norm": 1.1509554567397842, "learning_rate": 4.35312283799183e-08, "loss": 0.6086, "step": 5953 }, { "epoch": 0.96, "grad_norm": 1.015887155827983, "learning_rate": 4.318828335682701e-08, "loss": 0.5947, "step": 5954 }, { "epoch": 0.96, "grad_norm": 0.9909227706323633, "learning_rate": 4.28466887058826e-08, "loss": 0.5841, "step": 5955 }, { "epoch": 0.96, "grad_norm": 0.9510704396811385, "learning_rate": 4.250644452014507e-08, "loss": 0.5954, "step": 5956 }, { "epoch": 0.96, "grad_norm": 0.9436507208142618, "learning_rate": 4.216755089230584e-08, "loss": 0.5294, "step": 5957 }, { "epoch": 0.96, "grad_norm": 0.9636949924797883, "learning_rate": 4.183000791468994e-08, "loss": 0.6215, "step": 5958 }, { "epoch": 0.96, "grad_norm": 0.9996279796476775, "learning_rate": 4.149381567925215e-08, "loss": 0.5767, "step": 5959 }, { "epoch": 0.96, "grad_norm": 1.0244527099554446, "learning_rate": 4.1158974277580866e-08, "loss": 0.591, "step": 5960 }, { "epoch": 0.96, "grad_norm": 1.0087529619898716, "learning_rate": 4.0825483800895905e-08, "loss": 0.5452, "step": 5961 }, { "epoch": 0.96, "grad_norm": 1.0517600076489957, "learning_rate": 4.04933443400507e-08, "loss": 0.5124, "step": 5962 }, { "epoch": 0.96, "grad_norm": 1.0195473040672522, "learning_rate": 4.0162555985526766e-08, "loss": 0.532, "step": 5963 }, { "epoch": 0.96, "grad_norm": 0.9887309734278957, "learning_rate": 3.9833118827442565e-08, "loss": 0.55, "step": 5964 }, { "epoch": 0.96, "grad_norm": 1.0391874702430555, "learning_rate": 3.950503295554409e-08, "loss": 0.5959, "step": 5965 }, { "epoch": 0.96, "grad_norm": 0.6342514427984829, "learning_rate": 3.917829845921095e-08, "loss": 0.4595, "step": 5966 }, { "epoch": 0.96, "grad_norm": 0.984273316744748, "learning_rate": 3.885291542745584e-08, "loss": 0.4875, "step": 5967 }, { "epoch": 0.96, "grad_norm": 0.939514839659328, "learning_rate": 3.8528883948921183e-08, "loss": 0.5456, "step": 5968 }, { "epoch": 0.96, "grad_norm": 1.015766670228296, "learning_rate": 3.8206204111882475e-08, "loss": 0.5938, "step": 5969 }, { "epoch": 0.96, "grad_norm": 0.6580458998161236, "learning_rate": 3.788487600424606e-08, "loss": 0.4838, "step": 5970 }, { "epoch": 0.96, "grad_norm": 1.149977093564522, "learning_rate": 3.7564899713550815e-08, "loss": 0.6022, "step": 5971 }, { "epoch": 0.96, "grad_norm": 0.9457986939808961, "learning_rate": 3.724627532696812e-08, "loss": 0.4883, "step": 5972 }, { "epoch": 0.96, "grad_norm": 1.0280661459406126, "learning_rate": 3.6929002931297975e-08, "loss": 0.5579, "step": 5973 }, { "epoch": 0.96, "grad_norm": 1.0247971418262811, "learning_rate": 3.661308261297625e-08, "loss": 0.5172, "step": 5974 }, { "epoch": 0.96, "grad_norm": 1.0717537955333103, "learning_rate": 3.629851445806687e-08, "loss": 0.6657, "step": 5975 }, { "epoch": 0.96, "grad_norm": 1.1129269698927888, "learning_rate": 3.59852985522674e-08, "loss": 0.5521, "step": 5976 }, { "epoch": 0.96, "grad_norm": 1.049013127165195, "learning_rate": 3.5673434980906806e-08, "loss": 0.6346, "step": 5977 }, { "epoch": 0.96, "grad_norm": 1.1147235334605654, "learning_rate": 3.536292382894435e-08, "loss": 0.6262, "step": 5978 }, { "epoch": 0.96, "grad_norm": 0.649763230737416, "learning_rate": 3.505376518097292e-08, "loss": 0.4652, "step": 5979 }, { "epoch": 0.96, "grad_norm": 1.028686180399358, "learning_rate": 3.474595912121514e-08, "loss": 0.5233, "step": 5980 }, { "epoch": 0.96, "grad_norm": 0.6687192185715938, "learning_rate": 3.443950573352672e-08, "loss": 0.4527, "step": 5981 }, { "epoch": 0.96, "grad_norm": 0.9934755332403051, "learning_rate": 3.41344051013931e-08, "loss": 0.568, "step": 5982 }, { "epoch": 0.96, "grad_norm": 1.047055680393411, "learning_rate": 3.3830657307932224e-08, "loss": 0.5356, "step": 5983 }, { "epoch": 0.96, "grad_norm": 1.0645050359885233, "learning_rate": 3.352826243589346e-08, "loss": 0.6017, "step": 5984 }, { "epoch": 0.96, "grad_norm": 0.9829859783603944, "learning_rate": 3.322722056765759e-08, "loss": 0.5861, "step": 5985 }, { "epoch": 0.96, "grad_norm": 0.933033284978129, "learning_rate": 3.292753178523733e-08, "loss": 0.4298, "step": 5986 }, { "epoch": 0.96, "grad_norm": 0.990088279543204, "learning_rate": 3.262919617027516e-08, "loss": 0.486, "step": 5987 }, { "epoch": 0.96, "grad_norm": 1.1023272300908387, "learning_rate": 3.233221380404605e-08, "loss": 0.6508, "step": 5988 }, { "epoch": 0.96, "grad_norm": 1.0570378646831333, "learning_rate": 3.2036584767456965e-08, "loss": 0.6037, "step": 5989 }, { "epoch": 0.97, "grad_norm": 1.1745892198345733, "learning_rate": 3.1742309141044594e-08, "loss": 0.6475, "step": 5990 }, { "epoch": 0.97, "grad_norm": 1.010077456937398, "learning_rate": 3.1449387004978125e-08, "loss": 0.5963, "step": 5991 }, { "epoch": 0.97, "grad_norm": 1.0686158008622504, "learning_rate": 3.115781843905763e-08, "loss": 0.6162, "step": 5992 }, { "epoch": 0.97, "grad_norm": 1.0950986324380942, "learning_rate": 3.086760352271401e-08, "loss": 0.6065, "step": 5993 }, { "epoch": 0.97, "grad_norm": 0.9944661068018521, "learning_rate": 3.057874233501068e-08, "loss": 0.5392, "step": 5994 }, { "epoch": 0.97, "grad_norm": 1.0005636005788723, "learning_rate": 3.0291234954640256e-08, "loss": 0.5621, "step": 5995 }, { "epoch": 0.97, "grad_norm": 1.0380618235767174, "learning_rate": 3.0005081459928976e-08, "loss": 0.5696, "step": 5996 }, { "epoch": 0.97, "grad_norm": 0.9716246179147302, "learning_rate": 2.9720281928831694e-08, "loss": 0.4748, "step": 5997 }, { "epoch": 0.97, "grad_norm": 1.1186111012642908, "learning_rate": 2.9436836438936356e-08, "loss": 0.5986, "step": 5998 }, { "epoch": 0.97, "grad_norm": 1.0699754116151239, "learning_rate": 2.9154745067460632e-08, "loss": 0.5617, "step": 5999 }, { "epoch": 0.97, "grad_norm": 1.0359113211246755, "learning_rate": 2.8874007891255273e-08, "loss": 0.645, "step": 6000 }, { "epoch": 0.97, "grad_norm": 0.9880499982328612, "learning_rate": 2.859462498679966e-08, "loss": 0.6027, "step": 6001 }, { "epoch": 0.97, "grad_norm": 1.0288891848585462, "learning_rate": 2.831659643020568e-08, "loss": 0.6045, "step": 6002 }, { "epoch": 0.97, "grad_norm": 1.1018752004272745, "learning_rate": 2.8039922297216638e-08, "loss": 0.5773, "step": 6003 }, { "epoch": 0.97, "grad_norm": 1.0683884670752244, "learning_rate": 2.776460266320502e-08, "loss": 0.6113, "step": 6004 }, { "epoch": 0.97, "grad_norm": 1.0517028291846477, "learning_rate": 2.7490637603176385e-08, "loss": 0.5133, "step": 6005 }, { "epoch": 0.97, "grad_norm": 1.0005565705303059, "learning_rate": 2.7218027191766027e-08, "loss": 0.5172, "step": 6006 }, { "epoch": 0.97, "grad_norm": 1.0184607254375562, "learning_rate": 2.6946771503240653e-08, "loss": 0.5846, "step": 6007 }, { "epoch": 0.97, "grad_norm": 1.0109404072443888, "learning_rate": 2.6676870611497817e-08, "loss": 0.5254, "step": 6008 }, { "epoch": 0.97, "grad_norm": 1.0353208929479085, "learning_rate": 2.6408324590065926e-08, "loss": 0.656, "step": 6009 }, { "epoch": 0.97, "grad_norm": 1.0101639011165042, "learning_rate": 2.6141133512103677e-08, "loss": 0.5457, "step": 6010 }, { "epoch": 0.97, "grad_norm": 1.0448329217035721, "learning_rate": 2.5875297450402848e-08, "loss": 0.6158, "step": 6011 }, { "epoch": 0.97, "grad_norm": 0.9986518312080574, "learning_rate": 2.5610816477382728e-08, "loss": 0.6002, "step": 6012 }, { "epoch": 0.97, "grad_norm": 1.0393833225269538, "learning_rate": 2.5347690665096236e-08, "loss": 0.555, "step": 6013 }, { "epoch": 0.97, "grad_norm": 1.0739016027471615, "learning_rate": 2.5085920085226035e-08, "loss": 0.529, "step": 6014 }, { "epoch": 0.97, "grad_norm": 1.1066436119120648, "learning_rate": 2.482550480908563e-08, "loss": 0.6016, "step": 6015 }, { "epoch": 0.97, "grad_norm": 1.1197941456947438, "learning_rate": 2.456644490761939e-08, "loss": 0.5893, "step": 6016 }, { "epoch": 0.97, "grad_norm": 0.9869868324157809, "learning_rate": 2.4308740451401413e-08, "loss": 0.5592, "step": 6017 }, { "epoch": 0.97, "grad_norm": 1.1033168990058149, "learning_rate": 2.4052391510638873e-08, "loss": 0.6427, "step": 6018 }, { "epoch": 0.97, "grad_norm": 1.022031404359787, "learning_rate": 2.379739815516757e-08, "loss": 0.5448, "step": 6019 }, { "epoch": 0.97, "grad_norm": 0.9654395401859253, "learning_rate": 2.354376045445472e-08, "loss": 0.5427, "step": 6020 }, { "epoch": 0.97, "grad_norm": 0.9696483833118121, "learning_rate": 2.3291478477598383e-08, "loss": 0.5388, "step": 6021 }, { "epoch": 0.97, "grad_norm": 1.042697620614537, "learning_rate": 2.304055229332691e-08, "loss": 0.5265, "step": 6022 }, { "epoch": 0.97, "grad_norm": 1.0403060616026418, "learning_rate": 2.279098197000007e-08, "loss": 0.5569, "step": 6023 }, { "epoch": 0.97, "grad_norm": 1.0929912311388212, "learning_rate": 2.254276757560736e-08, "loss": 0.5695, "step": 6024 }, { "epoch": 0.97, "grad_norm": 0.9993848019720345, "learning_rate": 2.2295909177769138e-08, "loss": 0.6064, "step": 6025 }, { "epoch": 0.97, "grad_norm": 1.0506677649019072, "learning_rate": 2.205040684373605e-08, "loss": 0.5429, "step": 6026 }, { "epoch": 0.97, "grad_norm": 1.0384675906242966, "learning_rate": 2.1806260640390155e-08, "loss": 0.6062, "step": 6027 }, { "epoch": 0.97, "grad_norm": 0.9247310579188803, "learning_rate": 2.15634706342438e-08, "loss": 0.4775, "step": 6028 }, { "epoch": 0.97, "grad_norm": 1.0665385886185803, "learning_rate": 2.1322036891439634e-08, "loss": 0.6205, "step": 6029 }, { "epoch": 0.97, "grad_norm": 1.1784458536650553, "learning_rate": 2.1081959477750604e-08, "loss": 0.6951, "step": 6030 }, { "epoch": 0.97, "grad_norm": 1.0927106332328886, "learning_rate": 2.0843238458580494e-08, "loss": 0.6427, "step": 6031 }, { "epoch": 0.97, "grad_norm": 1.03369561541982, "learning_rate": 2.060587389896285e-08, "loss": 0.5974, "step": 6032 }, { "epoch": 0.97, "grad_norm": 1.0992525353131506, "learning_rate": 2.0369865863563708e-08, "loss": 0.6062, "step": 6033 }, { "epoch": 0.97, "grad_norm": 1.0417870840008874, "learning_rate": 2.0135214416677205e-08, "loss": 0.5374, "step": 6034 }, { "epoch": 0.97, "grad_norm": 1.0321039099791618, "learning_rate": 1.990191962222887e-08, "loss": 0.6301, "step": 6035 }, { "epoch": 0.97, "grad_norm": 1.0386259060362732, "learning_rate": 1.9669981543775085e-08, "loss": 0.5964, "step": 6036 }, { "epoch": 0.97, "grad_norm": 1.0568892260790435, "learning_rate": 1.943940024450197e-08, "loss": 0.6277, "step": 6037 }, { "epoch": 0.97, "grad_norm": 1.060713907273282, "learning_rate": 1.9210175787226503e-08, "loss": 0.5961, "step": 6038 }, { "epoch": 0.97, "grad_norm": 0.9682470881197897, "learning_rate": 1.898230823439484e-08, "loss": 0.4966, "step": 6039 }, { "epoch": 0.97, "grad_norm": 1.060418836250532, "learning_rate": 1.8755797648085105e-08, "loss": 0.5889, "step": 6040 }, { "epoch": 0.97, "grad_norm": 1.0359832530880622, "learning_rate": 1.8530644090005156e-08, "loss": 0.6253, "step": 6041 }, { "epoch": 0.97, "grad_norm": 1.0308354518405056, "learning_rate": 1.8306847621492042e-08, "loss": 0.5519, "step": 6042 }, { "epoch": 0.97, "grad_norm": 0.9826030975887552, "learning_rate": 1.808440830351532e-08, "loss": 0.5388, "step": 6043 }, { "epoch": 0.97, "grad_norm": 1.0209681616769581, "learning_rate": 1.7863326196673193e-08, "loss": 0.5491, "step": 6044 }, { "epoch": 0.97, "grad_norm": 1.0161220416178605, "learning_rate": 1.764360136119414e-08, "loss": 0.5689, "step": 6045 }, { "epoch": 0.97, "grad_norm": 1.0171699496113011, "learning_rate": 1.742523385693806e-08, "loss": 0.5552, "step": 6046 }, { "epoch": 0.97, "grad_norm": 1.0068480346224993, "learning_rate": 1.7208223743392927e-08, "loss": 0.4944, "step": 6047 }, { "epoch": 0.97, "grad_norm": 0.9376974781085439, "learning_rate": 1.6992571079679775e-08, "loss": 0.4934, "step": 6048 }, { "epoch": 0.97, "grad_norm": 1.122715469435472, "learning_rate": 1.6778275924547726e-08, "loss": 0.5731, "step": 6049 }, { "epoch": 0.97, "grad_norm": 1.1345736771873207, "learning_rate": 1.656533833637619e-08, "loss": 0.6813, "step": 6050 }, { "epoch": 0.97, "grad_norm": 0.9876240670428966, "learning_rate": 1.6353758373175986e-08, "loss": 0.6256, "step": 6051 }, { "epoch": 0.98, "grad_norm": 1.0243227261975625, "learning_rate": 1.6143536092586564e-08, "loss": 0.6075, "step": 6052 }, { "epoch": 0.98, "grad_norm": 1.0491101871978217, "learning_rate": 1.593467155187933e-08, "loss": 0.6481, "step": 6053 }, { "epoch": 0.98, "grad_norm": 1.0048984113471824, "learning_rate": 1.5727164807953777e-08, "loss": 0.4735, "step": 6054 }, { "epoch": 0.98, "grad_norm": 0.9904350508901384, "learning_rate": 1.5521015917340787e-08, "loss": 0.5252, "step": 6055 }, { "epoch": 0.98, "grad_norm": 1.0773577469501434, "learning_rate": 1.5316224936200997e-08, "loss": 0.5856, "step": 6056 }, { "epoch": 0.98, "grad_norm": 1.076435305942875, "learning_rate": 1.5112791920325332e-08, "loss": 0.5555, "step": 6057 }, { "epoch": 0.98, "grad_norm": 1.055061134867831, "learning_rate": 1.491071692513446e-08, "loss": 0.5866, "step": 6058 }, { "epoch": 0.98, "grad_norm": 1.0378648709294953, "learning_rate": 1.4710000005678792e-08, "loss": 0.5823, "step": 6059 }, { "epoch": 0.98, "grad_norm": 1.0489727628892882, "learning_rate": 1.4510641216639587e-08, "loss": 0.4813, "step": 6060 }, { "epoch": 0.98, "grad_norm": 1.0058389814126618, "learning_rate": 1.4312640612327289e-08, "loss": 0.6395, "step": 6061 }, { "epoch": 0.98, "grad_norm": 0.9796919194829005, "learning_rate": 1.4115998246683193e-08, "loss": 0.5348, "step": 6062 }, { "epoch": 0.98, "grad_norm": 1.07488176315803, "learning_rate": 1.3920714173278338e-08, "loss": 0.6353, "step": 6063 }, { "epoch": 0.98, "grad_norm": 0.9738456468264551, "learning_rate": 1.372678844531239e-08, "loss": 0.4619, "step": 6064 }, { "epoch": 0.98, "grad_norm": 0.9940628420085107, "learning_rate": 1.3534221115616975e-08, "loss": 0.5384, "step": 6065 }, { "epoch": 0.98, "grad_norm": 1.0138613082662014, "learning_rate": 1.3343012236652353e-08, "loss": 0.5462, "step": 6066 }, { "epoch": 0.98, "grad_norm": 1.1017571957182446, "learning_rate": 1.3153161860509078e-08, "loss": 0.6238, "step": 6067 }, { "epoch": 0.98, "grad_norm": 1.0066358316676416, "learning_rate": 1.2964670038908e-08, "loss": 0.4811, "step": 6068 }, { "epoch": 0.98, "grad_norm": 0.9908461624636431, "learning_rate": 1.2777536823199155e-08, "loss": 0.4938, "step": 6069 }, { "epoch": 0.98, "grad_norm": 0.9938861098008926, "learning_rate": 1.2591762264362872e-08, "loss": 0.5671, "step": 6070 }, { "epoch": 0.98, "grad_norm": 1.117231795371279, "learning_rate": 1.2407346413009224e-08, "loss": 0.5714, "step": 6071 }, { "epoch": 0.98, "grad_norm": 1.0596293916296122, "learning_rate": 1.2224289319378025e-08, "loss": 0.5421, "step": 6072 }, { "epoch": 0.98, "grad_norm": 1.027496352421042, "learning_rate": 1.204259103333938e-08, "loss": 0.6068, "step": 6073 }, { "epoch": 0.98, "grad_norm": 0.9735783994586902, "learning_rate": 1.1862251604393138e-08, "loss": 0.5381, "step": 6074 }, { "epoch": 0.98, "grad_norm": 0.9652057351408223, "learning_rate": 1.1683271081668334e-08, "loss": 0.5038, "step": 6075 }, { "epoch": 0.98, "grad_norm": 1.0287664508355712, "learning_rate": 1.1505649513923744e-08, "loss": 0.5579, "step": 6076 }, { "epoch": 0.98, "grad_norm": 1.1254638553424168, "learning_rate": 1.1329386949548993e-08, "loss": 0.5673, "step": 6077 }, { "epoch": 0.98, "grad_norm": 1.0621023278864372, "learning_rate": 1.115448343656289e-08, "loss": 0.5937, "step": 6078 }, { "epoch": 0.98, "grad_norm": 0.9439501980536436, "learning_rate": 1.098093902261399e-08, "loss": 0.5669, "step": 6079 }, { "epoch": 0.98, "grad_norm": 1.022412626013796, "learning_rate": 1.0808753754980029e-08, "loss": 0.5074, "step": 6080 }, { "epoch": 0.98, "grad_norm": 0.9851438894304576, "learning_rate": 1.0637927680570149e-08, "loss": 0.5328, "step": 6081 }, { "epoch": 0.98, "grad_norm": 1.0571939724898407, "learning_rate": 1.0468460845921014e-08, "loss": 0.5309, "step": 6082 }, { "epoch": 0.98, "grad_norm": 1.0068854287604425, "learning_rate": 1.0300353297200139e-08, "loss": 0.5763, "step": 6083 }, { "epoch": 0.98, "grad_norm": 1.0573620959013172, "learning_rate": 1.0133605080204779e-08, "loss": 0.6127, "step": 6084 }, { "epoch": 0.98, "grad_norm": 1.1122790903781885, "learning_rate": 9.968216240361927e-09, "loss": 0.6164, "step": 6085 }, { "epoch": 0.98, "grad_norm": 1.0805030872753052, "learning_rate": 9.804186822728324e-09, "loss": 0.5554, "step": 6086 }, { "epoch": 0.98, "grad_norm": 1.007465496823581, "learning_rate": 9.641516871989331e-09, "loss": 0.5334, "step": 6087 }, { "epoch": 0.98, "grad_norm": 1.0673019748820423, "learning_rate": 9.480206432461725e-09, "loss": 0.5737, "step": 6088 }, { "epoch": 0.98, "grad_norm": 0.9985507562910875, "learning_rate": 9.32025554809035e-09, "loss": 0.5345, "step": 6089 }, { "epoch": 0.98, "grad_norm": 0.9971575235860275, "learning_rate": 9.161664262449799e-09, "loss": 0.6047, "step": 6090 }, { "epoch": 0.98, "grad_norm": 0.9911103389218824, "learning_rate": 9.004432618745507e-09, "loss": 0.5889, "step": 6091 }, { "epoch": 0.98, "grad_norm": 1.0357703634874746, "learning_rate": 8.848560659810989e-09, "loss": 0.5911, "step": 6092 }, { "epoch": 0.98, "grad_norm": 0.9602679315792186, "learning_rate": 8.694048428110614e-09, "loss": 0.56, "step": 6093 }, { "epoch": 0.98, "grad_norm": 1.0373607960057563, "learning_rate": 8.540895965737928e-09, "loss": 0.5352, "step": 6094 }, { "epoch": 0.98, "grad_norm": 1.0091122134975479, "learning_rate": 8.38910331441567e-09, "loss": 0.6252, "step": 6095 }, { "epoch": 0.98, "grad_norm": 1.0091368786766277, "learning_rate": 8.23867051549576e-09, "loss": 0.5669, "step": 6096 }, { "epoch": 0.98, "grad_norm": 1.0580920145093027, "learning_rate": 8.089597609960976e-09, "loss": 0.6397, "step": 6097 }, { "epoch": 0.98, "grad_norm": 0.935980110913327, "learning_rate": 7.94188463842216e-09, "loss": 0.505, "step": 6098 }, { "epoch": 0.98, "grad_norm": 1.023334838052072, "learning_rate": 7.795531641121013e-09, "loss": 0.6305, "step": 6099 }, { "epoch": 0.98, "grad_norm": 0.9685433501730999, "learning_rate": 7.65053865792842e-09, "loss": 0.5018, "step": 6100 }, { "epoch": 0.98, "grad_norm": 0.992892912556299, "learning_rate": 7.506905728343893e-09, "loss": 0.4982, "step": 6101 }, { "epoch": 0.98, "grad_norm": 0.9998551841717478, "learning_rate": 7.364632891496692e-09, "loss": 0.5191, "step": 6102 }, { "epoch": 0.98, "grad_norm": 0.990183717577766, "learning_rate": 7.223720186146366e-09, "loss": 0.5313, "step": 6103 }, { "epoch": 0.98, "grad_norm": 1.0330959611771864, "learning_rate": 7.08416765068165e-09, "loss": 0.5294, "step": 6104 }, { "epoch": 0.98, "grad_norm": 1.05190502969452, "learning_rate": 6.945975323119913e-09, "loss": 0.5541, "step": 6105 }, { "epoch": 0.98, "grad_norm": 1.071577803082671, "learning_rate": 6.809143241109373e-09, "loss": 0.5759, "step": 6106 }, { "epoch": 0.98, "grad_norm": 0.969015238135913, "learning_rate": 6.673671441925766e-09, "loss": 0.6539, "step": 6107 }, { "epoch": 0.98, "grad_norm": 1.0736419859618975, "learning_rate": 6.539559962476238e-09, "loss": 0.603, "step": 6108 }, { "epoch": 0.98, "grad_norm": 0.9281036570834018, "learning_rate": 6.4068088392960084e-09, "loss": 0.4484, "step": 6109 }, { "epoch": 0.98, "grad_norm": 0.9691735021225716, "learning_rate": 6.275418108550591e-09, "loss": 0.5434, "step": 6110 }, { "epoch": 0.98, "grad_norm": 0.932596668774295, "learning_rate": 6.1453878060335755e-09, "loss": 0.4885, "step": 6111 }, { "epoch": 0.98, "grad_norm": 1.0500518374015095, "learning_rate": 6.0167179671694055e-09, "loss": 0.5763, "step": 6112 }, { "epoch": 0.98, "grad_norm": 0.6035114941550267, "learning_rate": 5.8894086270111505e-09, "loss": 0.4542, "step": 6113 }, { "epoch": 0.99, "grad_norm": 0.9972747102694812, "learning_rate": 5.7634598202416235e-09, "loss": 0.5713, "step": 6114 }, { "epoch": 0.99, "grad_norm": 0.646022688923708, "learning_rate": 5.638871581172822e-09, "loss": 0.4433, "step": 6115 }, { "epoch": 0.99, "grad_norm": 1.0365478188837125, "learning_rate": 5.515643943745375e-09, "loss": 0.6057, "step": 6116 }, { "epoch": 0.99, "grad_norm": 1.1070703971096747, "learning_rate": 5.393776941530759e-09, "loss": 0.6072, "step": 6117 }, { "epoch": 0.99, "grad_norm": 1.0745398601819056, "learning_rate": 5.273270607727976e-09, "loss": 0.5967, "step": 6118 }, { "epoch": 0.99, "grad_norm": 1.008255045280376, "learning_rate": 5.1541249751668745e-09, "loss": 0.5379, "step": 6119 }, { "epoch": 0.99, "grad_norm": 0.933313812656356, "learning_rate": 5.0363400763059346e-09, "loss": 0.5529, "step": 6120 }, { "epoch": 0.99, "grad_norm": 0.7395389956068767, "learning_rate": 4.919915943232822e-09, "loss": 0.4884, "step": 6121 }, { "epoch": 0.99, "grad_norm": 1.007876759717825, "learning_rate": 4.80485260766439e-09, "loss": 0.6308, "step": 6122 }, { "epoch": 0.99, "grad_norm": 1.082508457015373, "learning_rate": 4.691150100948338e-09, "loss": 0.5723, "step": 6123 }, { "epoch": 0.99, "grad_norm": 0.6600835017230215, "learning_rate": 4.578808454058781e-09, "loss": 0.4713, "step": 6124 }, { "epoch": 0.99, "grad_norm": 1.0778406904243396, "learning_rate": 4.4678276976017895e-09, "loss": 0.5522, "step": 6125 }, { "epoch": 0.99, "grad_norm": 1.1528647064682462, "learning_rate": 4.358207861810959e-09, "loss": 0.5798, "step": 6126 }, { "epoch": 0.99, "grad_norm": 0.9963952404545122, "learning_rate": 4.249948976550178e-09, "loss": 0.5307, "step": 6127 }, { "epoch": 0.99, "grad_norm": 1.0747539120347367, "learning_rate": 4.143051071311965e-09, "loss": 0.5658, "step": 6128 }, { "epoch": 0.99, "grad_norm": 1.0301774275911888, "learning_rate": 4.037514175218027e-09, "loss": 0.596, "step": 6129 }, { "epoch": 0.99, "grad_norm": 1.0025789412957742, "learning_rate": 3.933338317019808e-09, "loss": 0.6152, "step": 6130 }, { "epoch": 0.99, "grad_norm": 1.1235622709269641, "learning_rate": 3.83052352509794e-09, "loss": 0.6478, "step": 6131 }, { "epoch": 0.99, "grad_norm": 1.0006984545537025, "learning_rate": 3.729069827461685e-09, "loss": 0.5367, "step": 6132 }, { "epoch": 0.99, "grad_norm": 0.9970821499202145, "learning_rate": 3.628977251749488e-09, "loss": 0.6065, "step": 6133 }, { "epoch": 0.99, "grad_norm": 0.633620624780548, "learning_rate": 3.530245825229539e-09, "loss": 0.4632, "step": 6134 }, { "epoch": 0.99, "grad_norm": 1.107699522009888, "learning_rate": 3.4328755747992103e-09, "loss": 0.6112, "step": 6135 }, { "epoch": 0.99, "grad_norm": 0.9713928310904248, "learning_rate": 3.336866526985061e-09, "loss": 0.558, "step": 6136 }, { "epoch": 0.99, "grad_norm": 0.964809304299755, "learning_rate": 3.2422187079417242e-09, "loss": 0.5845, "step": 6137 }, { "epoch": 0.99, "grad_norm": 0.953593170793836, "learning_rate": 3.148932143455241e-09, "loss": 0.5758, "step": 6138 }, { "epoch": 0.99, "grad_norm": 1.0668095124418397, "learning_rate": 3.0570068589380606e-09, "loss": 0.5583, "step": 6139 }, { "epoch": 0.99, "grad_norm": 1.023734018974233, "learning_rate": 2.9664428794340393e-09, "loss": 0.6433, "step": 6140 }, { "epoch": 0.99, "grad_norm": 1.0148768386092069, "learning_rate": 2.877240229614553e-09, "loss": 0.4971, "step": 6141 }, { "epoch": 0.99, "grad_norm": 0.9460995846149767, "learning_rate": 2.7893989337818283e-09, "loss": 0.5436, "step": 6142 }, { "epoch": 0.99, "grad_norm": 1.1002763225031482, "learning_rate": 2.7029190158656125e-09, "loss": 0.597, "step": 6143 }, { "epoch": 0.99, "grad_norm": 0.9610353383184768, "learning_rate": 2.6178004994253936e-09, "loss": 0.5469, "step": 6144 }, { "epoch": 0.99, "grad_norm": 0.9914114508362192, "learning_rate": 2.5340434076503994e-09, "loss": 0.6009, "step": 6145 }, { "epoch": 0.99, "grad_norm": 0.6487762398526201, "learning_rate": 2.4516477633579338e-09, "loss": 0.5013, "step": 6146 }, { "epoch": 0.99, "grad_norm": 1.0156101100489658, "learning_rate": 2.370613588994486e-09, "loss": 0.484, "step": 6147 }, { "epoch": 0.99, "grad_norm": 1.1627599762560006, "learning_rate": 2.2909409066362854e-09, "loss": 0.6794, "step": 6148 }, { "epoch": 0.99, "grad_norm": 1.0733042254008707, "learning_rate": 2.2126297379887473e-09, "loss": 0.6338, "step": 6149 }, { "epoch": 0.99, "grad_norm": 1.018659992916284, "learning_rate": 2.1356801043853624e-09, "loss": 0.5561, "step": 6150 }, { "epoch": 0.99, "grad_norm": 1.0653913053243522, "learning_rate": 2.060092026789917e-09, "loss": 0.5583, "step": 6151 }, { "epoch": 0.99, "grad_norm": 1.0322219392742877, "learning_rate": 1.9858655257942726e-09, "loss": 0.5764, "step": 6152 }, { "epoch": 0.99, "grad_norm": 1.0187572648277592, "learning_rate": 1.9130006216200314e-09, "loss": 0.6284, "step": 6153 }, { "epoch": 0.99, "grad_norm": 0.9741050372475817, "learning_rate": 1.841497334117426e-09, "loss": 0.5356, "step": 6154 }, { "epoch": 0.99, "grad_norm": 1.0528706205744032, "learning_rate": 1.771355682765874e-09, "loss": 0.6183, "step": 6155 }, { "epoch": 0.99, "grad_norm": 0.9529098490161597, "learning_rate": 1.7025756866739795e-09, "loss": 0.5154, "step": 6156 }, { "epoch": 0.99, "grad_norm": 1.0260655015735392, "learning_rate": 1.6351573645795316e-09, "loss": 0.6401, "step": 6157 }, { "epoch": 0.99, "grad_norm": 0.9785927370283991, "learning_rate": 1.5691007348489495e-09, "loss": 0.5262, "step": 6158 }, { "epoch": 0.99, "grad_norm": 1.0023156458056082, "learning_rate": 1.5044058154778385e-09, "loss": 0.5684, "step": 6159 }, { "epoch": 0.99, "grad_norm": 0.9956653057538485, "learning_rate": 1.441072624090989e-09, "loss": 0.5037, "step": 6160 }, { "epoch": 0.99, "grad_norm": 1.0443396664572782, "learning_rate": 1.3791011779423769e-09, "loss": 0.6064, "step": 6161 }, { "epoch": 0.99, "grad_norm": 1.002802211768055, "learning_rate": 1.3184914939140537e-09, "loss": 0.5944, "step": 6162 }, { "epoch": 0.99, "grad_norm": 1.0187578677247746, "learning_rate": 1.2592435885178112e-09, "loss": 0.4841, "step": 6163 }, { "epoch": 0.99, "grad_norm": 1.0323323324330262, "learning_rate": 1.2013574778951819e-09, "loss": 0.5843, "step": 6164 }, { "epoch": 0.99, "grad_norm": 0.9583023689224676, "learning_rate": 1.1448331778152189e-09, "loss": 0.4871, "step": 6165 }, { "epoch": 0.99, "grad_norm": 1.014101463885054, "learning_rate": 1.0896707036772703e-09, "loss": 0.5925, "step": 6166 }, { "epoch": 0.99, "grad_norm": 1.0861057934313525, "learning_rate": 1.0358700705082048e-09, "loss": 0.6003, "step": 6167 }, { "epoch": 0.99, "grad_norm": 1.0468390881227134, "learning_rate": 9.834312929657419e-10, "loss": 0.5825, "step": 6168 }, { "epoch": 0.99, "grad_norm": 1.0668872982331805, "learning_rate": 9.323543853351212e-10, "loss": 0.6826, "step": 6169 }, { "epoch": 0.99, "grad_norm": 0.9208274407055418, "learning_rate": 8.826393615318784e-10, "loss": 0.5703, "step": 6170 }, { "epoch": 0.99, "grad_norm": 0.975338600482535, "learning_rate": 8.342862350985137e-10, "loss": 0.4622, "step": 6171 }, { "epoch": 0.99, "grad_norm": 1.047378735295744, "learning_rate": 7.872950192083783e-10, "loss": 0.5905, "step": 6172 }, { "epoch": 0.99, "grad_norm": 0.9496929556563557, "learning_rate": 7.416657266634542e-10, "loss": 0.5127, "step": 6173 }, { "epoch": 0.99, "grad_norm": 0.9693853647972032, "learning_rate": 6.973983698943532e-10, "loss": 0.5019, "step": 6174 }, { "epoch": 0.99, "grad_norm": 1.0201178810878129, "learning_rate": 6.544929609597628e-10, "loss": 0.6044, "step": 6175 }, { "epoch": 1.0, "grad_norm": 1.0368535263982326, "learning_rate": 6.129495115497764e-10, "loss": 0.5626, "step": 6176 }, { "epoch": 1.0, "grad_norm": 1.0282627798580182, "learning_rate": 5.727680329808971e-10, "loss": 0.6514, "step": 6177 }, { "epoch": 1.0, "grad_norm": 1.0087698862022365, "learning_rate": 5.339485362004793e-10, "loss": 0.4787, "step": 6178 }, { "epoch": 1.0, "grad_norm": 1.054843370275368, "learning_rate": 4.96491031782842e-10, "loss": 0.6147, "step": 6179 }, { "epoch": 1.0, "grad_norm": 1.1403561273488698, "learning_rate": 4.603955299337104e-10, "loss": 0.5943, "step": 6180 }, { "epoch": 1.0, "grad_norm": 0.6299107626400982, "learning_rate": 4.2566204048577473e-10, "loss": 0.441, "step": 6181 }, { "epoch": 1.0, "grad_norm": 1.0421485468693035, "learning_rate": 3.9229057290146587e-10, "loss": 0.5308, "step": 6182 }, { "epoch": 1.0, "grad_norm": 0.6571222166553881, "learning_rate": 3.6028113627240015e-10, "loss": 0.4514, "step": 6183 }, { "epoch": 1.0, "grad_norm": 1.0382762399662364, "learning_rate": 3.2963373931882425e-10, "loss": 0.5939, "step": 6184 }, { "epoch": 1.0, "grad_norm": 1.0353351838259006, "learning_rate": 3.003483903890603e-10, "loss": 0.5504, "step": 6185 }, { "epoch": 1.0, "grad_norm": 1.0037410242411964, "learning_rate": 2.724250974628362e-10, "loss": 0.5159, "step": 6186 }, { "epoch": 1.0, "grad_norm": 0.9437987787586366, "learning_rate": 2.458638681457348e-10, "loss": 0.5558, "step": 6187 }, { "epoch": 1.0, "grad_norm": 0.8987079984731946, "learning_rate": 2.2066470967418985e-10, "loss": 0.4366, "step": 6188 }, { "epoch": 1.0, "grad_norm": 1.1069131194271307, "learning_rate": 1.9682762891382045e-10, "loss": 0.6337, "step": 6189 }, { "epoch": 1.0, "grad_norm": 1.0059220665422945, "learning_rate": 1.74352632357766e-10, "loss": 0.5416, "step": 6190 }, { "epoch": 1.0, "grad_norm": 1.0178639414680455, "learning_rate": 1.5323972612890647e-10, "loss": 0.5728, "step": 6191 }, { "epoch": 1.0, "grad_norm": 1.0053577679578694, "learning_rate": 1.3348891597930734e-10, "loss": 0.5193, "step": 6192 }, { "epoch": 1.0, "grad_norm": 1.157801710997135, "learning_rate": 1.1510020728910943e-10, "loss": 0.6408, "step": 6193 }, { "epoch": 1.0, "grad_norm": 1.0568511084265244, "learning_rate": 9.807360506874919e-11, "loss": 0.5634, "step": 6194 }, { "epoch": 1.0, "grad_norm": 1.0386398308947, "learning_rate": 8.240911395562823e-11, "loss": 0.5886, "step": 6195 }, { "epoch": 1.0, "grad_norm": 1.0195762522641245, "learning_rate": 6.810673821855407e-11, "loss": 0.6168, "step": 6196 }, { "epoch": 1.0, "grad_norm": 1.0220328269684127, "learning_rate": 5.516648175274419e-11, "loss": 0.5266, "step": 6197 }, { "epoch": 1.0, "grad_norm": 1.059553168300954, "learning_rate": 4.358834808371182e-11, "loss": 0.5748, "step": 6198 }, { "epoch": 1.0, "grad_norm": 0.9465146315770474, "learning_rate": 3.3372340366155663e-11, "loss": 0.5277, "step": 6199 }, { "epoch": 1.0, "grad_norm": 1.0693354860113116, "learning_rate": 2.4518461382849745e-11, "loss": 0.6031, "step": 6200 }, { "epoch": 1.0, "grad_norm": 1.0011966740331144, "learning_rate": 1.702671354575358e-11, "loss": 0.6298, "step": 6201 }, { "epoch": 1.0, "grad_norm": 1.1349782264437707, "learning_rate": 1.0897098895457092e-11, "loss": 0.6135, "step": 6202 }, { "epoch": 1.0, "grad_norm": 1.0384279976212387, "learning_rate": 6.129619102845929e-12, "loss": 0.6528, "step": 6203 }, { "epoch": 1.0, "grad_norm": 1.0027548377380584, "learning_rate": 2.7242754663259207e-12, "loss": 0.5874, "step": 6204 }, { "epoch": 1.0, "grad_norm": 1.139677711687267, "learning_rate": 6.810689129332915e-13, "loss": 0.5835, "step": 6205 }, { "epoch": 1.0, "grad_norm": 0.9912142881766588, "learning_rate": 0.0, "loss": 0.4097, "step": 6206 }, { "epoch": 1.0, "step": 6206, "total_flos": 9688178644156416.0, "train_loss": 0.6073194185630222, "train_runtime": 63272.6884, "train_samples_per_second": 12.555, "train_steps_per_second": 0.098 } ], "logging_steps": 1.0, "max_steps": 6206, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 9688178644156416.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }