{ "best_metric": null, "best_model_checkpoint": null, "epoch": 193.21963394342762, "global_step": 464500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 1.997920133111481e-05, "loss": 3.456, "step": 500 }, { "epoch": 0.42, "learning_rate": 1.995840266222962e-05, "loss": 1.8719, "step": 1000 }, { "epoch": 0.62, "learning_rate": 1.9937603993344426e-05, "loss": 1.7095, "step": 1500 }, { "epoch": 0.83, "learning_rate": 1.9916805324459236e-05, "loss": 1.6155, "step": 2000 }, { "epoch": 1.0, "eval_cer": 0.8924, "eval_gen_len": 19.884, "eval_loss": 1.4984486103057861, "eval_runtime": 548.3383, "eval_samples_per_second": 5.847, "eval_steps_per_second": 1.463, "step": 2404 }, { "epoch": 1.04, "learning_rate": 1.9896006655574045e-05, "loss": 1.5555, "step": 2500 }, { "epoch": 1.25, "learning_rate": 1.9875207986688854e-05, "loss": 1.4646, "step": 3000 }, { "epoch": 1.46, "learning_rate": 1.9854409317803664e-05, "loss": 1.4079, "step": 3500 }, { "epoch": 1.66, "learning_rate": 1.983361064891847e-05, "loss": 1.3488, "step": 4000 }, { "epoch": 1.87, "learning_rate": 1.981281198003328e-05, "loss": 1.3039, "step": 4500 }, { "epoch": 2.0, "eval_cer": 0.5947, "eval_gen_len": 15.5827, "eval_loss": 1.144504427909851, "eval_runtime": 282.2519, "eval_samples_per_second": 11.359, "eval_steps_per_second": 2.841, "step": 4808 }, { "epoch": 2.08, "learning_rate": 1.979201331114809e-05, "loss": 1.2269, "step": 5000 }, { "epoch": 2.29, "learning_rate": 1.9771214642262898e-05, "loss": 1.1561, "step": 5500 }, { "epoch": 2.5, "learning_rate": 1.9750415973377707e-05, "loss": 1.1288, "step": 6000 }, { "epoch": 2.7, "learning_rate": 1.9729700499168054e-05, "loss": 1.0835, "step": 6500 }, { "epoch": 2.91, "learning_rate": 1.9708901830282864e-05, "loss": 1.0647, "step": 7000 }, { "epoch": 3.0, "eval_cer": 0.4993, "eval_gen_len": 14.9208, "eval_loss": 0.9073113799095154, "eval_runtime": 267.439, "eval_samples_per_second": 11.988, "eval_steps_per_second": 2.999, "step": 7212 }, { "epoch": 3.12, "learning_rate": 1.9688103161397673e-05, "loss": 0.9848, "step": 7500 }, { "epoch": 3.33, "learning_rate": 1.966730449251248e-05, "loss": 0.9265, "step": 8000 }, { "epoch": 3.54, "learning_rate": 1.964650582362729e-05, "loss": 0.9057, "step": 8500 }, { "epoch": 3.74, "learning_rate": 1.9625707154742098e-05, "loss": 0.8799, "step": 9000 }, { "epoch": 3.95, "learning_rate": 1.9604950083194678e-05, "loss": 0.8491, "step": 9500 }, { "epoch": 4.0, "eval_cer": 0.4462, "eval_gen_len": 13.549, "eval_loss": 0.7219734191894531, "eval_runtime": 263.8796, "eval_samples_per_second": 12.149, "eval_steps_per_second": 3.039, "step": 9616 }, { "epoch": 4.16, "learning_rate": 1.9584151414309487e-05, "loss": 0.7793, "step": 10000 }, { "epoch": 4.37, "learning_rate": 1.9563352745424296e-05, "loss": 0.7483, "step": 10500 }, { "epoch": 4.58, "learning_rate": 1.9542554076539106e-05, "loss": 0.732, "step": 11000 }, { "epoch": 4.78, "learning_rate": 1.9521755407653912e-05, "loss": 0.7186, "step": 11500 }, { "epoch": 4.99, "learning_rate": 1.9500956738768718e-05, "loss": 0.7024, "step": 12000 }, { "epoch": 5.0, "eval_cer": 0.4716, "eval_gen_len": 15.7813, "eval_loss": 0.6081481575965881, "eval_runtime": 336.6018, "eval_samples_per_second": 9.525, "eval_steps_per_second": 2.383, "step": 12020 }, { "epoch": 5.2, "learning_rate": 1.9480158069883527e-05, "loss": 0.6114, "step": 12500 }, { "epoch": 5.41, "learning_rate": 1.945940099833611e-05, "loss": 0.6193, "step": 13000 }, { "epoch": 5.62, "learning_rate": 1.9438602329450916e-05, "loss": 0.6114, "step": 13500 }, { "epoch": 5.82, "learning_rate": 1.9417803660565726e-05, "loss": 0.5797, "step": 14000 }, { "epoch": 6.0, "eval_cer": 0.3726, "eval_gen_len": 13.8163, "eval_loss": 0.5052656531333923, "eval_runtime": 259.2943, "eval_samples_per_second": 12.364, "eval_steps_per_second": 3.093, "step": 14424 }, { "epoch": 6.03, "learning_rate": 1.9397004991680535e-05, "loss": 0.5658, "step": 14500 }, { "epoch": 6.24, "learning_rate": 1.9376206322795344e-05, "loss": 0.52, "step": 15000 }, { "epoch": 6.45, "learning_rate": 1.9355407653910154e-05, "loss": 0.4992, "step": 15500 }, { "epoch": 6.66, "learning_rate": 1.933460898502496e-05, "loss": 0.5095, "step": 16000 }, { "epoch": 6.86, "learning_rate": 1.931381031613977e-05, "loss": 0.4966, "step": 16500 }, { "epoch": 7.0, "eval_cer": 0.3997, "eval_gen_len": 15.0505, "eval_loss": 0.4698517620563507, "eval_runtime": 299.617, "eval_samples_per_second": 10.7, "eval_steps_per_second": 2.677, "step": 16828 }, { "epoch": 7.07, "learning_rate": 1.929305324459235e-05, "loss": 0.4493, "step": 17000 }, { "epoch": 7.28, "learning_rate": 1.9272254575707155e-05, "loss": 0.4288, "step": 17500 }, { "epoch": 7.49, "learning_rate": 1.9251455906821964e-05, "loss": 0.4397, "step": 18000 }, { "epoch": 7.7, "learning_rate": 1.9230657237936774e-05, "loss": 0.423, "step": 18500 }, { "epoch": 7.9, "learning_rate": 1.9209900166389354e-05, "loss": 0.4234, "step": 19000 }, { "epoch": 8.0, "eval_cer": 0.3414, "eval_gen_len": 14.2976, "eval_loss": 0.41973158717155457, "eval_runtime": 277.9117, "eval_samples_per_second": 11.536, "eval_steps_per_second": 2.886, "step": 19232 }, { "epoch": 8.11, "learning_rate": 1.918910149750416e-05, "loss": 0.377, "step": 19500 }, { "epoch": 8.32, "learning_rate": 1.916834442595674e-05, "loss": 0.3612, "step": 20000 }, { "epoch": 8.53, "learning_rate": 1.914754575707155e-05, "loss": 0.3725, "step": 20500 }, { "epoch": 8.74, "learning_rate": 1.9126747088186358e-05, "loss": 0.3607, "step": 21000 }, { "epoch": 8.94, "learning_rate": 1.9105948419301164e-05, "loss": 0.3661, "step": 21500 }, { "epoch": 9.0, "eval_cer": 0.3568, "eval_gen_len": 14.5349, "eval_loss": 0.36951112747192383, "eval_runtime": 287.7236, "eval_samples_per_second": 11.143, "eval_steps_per_second": 2.787, "step": 21636 }, { "epoch": 9.15, "learning_rate": 1.9085149750415974e-05, "loss": 0.3287, "step": 22000 }, { "epoch": 9.36, "learning_rate": 1.9064351081530783e-05, "loss": 0.3185, "step": 22500 }, { "epoch": 9.57, "learning_rate": 1.9043552412645592e-05, "loss": 0.3082, "step": 23000 }, { "epoch": 9.78, "learning_rate": 1.90227537437604e-05, "loss": 0.3177, "step": 23500 }, { "epoch": 9.98, "learning_rate": 1.9001996672212978e-05, "loss": 0.3094, "step": 24000 }, { "epoch": 10.0, "eval_cer": 0.3123, "eval_gen_len": 13.6931, "eval_loss": 0.35325565934181213, "eval_runtime": 287.7264, "eval_samples_per_second": 11.143, "eval_steps_per_second": 2.787, "step": 24040 }, { "epoch": 10.19, "learning_rate": 1.8981198003327787e-05, "loss": 0.2623, "step": 24500 }, { "epoch": 10.4, "learning_rate": 1.8960399334442597e-05, "loss": 0.2666, "step": 25000 }, { "epoch": 10.61, "learning_rate": 1.8939600665557406e-05, "loss": 0.2667, "step": 25500 }, { "epoch": 10.82, "learning_rate": 1.8918843594009986e-05, "loss": 0.2695, "step": 26000 }, { "epoch": 11.0, "eval_cer": 0.3036, "eval_gen_len": 13.8085, "eval_loss": 0.34151870012283325, "eval_runtime": 267.5842, "eval_samples_per_second": 11.981, "eval_steps_per_second": 2.997, "step": 26444 }, { "epoch": 11.02, "learning_rate": 1.8898086522462566e-05, "loss": 0.2818, "step": 26500 }, { "epoch": 11.23, "learning_rate": 1.8877287853577372e-05, "loss": 0.24, "step": 27000 }, { "epoch": 11.44, "learning_rate": 1.885648918469218e-05, "loss": 0.2347, "step": 27500 }, { "epoch": 11.65, "learning_rate": 1.883569051580699e-05, "loss": 0.2372, "step": 28000 }, { "epoch": 11.86, "learning_rate": 1.88148918469218e-05, "loss": 0.242, "step": 28500 }, { "epoch": 12.0, "eval_cer": 0.2954, "eval_gen_len": 13.5817, "eval_loss": 0.33137527108192444, "eval_runtime": 249.8381, "eval_samples_per_second": 12.832, "eval_steps_per_second": 3.21, "step": 28848 }, { "epoch": 12.06, "learning_rate": 1.8794093178036606e-05, "loss": 0.2294, "step": 29000 }, { "epoch": 12.27, "learning_rate": 1.8773336106489186e-05, "loss": 0.2139, "step": 29500 }, { "epoch": 12.48, "learning_rate": 1.8752537437603995e-05, "loss": 0.2173, "step": 30000 }, { "epoch": 12.69, "learning_rate": 1.8731738768718805e-05, "loss": 0.214, "step": 30500 }, { "epoch": 12.9, "learning_rate": 1.871094009983361e-05, "loss": 0.2123, "step": 31000 }, { "epoch": 13.0, "eval_cer": 0.2927, "eval_gen_len": 13.8079, "eval_loss": 0.3195803463459015, "eval_runtime": 264.0497, "eval_samples_per_second": 12.142, "eval_steps_per_second": 3.037, "step": 31252 }, { "epoch": 13.1, "learning_rate": 1.869014143094842e-05, "loss": 0.191, "step": 31500 }, { "epoch": 13.31, "learning_rate": 1.866934276206323e-05, "loss": 0.187, "step": 32000 }, { "epoch": 13.52, "learning_rate": 1.864854409317804e-05, "loss": 0.1903, "step": 32500 }, { "epoch": 13.73, "learning_rate": 1.8627745424292848e-05, "loss": 0.1978, "step": 33000 }, { "epoch": 13.94, "learning_rate": 1.8606946755407654e-05, "loss": 0.1954, "step": 33500 }, { "epoch": 14.0, "eval_cer": 0.2802, "eval_gen_len": 13.5215, "eval_loss": 0.3065420985221863, "eval_runtime": 255.1945, "eval_samples_per_second": 12.563, "eval_steps_per_second": 3.143, "step": 33656 }, { "epoch": 14.14, "learning_rate": 1.8586231281198004e-05, "loss": 0.1658, "step": 34000 }, { "epoch": 14.35, "learning_rate": 1.8565432612312814e-05, "loss": 0.165, "step": 34500 }, { "epoch": 14.56, "learning_rate": 1.854463394342762e-05, "loss": 0.1673, "step": 35000 }, { "epoch": 14.77, "learning_rate": 1.852383527454243e-05, "loss": 0.174, "step": 35500 }, { "epoch": 14.98, "learning_rate": 1.850303660565724e-05, "loss": 0.1734, "step": 36000 }, { "epoch": 15.0, "eval_cer": 0.301, "eval_gen_len": 14.1818, "eval_loss": 0.3554905652999878, "eval_runtime": 265.2219, "eval_samples_per_second": 12.088, "eval_steps_per_second": 3.024, "step": 36060 }, { "epoch": 15.18, "learning_rate": 1.8482237936772048e-05, "loss": 0.1436, "step": 36500 }, { "epoch": 15.39, "learning_rate": 1.8461439267886857e-05, "loss": 0.1566, "step": 37000 }, { "epoch": 15.6, "learning_rate": 1.8440640599001663e-05, "loss": 0.1548, "step": 37500 }, { "epoch": 15.81, "learning_rate": 1.8419883527454246e-05, "loss": 0.1598, "step": 38000 }, { "epoch": 16.0, "eval_cer": 0.2854, "eval_gen_len": 13.975, "eval_loss": 0.3254939913749695, "eval_runtime": 272.7931, "eval_samples_per_second": 11.752, "eval_steps_per_second": 2.94, "step": 38464 }, { "epoch": 16.01, "learning_rate": 1.8399084858569052e-05, "loss": 0.1543, "step": 38500 }, { "epoch": 16.22, "learning_rate": 1.8378286189683862e-05, "loss": 0.136, "step": 39000 }, { "epoch": 16.43, "learning_rate": 1.835748752079867e-05, "loss": 0.1356, "step": 39500 }, { "epoch": 16.64, "learning_rate": 1.833673044925125e-05, "loss": 0.14, "step": 40000 }, { "epoch": 16.85, "learning_rate": 1.8315931780366057e-05, "loss": 0.1442, "step": 40500 }, { "epoch": 17.0, "eval_cer": 0.2748, "eval_gen_len": 13.4192, "eval_loss": 0.3075180649757385, "eval_runtime": 259.5551, "eval_samples_per_second": 12.352, "eval_steps_per_second": 3.09, "step": 40868 }, { "epoch": 17.05, "learning_rate": 1.8295133111480866e-05, "loss": 0.1417, "step": 41000 }, { "epoch": 17.26, "learning_rate": 1.8274334442595676e-05, "loss": 0.1241, "step": 41500 }, { "epoch": 17.47, "learning_rate": 1.8253577371048256e-05, "loss": 0.1254, "step": 42000 }, { "epoch": 17.68, "learning_rate": 1.823277870216306e-05, "loss": 0.1315, "step": 42500 }, { "epoch": 17.89, "learning_rate": 1.821198003327787e-05, "loss": 0.1288, "step": 43000 }, { "epoch": 18.0, "eval_cer": 0.2763, "eval_gen_len": 13.5976, "eval_loss": 0.30095288157463074, "eval_runtime": 261.6386, "eval_samples_per_second": 12.254, "eval_steps_per_second": 3.065, "step": 43272 }, { "epoch": 18.09, "learning_rate": 1.819118136439268e-05, "loss": 0.123, "step": 43500 }, { "epoch": 18.3, "learning_rate": 1.817042429284526e-05, "loss": 0.1178, "step": 44000 }, { "epoch": 18.51, "learning_rate": 1.8149625623960066e-05, "loss": 0.1203, "step": 44500 }, { "epoch": 18.72, "learning_rate": 1.8128826955074876e-05, "loss": 0.1206, "step": 45000 }, { "epoch": 18.93, "learning_rate": 1.8108028286189685e-05, "loss": 0.1249, "step": 45500 }, { "epoch": 19.0, "eval_cer": 0.2781, "eval_gen_len": 13.9969, "eval_loss": 0.3196047246456146, "eval_runtime": 267.3702, "eval_samples_per_second": 11.991, "eval_steps_per_second": 3.0, "step": 45676 }, { "epoch": 19.13, "learning_rate": 1.8087271214642265e-05, "loss": 0.1067, "step": 46000 }, { "epoch": 19.34, "learning_rate": 1.8066514143094845e-05, "loss": 0.106, "step": 46500 }, { "epoch": 19.55, "learning_rate": 1.8045715474209654e-05, "loss": 0.1083, "step": 47000 }, { "epoch": 19.76, "learning_rate": 1.802491680532446e-05, "loss": 0.1098, "step": 47500 }, { "epoch": 19.97, "learning_rate": 1.800411813643927e-05, "loss": 0.1182, "step": 48000 }, { "epoch": 20.0, "eval_cer": 0.2783, "eval_gen_len": 13.6728, "eval_loss": 0.3209761679172516, "eval_runtime": 262.3576, "eval_samples_per_second": 12.22, "eval_steps_per_second": 3.057, "step": 48080 }, { "epoch": 20.17, "learning_rate": 1.798331946755408e-05, "loss": 0.0975, "step": 48500 }, { "epoch": 20.38, "learning_rate": 1.7962520798668888e-05, "loss": 0.0975, "step": 49000 }, { "epoch": 20.59, "learning_rate": 1.7941722129783698e-05, "loss": 0.1024, "step": 49500 }, { "epoch": 20.8, "learning_rate": 1.7920923460898504e-05, "loss": 0.1056, "step": 50000 }, { "epoch": 21.0, "eval_cer": 0.2707, "eval_gen_len": 13.5714, "eval_loss": 0.3157811760902405, "eval_runtime": 260.7596, "eval_samples_per_second": 12.295, "eval_steps_per_second": 3.076, "step": 50484 }, { "epoch": 21.01, "learning_rate": 1.7900166389351083e-05, "loss": 0.1029, "step": 50500 }, { "epoch": 21.21, "learning_rate": 1.7879367720465893e-05, "loss": 0.0943, "step": 51000 }, { "epoch": 21.42, "learning_rate": 1.7858569051580702e-05, "loss": 0.0929, "step": 51500 }, { "epoch": 21.63, "learning_rate": 1.7837770382695508e-05, "loss": 0.0965, "step": 52000 }, { "epoch": 21.84, "learning_rate": 1.7816971713810317e-05, "loss": 0.0916, "step": 52500 }, { "epoch": 22.0, "eval_cer": 0.2685, "eval_gen_len": 13.6404, "eval_loss": 0.315157949924469, "eval_runtime": 259.3506, "eval_samples_per_second": 12.362, "eval_steps_per_second": 3.092, "step": 52888 }, { "epoch": 22.05, "learning_rate": 1.7796173044925127e-05, "loss": 0.0992, "step": 53000 }, { "epoch": 22.25, "learning_rate": 1.7775415973377707e-05, "loss": 0.0884, "step": 53500 }, { "epoch": 22.46, "learning_rate": 1.7754617304492513e-05, "loss": 0.0922, "step": 54000 }, { "epoch": 22.67, "learning_rate": 1.7733818635607322e-05, "loss": 0.0826, "step": 54500 }, { "epoch": 22.88, "learning_rate": 1.771301996672213e-05, "loss": 0.0934, "step": 55000 }, { "epoch": 23.0, "eval_cer": 0.2711, "eval_gen_len": 13.8575, "eval_loss": 0.3333517909049988, "eval_runtime": 261.6177, "eval_samples_per_second": 12.255, "eval_steps_per_second": 3.066, "step": 55292 }, { "epoch": 23.09, "learning_rate": 1.769222129783694e-05, "loss": 0.0861, "step": 55500 }, { "epoch": 23.29, "learning_rate": 1.7671464226289517e-05, "loss": 0.081, "step": 56000 }, { "epoch": 23.5, "learning_rate": 1.7650665557404327e-05, "loss": 0.0798, "step": 56500 }, { "epoch": 23.71, "learning_rate": 1.7629866888519136e-05, "loss": 0.0843, "step": 57000 }, { "epoch": 23.92, "learning_rate": 1.7609068219633945e-05, "loss": 0.0941, "step": 57500 }, { "epoch": 24.0, "eval_cer": 0.2656, "eval_gen_len": 13.5583, "eval_loss": 0.3143361210823059, "eval_runtime": 261.1394, "eval_samples_per_second": 12.277, "eval_steps_per_second": 3.071, "step": 57696 }, { "epoch": 24.13, "learning_rate": 1.7588269550748755e-05, "loss": 0.0809, "step": 58000 }, { "epoch": 24.33, "learning_rate": 1.756747088186356e-05, "loss": 0.0784, "step": 58500 }, { "epoch": 24.54, "learning_rate": 1.754671381031614e-05, "loss": 0.0785, "step": 59000 }, { "epoch": 24.75, "learning_rate": 1.752591514143095e-05, "loss": 0.0782, "step": 59500 }, { "epoch": 24.96, "learning_rate": 1.750511647254576e-05, "loss": 0.0773, "step": 60000 }, { "epoch": 25.0, "eval_cer": 0.2641, "eval_gen_len": 13.5561, "eval_loss": 0.3231368660926819, "eval_runtime": 255.8084, "eval_samples_per_second": 12.533, "eval_steps_per_second": 3.135, "step": 60100 }, { "epoch": 25.17, "learning_rate": 1.7484317803660565e-05, "loss": 0.0744, "step": 60500 }, { "epoch": 25.37, "learning_rate": 1.7463519134775375e-05, "loss": 0.0719, "step": 61000 }, { "epoch": 25.58, "learning_rate": 1.7442762063227955e-05, "loss": 0.0821, "step": 61500 }, { "epoch": 25.79, "learning_rate": 1.7421963394342764e-05, "loss": 0.0781, "step": 62000 }, { "epoch": 26.0, "learning_rate": 1.740116472545757e-05, "loss": 0.0759, "step": 62500 }, { "epoch": 26.0, "eval_cer": 0.2668, "eval_gen_len": 13.7564, "eval_loss": 0.3243275582790375, "eval_runtime": 262.8593, "eval_samples_per_second": 12.197, "eval_steps_per_second": 3.051, "step": 62504 }, { "epoch": 26.21, "learning_rate": 1.738036605657238e-05, "loss": 0.0664, "step": 63000 }, { "epoch": 26.41, "learning_rate": 1.735956738768719e-05, "loss": 0.0683, "step": 63500 }, { "epoch": 26.62, "learning_rate": 1.733881031613977e-05, "loss": 0.0712, "step": 64000 }, { "epoch": 26.83, "learning_rate": 1.7318011647254578e-05, "loss": 0.077, "step": 64500 }, { "epoch": 27.0, "eval_cer": 0.267, "eval_gen_len": 13.7558, "eval_loss": 0.33405444025993347, "eval_runtime": 259.6084, "eval_samples_per_second": 12.349, "eval_steps_per_second": 3.089, "step": 64908 }, { "epoch": 27.04, "learning_rate": 1.7297212978369387e-05, "loss": 0.076, "step": 65000 }, { "epoch": 27.25, "learning_rate": 1.7276414309484197e-05, "loss": 0.0595, "step": 65500 }, { "epoch": 27.45, "learning_rate": 1.7255615640599003e-05, "loss": 0.0608, "step": 66000 }, { "epoch": 27.66, "learning_rate": 1.7234858569051583e-05, "loss": 0.0705, "step": 66500 }, { "epoch": 27.87, "learning_rate": 1.7214059900166392e-05, "loss": 0.0743, "step": 67000 }, { "epoch": 28.0, "eval_cer": 0.2796, "eval_gen_len": 14.1085, "eval_loss": 0.3349040448665619, "eval_runtime": 282.8059, "eval_samples_per_second": 11.336, "eval_steps_per_second": 2.836, "step": 67312 }, { "epoch": 28.08, "learning_rate": 1.71932612312812e-05, "loss": 0.0674, "step": 67500 }, { "epoch": 28.29, "learning_rate": 1.7172462562396007e-05, "loss": 0.0621, "step": 68000 }, { "epoch": 28.49, "learning_rate": 1.7151663893510817e-05, "loss": 0.0661, "step": 68500 }, { "epoch": 28.7, "learning_rate": 1.7130865224625626e-05, "loss": 0.0611, "step": 69000 }, { "epoch": 28.91, "learning_rate": 1.7110108153078206e-05, "loss": 0.0662, "step": 69500 }, { "epoch": 29.0, "eval_cer": 0.2623, "eval_gen_len": 13.5396, "eval_loss": 0.3251936137676239, "eval_runtime": 258.4167, "eval_samples_per_second": 12.406, "eval_steps_per_second": 3.104, "step": 69716 }, { "epoch": 29.12, "learning_rate": 1.7089309484193012e-05, "loss": 0.0652, "step": 70000 }, { "epoch": 29.33, "learning_rate": 1.706851081530782e-05, "loss": 0.0557, "step": 70500 }, { "epoch": 29.53, "learning_rate": 1.704771214642263e-05, "loss": 0.0584, "step": 71000 }, { "epoch": 29.74, "learning_rate": 1.702691347753744e-05, "loss": 0.0627, "step": 71500 }, { "epoch": 29.95, "learning_rate": 1.700611480865225e-05, "loss": 0.0685, "step": 72000 }, { "epoch": 30.0, "eval_cer": 0.2643, "eval_gen_len": 13.6528, "eval_loss": 0.33093786239624023, "eval_runtime": 269.9487, "eval_samples_per_second": 11.876, "eval_steps_per_second": 2.971, "step": 72120 }, { "epoch": 30.16, "learning_rate": 1.6985357737104826e-05, "loss": 0.0592, "step": 72500 }, { "epoch": 30.37, "learning_rate": 1.6964559068219635e-05, "loss": 0.0564, "step": 73000 }, { "epoch": 30.57, "learning_rate": 1.6943760399334445e-05, "loss": 0.0623, "step": 73500 }, { "epoch": 30.78, "learning_rate": 1.692296173044925e-05, "loss": 0.0599, "step": 74000 }, { "epoch": 30.99, "learning_rate": 1.690220465890183e-05, "loss": 0.0619, "step": 74500 }, { "epoch": 31.0, "eval_cer": 0.266, "eval_gen_len": 13.7171, "eval_loss": 0.3532868027687073, "eval_runtime": 257.8089, "eval_samples_per_second": 12.436, "eval_steps_per_second": 3.111, "step": 74524 }, { "epoch": 31.2, "learning_rate": 1.688140599001664e-05, "loss": 0.0499, "step": 75000 }, { "epoch": 31.41, "learning_rate": 1.686060732113145e-05, "loss": 0.0592, "step": 75500 }, { "epoch": 31.61, "learning_rate": 1.6839808652246255e-05, "loss": 0.0555, "step": 76000 }, { "epoch": 31.82, "learning_rate": 1.6819009983361064e-05, "loss": 0.0602, "step": 76500 }, { "epoch": 32.0, "eval_cer": 0.2661, "eval_gen_len": 13.8924, "eval_loss": 0.34633541107177734, "eval_runtime": 271.365, "eval_samples_per_second": 11.814, "eval_steps_per_second": 2.955, "step": 76928 }, { "epoch": 32.03, "learning_rate": 1.6798211314475874e-05, "loss": 0.0575, "step": 77000 }, { "epoch": 32.24, "learning_rate": 1.6777412645590683e-05, "loss": 0.0506, "step": 77500 }, { "epoch": 32.45, "learning_rate": 1.6756613976705493e-05, "loss": 0.0566, "step": 78000 }, { "epoch": 32.65, "learning_rate": 1.6735856905158072e-05, "loss": 0.0532, "step": 78500 }, { "epoch": 32.86, "learning_rate": 1.6715058236272882e-05, "loss": 0.0568, "step": 79000 }, { "epoch": 33.0, "eval_cer": 0.2645, "eval_gen_len": 13.5443, "eval_loss": 0.3539523482322693, "eval_runtime": 256.805, "eval_samples_per_second": 12.484, "eval_steps_per_second": 3.123, "step": 79332 }, { "epoch": 33.07, "learning_rate": 1.669425956738769e-05, "loss": 0.057, "step": 79500 }, { "epoch": 33.28, "learning_rate": 1.6673460898502497e-05, "loss": 0.0467, "step": 80000 }, { "epoch": 33.49, "learning_rate": 1.6652703826955077e-05, "loss": 0.0514, "step": 80500 }, { "epoch": 33.69, "learning_rate": 1.6631905158069886e-05, "loss": 0.0522, "step": 81000 }, { "epoch": 33.9, "learning_rate": 1.6611106489184692e-05, "loss": 0.0528, "step": 81500 }, { "epoch": 34.0, "eval_cer": 0.2621, "eval_gen_len": 13.4984, "eval_loss": 0.3489411473274231, "eval_runtime": 254.1092, "eval_samples_per_second": 12.617, "eval_steps_per_second": 3.156, "step": 81736 }, { "epoch": 34.11, "learning_rate": 1.6590307820299502e-05, "loss": 0.05, "step": 82000 }, { "epoch": 34.32, "learning_rate": 1.656955074875208e-05, "loss": 0.0477, "step": 82500 }, { "epoch": 34.53, "learning_rate": 1.654875207986689e-05, "loss": 0.0567, "step": 83000 }, { "epoch": 34.73, "learning_rate": 1.6527953410981697e-05, "loss": 0.0477, "step": 83500 }, { "epoch": 34.94, "learning_rate": 1.6507154742096506e-05, "loss": 0.0544, "step": 84000 }, { "epoch": 35.0, "eval_cer": 0.2662, "eval_gen_len": 13.7679, "eval_loss": 0.3497730493545532, "eval_runtime": 268.9078, "eval_samples_per_second": 11.922, "eval_steps_per_second": 2.982, "step": 84140 }, { "epoch": 35.15, "learning_rate": 1.6486356073211316e-05, "loss": 0.0433, "step": 84500 }, { "epoch": 35.36, "learning_rate": 1.6465599001663896e-05, "loss": 0.0426, "step": 85000 }, { "epoch": 35.57, "learning_rate": 1.64448003327787e-05, "loss": 0.0477, "step": 85500 }, { "epoch": 35.77, "learning_rate": 1.642400166389351e-05, "loss": 0.0497, "step": 86000 }, { "epoch": 35.98, "learning_rate": 1.640320299500832e-05, "loss": 0.0571, "step": 86500 }, { "epoch": 36.0, "eval_cer": 0.2661, "eval_gen_len": 13.8001, "eval_loss": 0.35858893394470215, "eval_runtime": 266.9991, "eval_samples_per_second": 12.008, "eval_steps_per_second": 3.004, "step": 86544 }, { "epoch": 36.19, "learning_rate": 1.63824459234609e-05, "loss": 0.0406, "step": 87000 }, { "epoch": 36.4, "learning_rate": 1.6361647254575706e-05, "loss": 0.0431, "step": 87500 }, { "epoch": 36.61, "learning_rate": 1.6340848585690516e-05, "loss": 0.0507, "step": 88000 }, { "epoch": 36.81, "learning_rate": 1.6320049916805325e-05, "loss": 0.0492, "step": 88500 }, { "epoch": 37.0, "eval_cer": 0.2564, "eval_gen_len": 13.5415, "eval_loss": 0.35140517354011536, "eval_runtime": 259.4147, "eval_samples_per_second": 12.359, "eval_steps_per_second": 3.092, "step": 88948 }, { "epoch": 37.02, "learning_rate": 1.6299251247920134e-05, "loss": 0.0515, "step": 89000 }, { "epoch": 37.23, "learning_rate": 1.6278452579034944e-05, "loss": 0.0421, "step": 89500 }, { "epoch": 37.44, "learning_rate": 1.6257695507487523e-05, "loss": 0.0432, "step": 90000 }, { "epoch": 37.65, "learning_rate": 1.6236896838602333e-05, "loss": 0.0449, "step": 90500 }, { "epoch": 37.85, "learning_rate": 1.621609816971714e-05, "loss": 0.0484, "step": 91000 }, { "epoch": 38.0, "eval_cer": 0.2639, "eval_gen_len": 13.8534, "eval_loss": 0.3746128976345062, "eval_runtime": 254.0333, "eval_samples_per_second": 12.62, "eval_steps_per_second": 3.157, "step": 91352 }, { "epoch": 38.06, "learning_rate": 1.6195299500831948e-05, "loss": 0.0481, "step": 91500 }, { "epoch": 38.27, "learning_rate": 1.6174500831946758e-05, "loss": 0.0402, "step": 92000 }, { "epoch": 38.48, "learning_rate": 1.6153743760399337e-05, "loss": 0.0395, "step": 92500 }, { "epoch": 38.69, "learning_rate": 1.6132945091514143e-05, "loss": 0.0456, "step": 93000 }, { "epoch": 38.89, "learning_rate": 1.6112146422628953e-05, "loss": 0.0451, "step": 93500 }, { "epoch": 39.0, "eval_cer": 0.2627, "eval_gen_len": 13.7527, "eval_loss": 0.35255342721939087, "eval_runtime": 264.6452, "eval_samples_per_second": 12.114, "eval_steps_per_second": 3.03, "step": 93756 }, { "epoch": 39.1, "learning_rate": 1.6091347753743762e-05, "loss": 0.0363, "step": 94000 }, { "epoch": 39.31, "learning_rate": 1.6070590682196342e-05, "loss": 0.0382, "step": 94500 }, { "epoch": 39.52, "learning_rate": 1.6049792013311148e-05, "loss": 0.0448, "step": 95000 }, { "epoch": 39.73, "learning_rate": 1.6028993344425957e-05, "loss": 0.0399, "step": 95500 }, { "epoch": 39.93, "learning_rate": 1.6008194675540767e-05, "loss": 0.045, "step": 96000 }, { "epoch": 40.0, "eval_cer": 0.2583, "eval_gen_len": 13.6694, "eval_loss": 0.3510436713695526, "eval_runtime": 264.9384, "eval_samples_per_second": 12.101, "eval_steps_per_second": 3.027, "step": 96160 }, { "epoch": 40.14, "learning_rate": 1.5987437603993347e-05, "loss": 0.0393, "step": 96500 }, { "epoch": 40.35, "learning_rate": 1.5966638935108153e-05, "loss": 0.0338, "step": 97000 }, { "epoch": 40.56, "learning_rate": 1.5945840266222962e-05, "loss": 0.0398, "step": 97500 }, { "epoch": 40.77, "learning_rate": 1.5925083194675542e-05, "loss": 0.0425, "step": 98000 }, { "epoch": 40.97, "learning_rate": 1.590428452579035e-05, "loss": 0.0455, "step": 98500 }, { "epoch": 41.0, "eval_cer": 0.2619, "eval_gen_len": 13.7012, "eval_loss": 0.3675419092178345, "eval_runtime": 255.341, "eval_samples_per_second": 12.556, "eval_steps_per_second": 3.141, "step": 98564 }, { "epoch": 41.18, "learning_rate": 1.5883485856905157e-05, "loss": 0.0372, "step": 99000 }, { "epoch": 41.39, "learning_rate": 1.5862687188019967e-05, "loss": 0.0378, "step": 99500 }, { "epoch": 41.6, "learning_rate": 1.5841888519134776e-05, "loss": 0.0369, "step": 100000 }, { "epoch": 41.81, "learning_rate": 1.5821089850249585e-05, "loss": 0.0452, "step": 100500 }, { "epoch": 42.0, "eval_cer": 0.2672, "eval_gen_len": 13.8653, "eval_loss": 0.3757382035255432, "eval_runtime": 268.6077, "eval_samples_per_second": 11.936, "eval_steps_per_second": 2.986, "step": 100968 }, { "epoch": 42.01, "learning_rate": 1.5800291181364395e-05, "loss": 0.046, "step": 101000 }, { "epoch": 42.22, "learning_rate": 1.57794925124792e-05, "loss": 0.0319, "step": 101500 }, { "epoch": 42.43, "learning_rate": 1.5758735440931784e-05, "loss": 0.0383, "step": 102000 }, { "epoch": 42.64, "learning_rate": 1.573793677204659e-05, "loss": 0.0376, "step": 102500 }, { "epoch": 42.85, "learning_rate": 1.57171381031614e-05, "loss": 0.0403, "step": 103000 }, { "epoch": 43.0, "eval_cer": 0.2583, "eval_gen_len": 13.6532, "eval_loss": 0.3421362340450287, "eval_runtime": 257.7664, "eval_samples_per_second": 12.438, "eval_steps_per_second": 3.111, "step": 103372 }, { "epoch": 43.05, "learning_rate": 1.5696339434276205e-05, "loss": 0.0419, "step": 103500 }, { "epoch": 43.26, "learning_rate": 1.5675540765391015e-05, "loss": 0.0322, "step": 104000 }, { "epoch": 43.47, "learning_rate": 1.5654783693843594e-05, "loss": 0.0396, "step": 104500 }, { "epoch": 43.68, "learning_rate": 1.5633985024958404e-05, "loss": 0.0386, "step": 105000 }, { "epoch": 43.89, "learning_rate": 1.5613186356073213e-05, "loss": 0.0372, "step": 105500 }, { "epoch": 44.0, "eval_cer": 0.2553, "eval_gen_len": 13.7679, "eval_loss": 0.37172210216522217, "eval_runtime": 266.5505, "eval_samples_per_second": 12.028, "eval_steps_per_second": 3.009, "step": 105776 }, { "epoch": 44.09, "learning_rate": 1.5592387687188023e-05, "loss": 0.0381, "step": 106000 }, { "epoch": 44.3, "learning_rate": 1.55716306156406e-05, "loss": 0.0343, "step": 106500 }, { "epoch": 44.51, "learning_rate": 1.555083194675541e-05, "loss": 0.0348, "step": 107000 }, { "epoch": 44.72, "learning_rate": 1.5530033277870218e-05, "loss": 0.0412, "step": 107500 }, { "epoch": 44.93, "learning_rate": 1.5509234608985027e-05, "loss": 0.041, "step": 108000 }, { "epoch": 45.0, "eval_cer": 0.258, "eval_gen_len": 13.7246, "eval_loss": 0.367112934589386, "eval_runtime": 255.2355, "eval_samples_per_second": 12.561, "eval_steps_per_second": 3.142, "step": 108180 }, { "epoch": 45.13, "learning_rate": 1.5488477537437604e-05, "loss": 0.0358, "step": 108500 }, { "epoch": 45.34, "learning_rate": 1.5467678868552413e-05, "loss": 0.0347, "step": 109000 }, { "epoch": 45.55, "learning_rate": 1.5446880199667222e-05, "loss": 0.0372, "step": 109500 }, { "epoch": 45.76, "learning_rate": 1.5426081530782032e-05, "loss": 0.0369, "step": 110000 }, { "epoch": 45.97, "learning_rate": 1.540528286189684e-05, "loss": 0.0372, "step": 110500 }, { "epoch": 46.0, "eval_cer": 0.2596, "eval_gen_len": 13.8244, "eval_loss": 0.36525318026542664, "eval_runtime": 258.7606, "eval_samples_per_second": 12.39, "eval_steps_per_second": 3.099, "step": 110584 }, { "epoch": 46.17, "learning_rate": 1.5384525790349418e-05, "loss": 0.0276, "step": 111000 }, { "epoch": 46.38, "learning_rate": 1.5363727121464227e-05, "loss": 0.0311, "step": 111500 }, { "epoch": 46.59, "learning_rate": 1.5342928452579036e-05, "loss": 0.0343, "step": 112000 }, { "epoch": 46.8, "learning_rate": 1.5322129783693846e-05, "loss": 0.0418, "step": 112500 }, { "epoch": 47.0, "eval_cer": 0.2573, "eval_gen_len": 13.7427, "eval_loss": 0.3767205476760864, "eval_runtime": 259.1923, "eval_samples_per_second": 12.369, "eval_steps_per_second": 3.094, "step": 112988 }, { "epoch": 47.0, "learning_rate": 1.5301372712146422e-05, "loss": 0.0387, "step": 113000 }, { "epoch": 47.21, "learning_rate": 1.528057404326123e-05, "loss": 0.0288, "step": 113500 }, { "epoch": 47.42, "learning_rate": 1.525977537437604e-05, "loss": 0.0316, "step": 114000 }, { "epoch": 47.63, "learning_rate": 1.5238976705490849e-05, "loss": 0.0302, "step": 114500 }, { "epoch": 47.84, "learning_rate": 1.521821963394343e-05, "loss": 0.036, "step": 115000 }, { "epoch": 48.0, "eval_cer": 0.2645, "eval_gen_len": 13.9616, "eval_loss": 0.3853040933609009, "eval_runtime": 258.5981, "eval_samples_per_second": 12.398, "eval_steps_per_second": 3.101, "step": 115392 }, { "epoch": 48.04, "learning_rate": 1.5197420965058238e-05, "loss": 0.0414, "step": 115500 }, { "epoch": 48.25, "learning_rate": 1.5176622296173047e-05, "loss": 0.0302, "step": 116000 }, { "epoch": 48.46, "learning_rate": 1.5155823627287855e-05, "loss": 0.0336, "step": 116500 }, { "epoch": 48.67, "learning_rate": 1.5135066555740435e-05, "loss": 0.0346, "step": 117000 }, { "epoch": 48.88, "learning_rate": 1.5114267886855242e-05, "loss": 0.0354, "step": 117500 }, { "epoch": 49.0, "eval_cer": 0.2571, "eval_gen_len": 13.3799, "eval_loss": 0.3713897466659546, "eval_runtime": 256.4932, "eval_samples_per_second": 12.499, "eval_steps_per_second": 3.127, "step": 117796 }, { "epoch": 49.08, "learning_rate": 1.5093469217970052e-05, "loss": 0.0285, "step": 118000 }, { "epoch": 49.29, "learning_rate": 1.507267054908486e-05, "loss": 0.0311, "step": 118500 }, { "epoch": 49.5, "learning_rate": 1.5051871880199669e-05, "loss": 0.0297, "step": 119000 }, { "epoch": 49.71, "learning_rate": 1.5031114808652247e-05, "loss": 0.0327, "step": 119500 }, { "epoch": 49.92, "learning_rate": 1.5010316139767056e-05, "loss": 0.0336, "step": 120000 }, { "epoch": 50.0, "eval_cer": 0.2592, "eval_gen_len": 13.7667, "eval_loss": 0.38062140345573425, "eval_runtime": 257.9154, "eval_samples_per_second": 12.43, "eval_steps_per_second": 3.11, "step": 120200 }, { "epoch": 50.12, "learning_rate": 1.4989517470881864e-05, "loss": 0.0291, "step": 120500 }, { "epoch": 50.33, "learning_rate": 1.4968718801996673e-05, "loss": 0.0286, "step": 121000 }, { "epoch": 50.54, "learning_rate": 1.4947920133111483e-05, "loss": 0.0282, "step": 121500 }, { "epoch": 50.75, "learning_rate": 1.4927163061564061e-05, "loss": 0.0299, "step": 122000 }, { "epoch": 50.96, "learning_rate": 1.4906364392678869e-05, "loss": 0.0367, "step": 122500 }, { "epoch": 51.0, "eval_cer": 0.2567, "eval_gen_len": 13.7402, "eval_loss": 0.36996766924858093, "eval_runtime": 265.273, "eval_samples_per_second": 12.086, "eval_steps_per_second": 3.023, "step": 122604 }, { "epoch": 51.16, "learning_rate": 1.4885607321131449e-05, "loss": 0.0269, "step": 123000 }, { "epoch": 51.37, "learning_rate": 1.4864808652246256e-05, "loss": 0.0288, "step": 123500 }, { "epoch": 51.58, "learning_rate": 1.4844009983361066e-05, "loss": 0.0306, "step": 124000 }, { "epoch": 51.79, "learning_rate": 1.4823211314475873e-05, "loss": 0.0352, "step": 124500 }, { "epoch": 52.0, "learning_rate": 1.4802412645590683e-05, "loss": 0.0278, "step": 125000 }, { "epoch": 52.0, "eval_cer": 0.2552, "eval_gen_len": 13.6507, "eval_loss": 0.37308937311172485, "eval_runtime": 264.0574, "eval_samples_per_second": 12.141, "eval_steps_per_second": 3.037, "step": 125008 }, { "epoch": 52.2, "learning_rate": 1.478161397670549e-05, "loss": 0.0245, "step": 125500 }, { "epoch": 52.41, "learning_rate": 1.47608153078203e-05, "loss": 0.0256, "step": 126000 }, { "epoch": 52.62, "learning_rate": 1.4740016638935109e-05, "loss": 0.0303, "step": 126500 }, { "epoch": 52.83, "learning_rate": 1.4719259567387689e-05, "loss": 0.0318, "step": 127000 }, { "epoch": 53.0, "eval_cer": 0.2558, "eval_gen_len": 13.4878, "eval_loss": 0.3711611330509186, "eval_runtime": 256.1423, "eval_samples_per_second": 12.516, "eval_steps_per_second": 3.131, "step": 127412 }, { "epoch": 53.04, "learning_rate": 1.4698460898502498e-05, "loss": 0.0308, "step": 127500 }, { "epoch": 53.24, "learning_rate": 1.4677662229617306e-05, "loss": 0.0258, "step": 128000 }, { "epoch": 53.45, "learning_rate": 1.4656863560732115e-05, "loss": 0.0268, "step": 128500 }, { "epoch": 53.66, "learning_rate": 1.4636064891846925e-05, "loss": 0.0306, "step": 129000 }, { "epoch": 53.87, "learning_rate": 1.4615307820299503e-05, "loss": 0.0307, "step": 129500 }, { "epoch": 54.0, "eval_cer": 0.2545, "eval_gen_len": 13.6042, "eval_loss": 0.3638122081756592, "eval_runtime": 259.1147, "eval_samples_per_second": 12.373, "eval_steps_per_second": 3.095, "step": 129816 }, { "epoch": 54.08, "learning_rate": 1.459450915141431e-05, "loss": 0.0246, "step": 130000 }, { "epoch": 54.28, "learning_rate": 1.457371048252912e-05, "loss": 0.0206, "step": 130500 }, { "epoch": 54.49, "learning_rate": 1.455291181364393e-05, "loss": 0.0298, "step": 131000 }, { "epoch": 54.7, "learning_rate": 1.4532196339434276e-05, "loss": 0.0284, "step": 131500 }, { "epoch": 54.91, "learning_rate": 1.4511397670549086e-05, "loss": 0.0277, "step": 132000 }, { "epoch": 55.0, "eval_cer": 0.2574, "eval_gen_len": 13.8247, "eval_loss": 0.3866593539714813, "eval_runtime": 266.2003, "eval_samples_per_second": 12.044, "eval_steps_per_second": 3.013, "step": 132220 }, { "epoch": 55.12, "learning_rate": 1.4490599001663895e-05, "loss": 0.0258, "step": 132500 }, { "epoch": 55.32, "learning_rate": 1.4469800332778703e-05, "loss": 0.0264, "step": 133000 }, { "epoch": 55.53, "learning_rate": 1.4449001663893512e-05, "loss": 0.0273, "step": 133500 }, { "epoch": 55.74, "learning_rate": 1.442820299500832e-05, "loss": 0.033, "step": 134000 }, { "epoch": 55.95, "learning_rate": 1.4407404326123129e-05, "loss": 0.0289, "step": 134500 }, { "epoch": 56.0, "eval_cer": 0.26, "eval_gen_len": 13.9024, "eval_loss": 0.38215455412864685, "eval_runtime": 268.768, "eval_samples_per_second": 11.929, "eval_steps_per_second": 2.984, "step": 134624 }, { "epoch": 56.16, "learning_rate": 1.4386605657237937e-05, "loss": 0.0263, "step": 135000 }, { "epoch": 56.36, "learning_rate": 1.4365848585690517e-05, "loss": 0.0227, "step": 135500 }, { "epoch": 56.57, "learning_rate": 1.4345049916805324e-05, "loss": 0.0276, "step": 136000 }, { "epoch": 56.78, "learning_rate": 1.4324251247920134e-05, "loss": 0.0277, "step": 136500 }, { "epoch": 56.99, "learning_rate": 1.4303494176372714e-05, "loss": 0.0259, "step": 137000 }, { "epoch": 57.0, "eval_cer": 0.2541, "eval_gen_len": 13.6722, "eval_loss": 0.3895968198776245, "eval_runtime": 256.4716, "eval_samples_per_second": 12.5, "eval_steps_per_second": 3.127, "step": 137028 }, { "epoch": 57.2, "learning_rate": 1.4282695507487523e-05, "loss": 0.0202, "step": 137500 }, { "epoch": 57.4, "learning_rate": 1.4261896838602332e-05, "loss": 0.0236, "step": 138000 }, { "epoch": 57.61, "learning_rate": 1.424109816971714e-05, "loss": 0.0288, "step": 138500 }, { "epoch": 57.82, "learning_rate": 1.422029950083195e-05, "loss": 0.0277, "step": 139000 }, { "epoch": 58.0, "eval_cer": 0.2584, "eval_gen_len": 13.7208, "eval_loss": 0.38816508650779724, "eval_runtime": 264.0106, "eval_samples_per_second": 12.143, "eval_steps_per_second": 3.038, "step": 139432 }, { "epoch": 58.03, "learning_rate": 1.4199500831946757e-05, "loss": 0.0297, "step": 139500 }, { "epoch": 58.24, "learning_rate": 1.4178702163061566e-05, "loss": 0.0206, "step": 140000 }, { "epoch": 58.44, "learning_rate": 1.4157945091514145e-05, "loss": 0.0245, "step": 140500 }, { "epoch": 58.65, "learning_rate": 1.4137146422628954e-05, "loss": 0.0264, "step": 141000 }, { "epoch": 58.86, "learning_rate": 1.4116347753743762e-05, "loss": 0.0289, "step": 141500 }, { "epoch": 59.0, "eval_cer": 0.2587, "eval_gen_len": 13.7096, "eval_loss": 0.39096423983573914, "eval_runtime": 260.1849, "eval_samples_per_second": 12.322, "eval_steps_per_second": 3.082, "step": 141836 }, { "epoch": 59.07, "learning_rate": 1.4095549084858571e-05, "loss": 0.0243, "step": 142000 }, { "epoch": 59.28, "learning_rate": 1.4074750415973379e-05, "loss": 0.0218, "step": 142500 }, { "epoch": 59.48, "learning_rate": 1.4053951747088188e-05, "loss": 0.0241, "step": 143000 }, { "epoch": 59.69, "learning_rate": 1.4033194675540766e-05, "loss": 0.0242, "step": 143500 }, { "epoch": 59.9, "learning_rate": 1.4012396006655576e-05, "loss": 0.0252, "step": 144000 }, { "epoch": 60.0, "eval_cer": 0.2515, "eval_gen_len": 13.6971, "eval_loss": 0.38846734166145325, "eval_runtime": 258.3853, "eval_samples_per_second": 12.408, "eval_steps_per_second": 3.104, "step": 144240 }, { "epoch": 60.11, "learning_rate": 1.3991597337770383e-05, "loss": 0.0259, "step": 144500 }, { "epoch": 60.32, "learning_rate": 1.3970798668885193e-05, "loss": 0.0208, "step": 145000 }, { "epoch": 60.52, "learning_rate": 1.3950000000000002e-05, "loss": 0.0252, "step": 145500 }, { "epoch": 60.73, "learning_rate": 1.392920133111481e-05, "loss": 0.0253, "step": 146000 }, { "epoch": 60.94, "learning_rate": 1.3908402662229619e-05, "loss": 0.0265, "step": 146500 }, { "epoch": 61.0, "eval_cer": 0.2569, "eval_gen_len": 13.85, "eval_loss": 0.3915986716747284, "eval_runtime": 271.1284, "eval_samples_per_second": 11.825, "eval_steps_per_second": 2.958, "step": 146644 }, { "epoch": 61.15, "learning_rate": 1.3887603993344427e-05, "loss": 0.0232, "step": 147000 }, { "epoch": 61.36, "learning_rate": 1.3866846921797007e-05, "loss": 0.0233, "step": 147500 }, { "epoch": 61.56, "learning_rate": 1.3846048252911814e-05, "loss": 0.0224, "step": 148000 }, { "epoch": 61.77, "learning_rate": 1.3825249584026624e-05, "loss": 0.0229, "step": 148500 }, { "epoch": 61.98, "learning_rate": 1.3804450915141431e-05, "loss": 0.0229, "step": 149000 }, { "epoch": 62.0, "eval_cer": 0.2565, "eval_gen_len": 13.8206, "eval_loss": 0.3992536962032318, "eval_runtime": 268.8678, "eval_samples_per_second": 11.924, "eval_steps_per_second": 2.983, "step": 149048 }, { "epoch": 62.19, "learning_rate": 1.378365224625624e-05, "loss": 0.0199, "step": 149500 }, { "epoch": 62.4, "learning_rate": 1.3762895174708819e-05, "loss": 0.0239, "step": 150000 }, { "epoch": 62.6, "learning_rate": 1.3742096505823628e-05, "loss": 0.0261, "step": 150500 }, { "epoch": 62.81, "learning_rate": 1.3721297836938436e-05, "loss": 0.0225, "step": 151000 }, { "epoch": 63.0, "eval_cer": 0.2507, "eval_gen_len": 13.6354, "eval_loss": 0.3880002200603485, "eval_runtime": 264.7668, "eval_samples_per_second": 12.109, "eval_steps_per_second": 3.029, "step": 151452 }, { "epoch": 63.02, "learning_rate": 1.3700499168053245e-05, "loss": 0.0268, "step": 151500 }, { "epoch": 63.23, "learning_rate": 1.3679742096505825e-05, "loss": 0.0188, "step": 152000 }, { "epoch": 63.44, "learning_rate": 1.3658985024958405e-05, "loss": 0.022, "step": 152500 }, { "epoch": 63.64, "learning_rate": 1.3638186356073213e-05, "loss": 0.0232, "step": 153000 }, { "epoch": 63.85, "learning_rate": 1.3617387687188022e-05, "loss": 0.0221, "step": 153500 }, { "epoch": 64.0, "eval_cer": 0.2583, "eval_gen_len": 13.9485, "eval_loss": 0.3893982470035553, "eval_runtime": 270.1502, "eval_samples_per_second": 11.867, "eval_steps_per_second": 2.969, "step": 153856 }, { "epoch": 64.06, "learning_rate": 1.359658901830283e-05, "loss": 0.0268, "step": 154000 }, { "epoch": 64.27, "learning_rate": 1.3575790349417639e-05, "loss": 0.0159, "step": 154500 }, { "epoch": 64.48, "learning_rate": 1.3554991680532448e-05, "loss": 0.0191, "step": 155000 }, { "epoch": 64.68, "learning_rate": 1.3534193011647256e-05, "loss": 0.02, "step": 155500 }, { "epoch": 64.89, "learning_rate": 1.3513394342762065e-05, "loss": 0.0234, "step": 156000 }, { "epoch": 65.0, "eval_cer": 0.2515, "eval_gen_len": 13.6329, "eval_loss": 0.39693862199783325, "eval_runtime": 262.2666, "eval_samples_per_second": 12.224, "eval_steps_per_second": 3.058, "step": 156260 }, { "epoch": 65.1, "learning_rate": 1.3492595673876873e-05, "loss": 0.0225, "step": 156500 }, { "epoch": 65.31, "learning_rate": 1.3471838602329453e-05, "loss": 0.0219, "step": 157000 }, { "epoch": 65.52, "learning_rate": 1.3451081530782031e-05, "loss": 0.0224, "step": 157500 }, { "epoch": 65.72, "learning_rate": 1.3430282861896839e-05, "loss": 0.0222, "step": 158000 }, { "epoch": 65.93, "learning_rate": 1.3409484193011648e-05, "loss": 0.0251, "step": 158500 }, { "epoch": 66.0, "eval_cer": 0.2524, "eval_gen_len": 13.9046, "eval_loss": 0.3977407217025757, "eval_runtime": 268.0146, "eval_samples_per_second": 11.962, "eval_steps_per_second": 2.992, "step": 158664 }, { "epoch": 66.14, "learning_rate": 1.3388685524126458e-05, "loss": 0.0181, "step": 159000 }, { "epoch": 66.35, "learning_rate": 1.3367886855241265e-05, "loss": 0.0194, "step": 159500 }, { "epoch": 66.56, "learning_rate": 1.3347129783693843e-05, "loss": 0.0209, "step": 160000 }, { "epoch": 66.76, "learning_rate": 1.3326331114808653e-05, "loss": 0.0235, "step": 160500 }, { "epoch": 66.97, "learning_rate": 1.3305532445923462e-05, "loss": 0.0253, "step": 161000 }, { "epoch": 67.0, "eval_cer": 0.2511, "eval_gen_len": 13.6563, "eval_loss": 0.40182340145111084, "eval_runtime": 252.4386, "eval_samples_per_second": 12.7, "eval_steps_per_second": 3.177, "step": 161068 }, { "epoch": 67.18, "learning_rate": 1.328473377703827e-05, "loss": 0.0218, "step": 161500 }, { "epoch": 67.39, "learning_rate": 1.326393510815308e-05, "loss": 0.0175, "step": 162000 }, { "epoch": 67.6, "learning_rate": 1.3243136439267887e-05, "loss": 0.0204, "step": 162500 }, { "epoch": 67.8, "learning_rate": 1.3222337770382696e-05, "loss": 0.0197, "step": 163000 }, { "epoch": 68.0, "eval_cer": 0.2559, "eval_gen_len": 14.0237, "eval_loss": 0.3884351849555969, "eval_runtime": 272.5844, "eval_samples_per_second": 11.761, "eval_steps_per_second": 2.942, "step": 163472 }, { "epoch": 68.01, "learning_rate": 1.3201539101497504e-05, "loss": 0.0241, "step": 163500 }, { "epoch": 68.22, "learning_rate": 1.3180782029950084e-05, "loss": 0.0179, "step": 164000 }, { "epoch": 68.43, "learning_rate": 1.3159983361064892e-05, "loss": 0.0175, "step": 164500 }, { "epoch": 68.64, "learning_rate": 1.3139184692179701e-05, "loss": 0.0227, "step": 165000 }, { "epoch": 68.84, "learning_rate": 1.3118386023294509e-05, "loss": 0.0219, "step": 165500 }, { "epoch": 69.0, "eval_cer": 0.2519, "eval_gen_len": 13.5009, "eval_loss": 0.402326375246048, "eval_runtime": 254.8714, "eval_samples_per_second": 12.579, "eval_steps_per_second": 3.147, "step": 165876 }, { "epoch": 69.05, "learning_rate": 1.309762895174709e-05, "loss": 0.0229, "step": 166000 }, { "epoch": 69.26, "learning_rate": 1.30768302828619e-05, "loss": 0.0183, "step": 166500 }, { "epoch": 69.47, "learning_rate": 1.3056073211314478e-05, "loss": 0.0195, "step": 167000 }, { "epoch": 69.68, "learning_rate": 1.3035274542429285e-05, "loss": 0.0204, "step": 167500 }, { "epoch": 69.88, "learning_rate": 1.3014475873544095e-05, "loss": 0.0207, "step": 168000 }, { "epoch": 70.0, "eval_cer": 0.2559, "eval_gen_len": 13.4392, "eval_loss": 0.390462189912796, "eval_runtime": 247.3478, "eval_samples_per_second": 12.962, "eval_steps_per_second": 3.242, "step": 168280 }, { "epoch": 70.09, "learning_rate": 1.2993677204658904e-05, "loss": 0.0216, "step": 168500 }, { "epoch": 70.3, "learning_rate": 1.2972878535773712e-05, "loss": 0.0192, "step": 169000 }, { "epoch": 70.51, "learning_rate": 1.2952079866888521e-05, "loss": 0.0192, "step": 169500 }, { "epoch": 70.72, "learning_rate": 1.2931281198003329e-05, "loss": 0.0187, "step": 170000 }, { "epoch": 70.92, "learning_rate": 1.2910524126455907e-05, "loss": 0.0233, "step": 170500 }, { "epoch": 71.0, "eval_cer": 0.2574, "eval_gen_len": 13.7012, "eval_loss": 0.4090117812156677, "eval_runtime": 257.6398, "eval_samples_per_second": 12.444, "eval_steps_per_second": 3.113, "step": 170684 }, { "epoch": 71.13, "learning_rate": 1.2889725457570716e-05, "loss": 0.019, "step": 171000 }, { "epoch": 71.34, "learning_rate": 1.2868926788685526e-05, "loss": 0.0159, "step": 171500 }, { "epoch": 71.55, "learning_rate": 1.2848128119800333e-05, "loss": 0.0213, "step": 172000 }, { "epoch": 71.76, "learning_rate": 1.2827329450915143e-05, "loss": 0.0177, "step": 172500 }, { "epoch": 71.96, "learning_rate": 1.280653078202995e-05, "loss": 0.024, "step": 173000 }, { "epoch": 72.0, "eval_cer": 0.2472, "eval_gen_len": 13.6382, "eval_loss": 0.38612431287765503, "eval_runtime": 258.2841, "eval_samples_per_second": 12.413, "eval_steps_per_second": 3.105, "step": 173088 }, { "epoch": 72.17, "learning_rate": 1.278577371048253e-05, "loss": 0.0176, "step": 173500 }, { "epoch": 72.38, "learning_rate": 1.2764975041597338e-05, "loss": 0.017, "step": 174000 }, { "epoch": 72.59, "learning_rate": 1.2744176372712147e-05, "loss": 0.0167, "step": 174500 }, { "epoch": 72.8, "learning_rate": 1.2723377703826955e-05, "loss": 0.0219, "step": 175000 }, { "epoch": 73.0, "eval_cer": 0.2647, "eval_gen_len": 14.1634, "eval_loss": 0.4713122546672821, "eval_runtime": 271.6072, "eval_samples_per_second": 11.804, "eval_steps_per_second": 2.953, "step": 175492 }, { "epoch": 73.0, "learning_rate": 1.2702579034941764e-05, "loss": 0.0198, "step": 175500 }, { "epoch": 73.21, "learning_rate": 1.2681780366056574e-05, "loss": 0.0147, "step": 176000 }, { "epoch": 73.42, "learning_rate": 1.2660981697171381e-05, "loss": 0.0181, "step": 176500 }, { "epoch": 73.63, "learning_rate": 1.264018302828619e-05, "loss": 0.02, "step": 177000 }, { "epoch": 73.84, "learning_rate": 1.2619384359400998e-05, "loss": 0.0189, "step": 177500 }, { "epoch": 74.0, "eval_cer": 0.2536, "eval_gen_len": 13.9267, "eval_loss": 0.39804303646087646, "eval_runtime": 266.4263, "eval_samples_per_second": 12.033, "eval_steps_per_second": 3.01, "step": 177896 }, { "epoch": 74.04, "learning_rate": 1.2598627287853578e-05, "loss": 0.0215, "step": 178000 }, { "epoch": 74.25, "learning_rate": 1.2577828618968386e-05, "loss": 0.0159, "step": 178500 }, { "epoch": 74.46, "learning_rate": 1.2557029950083195e-05, "loss": 0.0188, "step": 179000 }, { "epoch": 74.67, "learning_rate": 1.2536231281198003e-05, "loss": 0.0169, "step": 179500 }, { "epoch": 74.88, "learning_rate": 1.2515474209650585e-05, "loss": 0.0162, "step": 180000 }, { "epoch": 75.0, "eval_cer": 0.2529, "eval_gen_len": 13.6974, "eval_loss": 0.39674171805381775, "eval_runtime": 262.8858, "eval_samples_per_second": 12.195, "eval_steps_per_second": 3.051, "step": 180300 }, { "epoch": 75.08, "learning_rate": 1.2494675540765392e-05, "loss": 0.0193, "step": 180500 }, { "epoch": 75.29, "learning_rate": 1.2473876871880202e-05, "loss": 0.0162, "step": 181000 }, { "epoch": 75.5, "learning_rate": 1.2453078202995011e-05, "loss": 0.0174, "step": 181500 }, { "epoch": 75.71, "learning_rate": 1.243232113144759e-05, "loss": 0.017, "step": 182000 }, { "epoch": 75.92, "learning_rate": 1.2411522462562397e-05, "loss": 0.0183, "step": 182500 }, { "epoch": 76.0, "eval_cer": 0.2512, "eval_gen_len": 13.708, "eval_loss": 0.42503321170806885, "eval_runtime": 262.6259, "eval_samples_per_second": 12.207, "eval_steps_per_second": 3.054, "step": 182704 }, { "epoch": 76.12, "learning_rate": 1.2390765391014977e-05, "loss": 0.0172, "step": 183000 }, { "epoch": 76.33, "learning_rate": 1.2369966722129784e-05, "loss": 0.0164, "step": 183500 }, { "epoch": 76.54, "learning_rate": 1.2349168053244594e-05, "loss": 0.0179, "step": 184000 }, { "epoch": 76.75, "learning_rate": 1.2328369384359401e-05, "loss": 0.0181, "step": 184500 }, { "epoch": 76.96, "learning_rate": 1.230757071547421e-05, "loss": 0.0202, "step": 185000 }, { "epoch": 77.0, "eval_cer": 0.2548, "eval_gen_len": 13.8085, "eval_loss": 0.4239508807659149, "eval_runtime": 252.2131, "eval_samples_per_second": 12.711, "eval_steps_per_second": 3.18, "step": 185108 }, { "epoch": 77.16, "learning_rate": 1.228677204658902e-05, "loss": 0.015, "step": 185500 }, { "epoch": 77.37, "learning_rate": 1.2265973377703828e-05, "loss": 0.0148, "step": 186000 }, { "epoch": 77.58, "learning_rate": 1.2245174708818637e-05, "loss": 0.0179, "step": 186500 }, { "epoch": 77.79, "learning_rate": 1.2224417637271215e-05, "loss": 0.019, "step": 187000 }, { "epoch": 78.0, "learning_rate": 1.2203660565723794e-05, "loss": 0.0186, "step": 187500 }, { "epoch": 78.0, "eval_cer": 0.2522, "eval_gen_len": 13.806, "eval_loss": 0.4266161620616913, "eval_runtime": 261.9494, "eval_samples_per_second": 12.239, "eval_steps_per_second": 3.062, "step": 187512 }, { "epoch": 78.2, "learning_rate": 1.2182861896838603e-05, "loss": 0.0155, "step": 188000 }, { "epoch": 78.41, "learning_rate": 1.216206322795341e-05, "loss": 0.0167, "step": 188500 }, { "epoch": 78.62, "learning_rate": 1.214126455906822e-05, "loss": 0.0176, "step": 189000 }, { "epoch": 78.83, "learning_rate": 1.212046589018303e-05, "loss": 0.016, "step": 189500 }, { "epoch": 79.0, "eval_cer": 0.2499, "eval_gen_len": 13.713, "eval_loss": 0.4089159667491913, "eval_runtime": 255.1636, "eval_samples_per_second": 12.564, "eval_steps_per_second": 3.143, "step": 189916 }, { "epoch": 79.03, "learning_rate": 1.2099667221297837e-05, "loss": 0.0165, "step": 190000 }, { "epoch": 79.24, "learning_rate": 1.2078868552412646e-05, "loss": 0.0136, "step": 190500 }, { "epoch": 79.45, "learning_rate": 1.2058111480865226e-05, "loss": 0.0154, "step": 191000 }, { "epoch": 79.66, "learning_rate": 1.2037312811980036e-05, "loss": 0.0168, "step": 191500 }, { "epoch": 79.87, "learning_rate": 1.2016514143094843e-05, "loss": 0.0188, "step": 192000 }, { "epoch": 80.0, "eval_cer": 0.2501, "eval_gen_len": 13.7745, "eval_loss": 0.41349881887435913, "eval_runtime": 263.2542, "eval_samples_per_second": 12.178, "eval_steps_per_second": 3.046, "step": 192320 }, { "epoch": 80.07, "learning_rate": 1.1995715474209653e-05, "loss": 0.0169, "step": 192500 }, { "epoch": 80.28, "learning_rate": 1.1974916805324459e-05, "loss": 0.0142, "step": 193000 }, { "epoch": 80.49, "learning_rate": 1.1954118136439268e-05, "loss": 0.0186, "step": 193500 }, { "epoch": 80.7, "learning_rate": 1.1933319467554076e-05, "loss": 0.0173, "step": 194000 }, { "epoch": 80.91, "learning_rate": 1.1912520798668885e-05, "loss": 0.016, "step": 194500 }, { "epoch": 81.0, "eval_cer": 0.2477, "eval_gen_len": 13.6622, "eval_loss": 0.3864258825778961, "eval_runtime": 259.6006, "eval_samples_per_second": 12.35, "eval_steps_per_second": 3.089, "step": 194724 }, { "epoch": 81.11, "learning_rate": 1.1891763727121467e-05, "loss": 0.0166, "step": 195000 }, { "epoch": 81.32, "learning_rate": 1.1870965058236274e-05, "loss": 0.0141, "step": 195500 }, { "epoch": 81.53, "learning_rate": 1.1850207986688853e-05, "loss": 0.0151, "step": 196000 }, { "epoch": 81.74, "learning_rate": 1.1829409317803662e-05, "loss": 0.0176, "step": 196500 }, { "epoch": 81.95, "learning_rate": 1.180861064891847e-05, "loss": 0.0156, "step": 197000 }, { "epoch": 82.0, "eval_cer": 0.2551, "eval_gen_len": 13.9261, "eval_loss": 0.4278740882873535, "eval_runtime": 261.2997, "eval_samples_per_second": 12.269, "eval_steps_per_second": 3.069, "step": 197128 }, { "epoch": 82.15, "learning_rate": 1.1787811980033279e-05, "loss": 0.0173, "step": 197500 }, { "epoch": 82.36, "learning_rate": 1.1767013311148088e-05, "loss": 0.0153, "step": 198000 }, { "epoch": 82.57, "learning_rate": 1.1746214642262896e-05, "loss": 0.016, "step": 198500 }, { "epoch": 82.78, "learning_rate": 1.1725415973377705e-05, "loss": 0.0136, "step": 199000 }, { "epoch": 82.99, "learning_rate": 1.1704617304492513e-05, "loss": 0.018, "step": 199500 }, { "epoch": 83.0, "eval_cer": 0.25, "eval_gen_len": 13.704, "eval_loss": 0.4216358959674835, "eval_runtime": 257.2522, "eval_samples_per_second": 12.462, "eval_steps_per_second": 3.118, "step": 199532 }, { "epoch": 83.19, "learning_rate": 1.1683818635607322e-05, "loss": 0.0125, "step": 200000 }, { "epoch": 83.4, "learning_rate": 1.16630615640599e-05, "loss": 0.0158, "step": 200500 }, { "epoch": 83.61, "learning_rate": 1.1642304492512479e-05, "loss": 0.0153, "step": 201000 }, { "epoch": 83.82, "learning_rate": 1.1621505823627288e-05, "loss": 0.0159, "step": 201500 }, { "epoch": 84.0, "eval_cer": 0.2502, "eval_gen_len": 13.7121, "eval_loss": 0.42220476269721985, "eval_runtime": 260.9684, "eval_samples_per_second": 12.285, "eval_steps_per_second": 3.073, "step": 201936 }, { "epoch": 84.03, "learning_rate": 1.1600707154742097e-05, "loss": 0.0159, "step": 202000 }, { "epoch": 84.23, "learning_rate": 1.1579908485856905e-05, "loss": 0.0157, "step": 202500 }, { "epoch": 84.44, "learning_rate": 1.1559109816971715e-05, "loss": 0.0149, "step": 203000 }, { "epoch": 84.65, "learning_rate": 1.1538311148086522e-05, "loss": 0.0148, "step": 203500 }, { "epoch": 84.86, "learning_rate": 1.1517512479201332e-05, "loss": 0.0165, "step": 204000 }, { "epoch": 85.0, "eval_cer": 0.2482, "eval_gen_len": 13.7233, "eval_loss": 0.4069821238517761, "eval_runtime": 256.2579, "eval_samples_per_second": 12.511, "eval_steps_per_second": 3.13, "step": 204340 }, { "epoch": 85.07, "learning_rate": 1.149675540765391e-05, "loss": 0.0162, "step": 204500 }, { "epoch": 85.27, "learning_rate": 1.1475956738768719e-05, "loss": 0.0128, "step": 205000 }, { "epoch": 85.48, "learning_rate": 1.1455158069883527e-05, "loss": 0.0149, "step": 205500 }, { "epoch": 85.69, "learning_rate": 1.1434359400998336e-05, "loss": 0.0182, "step": 206000 }, { "epoch": 85.9, "learning_rate": 1.1413560732113146e-05, "loss": 0.0149, "step": 206500 }, { "epoch": 86.0, "eval_cer": 0.2493, "eval_gen_len": 13.7623, "eval_loss": 0.4060095250606537, "eval_runtime": 264.6587, "eval_samples_per_second": 12.114, "eval_steps_per_second": 3.03, "step": 206744 }, { "epoch": 86.11, "learning_rate": 1.1392762063227953e-05, "loss": 0.0131, "step": 207000 }, { "epoch": 86.31, "learning_rate": 1.1371963394342763e-05, "loss": 0.0145, "step": 207500 }, { "epoch": 86.52, "learning_rate": 1.135116472545757e-05, "loss": 0.0133, "step": 208000 }, { "epoch": 86.73, "learning_rate": 1.1330407653910152e-05, "loss": 0.0141, "step": 208500 }, { "epoch": 86.94, "learning_rate": 1.130960898502496e-05, "loss": 0.014, "step": 209000 }, { "epoch": 87.0, "eval_cer": 0.2461, "eval_gen_len": 13.6067, "eval_loss": 0.42620450258255005, "eval_runtime": 258.5687, "eval_samples_per_second": 12.399, "eval_steps_per_second": 3.102, "step": 209148 }, { "epoch": 87.15, "learning_rate": 1.1288810316139769e-05, "loss": 0.0131, "step": 209500 }, { "epoch": 87.35, "learning_rate": 1.1268011647254578e-05, "loss": 0.0121, "step": 210000 }, { "epoch": 87.56, "learning_rate": 1.1247212978369386e-05, "loss": 0.015, "step": 210500 }, { "epoch": 87.77, "learning_rate": 1.1226414309484195e-05, "loss": 0.0137, "step": 211000 }, { "epoch": 87.98, "learning_rate": 1.1205615640599003e-05, "loss": 0.0161, "step": 211500 }, { "epoch": 88.0, "eval_cer": 0.249, "eval_gen_len": 13.758, "eval_loss": 0.4252397418022156, "eval_runtime": 265.7747, "eval_samples_per_second": 12.063, "eval_steps_per_second": 3.018, "step": 211552 }, { "epoch": 88.19, "learning_rate": 1.1184816971713812e-05, "loss": 0.0126, "step": 212000 }, { "epoch": 88.39, "learning_rate": 1.116405990016639e-05, "loss": 0.0136, "step": 212500 }, { "epoch": 88.6, "learning_rate": 1.1143302828618969e-05, "loss": 0.0141, "step": 213000 }, { "epoch": 88.81, "learning_rate": 1.1122504159733778e-05, "loss": 0.0142, "step": 213500 }, { "epoch": 89.0, "eval_cer": 0.2511, "eval_gen_len": 13.8013, "eval_loss": 0.43667590618133545, "eval_runtime": 258.922, "eval_samples_per_second": 12.382, "eval_steps_per_second": 3.097, "step": 213956 }, { "epoch": 89.02, "learning_rate": 1.1101705490848587e-05, "loss": 0.0155, "step": 214000 }, { "epoch": 89.23, "learning_rate": 1.1080906821963395e-05, "loss": 0.0149, "step": 214500 }, { "epoch": 89.43, "learning_rate": 1.1060108153078204e-05, "loss": 0.0138, "step": 215000 }, { "epoch": 89.64, "learning_rate": 1.1039309484193012e-05, "loss": 0.0131, "step": 215500 }, { "epoch": 89.85, "learning_rate": 1.1018510815307822e-05, "loss": 0.0146, "step": 216000 }, { "epoch": 90.0, "eval_cer": 0.2483, "eval_gen_len": 13.6778, "eval_loss": 0.41625672578811646, "eval_runtime": 260.915, "eval_samples_per_second": 12.288, "eval_steps_per_second": 3.074, "step": 216360 }, { "epoch": 90.06, "learning_rate": 1.099771214642263e-05, "loss": 0.0131, "step": 216500 }, { "epoch": 90.27, "learning_rate": 1.0976955074875209e-05, "loss": 0.0128, "step": 217000 }, { "epoch": 90.47, "learning_rate": 1.0956156405990017e-05, "loss": 0.0139, "step": 217500 }, { "epoch": 90.68, "learning_rate": 1.0935357737104826e-05, "loss": 0.0146, "step": 218000 }, { "epoch": 90.89, "learning_rate": 1.0914559068219634e-05, "loss": 0.0127, "step": 218500 }, { "epoch": 91.0, "eval_cer": 0.2466, "eval_gen_len": 13.6344, "eval_loss": 0.42400336265563965, "eval_runtime": 259.4085, "eval_samples_per_second": 12.359, "eval_steps_per_second": 3.092, "step": 218764 }, { "epoch": 91.1, "learning_rate": 1.0893843594009986e-05, "loss": 0.0131, "step": 219000 }, { "epoch": 91.31, "learning_rate": 1.0873044925124794e-05, "loss": 0.0139, "step": 219500 }, { "epoch": 91.51, "learning_rate": 1.0852246256239603e-05, "loss": 0.0144, "step": 220000 }, { "epoch": 91.72, "learning_rate": 1.083144758735441e-05, "loss": 0.016, "step": 220500 }, { "epoch": 91.93, "learning_rate": 1.081064891846922e-05, "loss": 0.0147, "step": 221000 }, { "epoch": 92.0, "eval_cer": 0.2457, "eval_gen_len": 13.5948, "eval_loss": 0.4094228148460388, "eval_runtime": 257.4623, "eval_samples_per_second": 12.452, "eval_steps_per_second": 3.115, "step": 221168 }, { "epoch": 92.14, "learning_rate": 1.078985024958403e-05, "loss": 0.0136, "step": 221500 }, { "epoch": 92.35, "learning_rate": 1.0769051580698837e-05, "loss": 0.0123, "step": 222000 }, { "epoch": 92.55, "learning_rate": 1.0748252911813646e-05, "loss": 0.0113, "step": 222500 }, { "epoch": 92.76, "learning_rate": 1.0727454242928454e-05, "loss": 0.0153, "step": 223000 }, { "epoch": 92.97, "learning_rate": 1.0706697171381034e-05, "loss": 0.0153, "step": 223500 }, { "epoch": 93.0, "eval_cer": 0.2414, "eval_gen_len": 13.5168, "eval_loss": 0.419572651386261, "eval_runtime": 254.173, "eval_samples_per_second": 12.613, "eval_steps_per_second": 3.155, "step": 223572 }, { "epoch": 93.18, "learning_rate": 1.0685898502495842e-05, "loss": 0.0131, "step": 224000 }, { "epoch": 93.39, "learning_rate": 1.0665099833610651e-05, "loss": 0.0138, "step": 224500 }, { "epoch": 93.59, "learning_rate": 1.0644301164725459e-05, "loss": 0.0125, "step": 225000 }, { "epoch": 93.8, "learning_rate": 1.0623544093178037e-05, "loss": 0.0158, "step": 225500 }, { "epoch": 94.0, "eval_cer": 0.2491, "eval_gen_len": 13.8378, "eval_loss": 0.4395461678504944, "eval_runtime": 257.6708, "eval_samples_per_second": 12.442, "eval_steps_per_second": 3.112, "step": 225976 }, { "epoch": 94.01, "learning_rate": 1.0602745424292846e-05, "loss": 0.0141, "step": 226000 }, { "epoch": 94.22, "learning_rate": 1.0581946755407656e-05, "loss": 0.0147, "step": 226500 }, { "epoch": 94.43, "learning_rate": 1.0561148086522463e-05, "loss": 0.0119, "step": 227000 }, { "epoch": 94.63, "learning_rate": 1.0540391014975041e-05, "loss": 0.0139, "step": 227500 }, { "epoch": 94.84, "learning_rate": 1.0519633943427621e-05, "loss": 0.0138, "step": 228000 }, { "epoch": 95.0, "eval_cer": 0.2518, "eval_gen_len": 13.7695, "eval_loss": 0.43901219964027405, "eval_runtime": 262.2107, "eval_samples_per_second": 12.227, "eval_steps_per_second": 3.059, "step": 228380 }, { "epoch": 95.05, "learning_rate": 1.0498835274542429e-05, "loss": 0.0121, "step": 228500 }, { "epoch": 95.26, "learning_rate": 1.0478036605657238e-05, "loss": 0.0106, "step": 229000 }, { "epoch": 95.47, "learning_rate": 1.0457237936772046e-05, "loss": 0.0126, "step": 229500 }, { "epoch": 95.67, "learning_rate": 1.0436439267886855e-05, "loss": 0.0156, "step": 230000 }, { "epoch": 95.88, "learning_rate": 1.0415640599001665e-05, "loss": 0.0123, "step": 230500 }, { "epoch": 96.0, "eval_cer": 0.2475, "eval_gen_len": 13.69, "eval_loss": 0.4405384361743927, "eval_runtime": 260.5068, "eval_samples_per_second": 12.307, "eval_steps_per_second": 3.079, "step": 230784 }, { "epoch": 96.09, "learning_rate": 1.0394841930116472e-05, "loss": 0.011, "step": 231000 }, { "epoch": 96.3, "learning_rate": 1.0374084858569054e-05, "loss": 0.0132, "step": 231500 }, { "epoch": 96.51, "learning_rate": 1.0353286189683862e-05, "loss": 0.0135, "step": 232000 }, { "epoch": 96.71, "learning_rate": 1.0332487520798671e-05, "loss": 0.0143, "step": 232500 }, { "epoch": 96.92, "learning_rate": 1.0311688851913479e-05, "loss": 0.0136, "step": 233000 }, { "epoch": 97.0, "eval_cer": 0.2492, "eval_gen_len": 13.791, "eval_loss": 0.4154476225376129, "eval_runtime": 266.6041, "eval_samples_per_second": 12.025, "eval_steps_per_second": 3.008, "step": 233188 }, { "epoch": 97.13, "learning_rate": 1.0290890183028288e-05, "loss": 0.0117, "step": 233500 }, { "epoch": 97.34, "learning_rate": 1.0270091514143094e-05, "loss": 0.0116, "step": 234000 }, { "epoch": 97.55, "learning_rate": 1.0249292845257903e-05, "loss": 0.0117, "step": 234500 }, { "epoch": 97.75, "learning_rate": 1.0228494176372713e-05, "loss": 0.0158, "step": 235000 }, { "epoch": 97.96, "learning_rate": 1.0207737104825293e-05, "loss": 0.012, "step": 235500 }, { "epoch": 98.0, "eval_cer": 0.2481, "eval_gen_len": 13.8702, "eval_loss": 0.43725699186325073, "eval_runtime": 257.4309, "eval_samples_per_second": 12.454, "eval_steps_per_second": 3.115, "step": 235592 }, { "epoch": 98.17, "learning_rate": 1.0186938435940102e-05, "loss": 0.0098, "step": 236000 }, { "epoch": 98.38, "learning_rate": 1.016618136439268e-05, "loss": 0.0118, "step": 236500 }, { "epoch": 98.59, "learning_rate": 1.0145382695507488e-05, "loss": 0.0128, "step": 237000 }, { "epoch": 98.79, "learning_rate": 1.0124584026622297e-05, "loss": 0.0122, "step": 237500 }, { "epoch": 99.0, "eval_cer": 0.2504, "eval_gen_len": 13.8051, "eval_loss": 0.44157010316848755, "eval_runtime": 284.8185, "eval_samples_per_second": 11.256, "eval_steps_per_second": 2.816, "step": 237996 }, { "epoch": 99.0, "learning_rate": 1.0103785357737107e-05, "loss": 0.0136, "step": 238000 }, { "epoch": 99.21, "learning_rate": 1.0082986688851914e-05, "loss": 0.0115, "step": 238500 }, { "epoch": 99.42, "learning_rate": 1.0062188019966724e-05, "loss": 0.0109, "step": 239000 }, { "epoch": 99.63, "learning_rate": 1.0041389351081531e-05, "loss": 0.0135, "step": 239500 }, { "epoch": 99.83, "learning_rate": 1.002059068219634e-05, "loss": 0.0146, "step": 240000 }, { "epoch": 100.0, "eval_cer": 0.2489, "eval_gen_len": 13.6868, "eval_loss": 0.42966365814208984, "eval_runtime": 282.168, "eval_samples_per_second": 11.362, "eval_steps_per_second": 2.842, "step": 240400 }, { "epoch": 100.04, "learning_rate": 9.99979201331115e-06, "loss": 0.0136, "step": 240500 }, { "epoch": 100.25, "learning_rate": 9.979034941763728e-06, "loss": 0.0132, "step": 241000 }, { "epoch": 100.46, "learning_rate": 9.958236272878536e-06, "loss": 0.0112, "step": 241500 }, { "epoch": 100.67, "learning_rate": 9.937437603993345e-06, "loss": 0.0139, "step": 242000 }, { "epoch": 100.87, "learning_rate": 9.916638935108155e-06, "loss": 0.0135, "step": 242500 }, { "epoch": 101.0, "eval_cer": 0.2428, "eval_gen_len": 13.5689, "eval_loss": 0.4310346245765686, "eval_runtime": 280.207, "eval_samples_per_second": 11.442, "eval_steps_per_second": 2.862, "step": 242804 }, { "epoch": 101.08, "learning_rate": 9.895881863560733e-06, "loss": 0.0118, "step": 243000 }, { "epoch": 101.29, "learning_rate": 9.875083194675542e-06, "loss": 0.0111, "step": 243500 }, { "epoch": 101.5, "learning_rate": 9.854284525790352e-06, "loss": 0.0115, "step": 244000 }, { "epoch": 101.71, "learning_rate": 9.83352745424293e-06, "loss": 0.0124, "step": 244500 }, { "epoch": 101.91, "learning_rate": 9.812728785357737e-06, "loss": 0.0136, "step": 245000 }, { "epoch": 102.0, "eval_cer": 0.246, "eval_gen_len": 13.4972, "eval_loss": 0.422376424074173, "eval_runtime": 269.57, "eval_samples_per_second": 11.893, "eval_steps_per_second": 2.975, "step": 245208 }, { "epoch": 102.12, "learning_rate": 9.791930116472547e-06, "loss": 0.0124, "step": 245500 }, { "epoch": 102.33, "learning_rate": 9.771131447587356e-06, "loss": 0.0101, "step": 246000 }, { "epoch": 102.54, "learning_rate": 9.750332778702164e-06, "loss": 0.0109, "step": 246500 }, { "epoch": 102.75, "learning_rate": 9.729534109816973e-06, "loss": 0.0108, "step": 247000 }, { "epoch": 102.95, "learning_rate": 9.70873544093178e-06, "loss": 0.014, "step": 247500 }, { "epoch": 103.0, "eval_cer": 0.2502, "eval_gen_len": 13.8394, "eval_loss": 0.45676541328430176, "eval_runtime": 270.1526, "eval_samples_per_second": 11.867, "eval_steps_per_second": 2.969, "step": 247612 }, { "epoch": 103.16, "learning_rate": 9.68793677204659e-06, "loss": 0.0106, "step": 248000 }, { "epoch": 103.37, "learning_rate": 9.667179700499168e-06, "loss": 0.0117, "step": 248500 }, { "epoch": 103.58, "learning_rate": 9.646381031613978e-06, "loss": 0.0114, "step": 249000 }, { "epoch": 103.79, "learning_rate": 9.625582362728785e-06, "loss": 0.0135, "step": 249500 }, { "epoch": 103.99, "learning_rate": 9.604783693843595e-06, "loss": 0.0125, "step": 250000 }, { "epoch": 104.0, "eval_cer": 0.2465, "eval_gen_len": 13.4345, "eval_loss": 0.39919513463974, "eval_runtime": 270.6771, "eval_samples_per_second": 11.844, "eval_steps_per_second": 2.963, "step": 250016 }, { "epoch": 104.2, "learning_rate": 9.583985024958402e-06, "loss": 0.01, "step": 250500 }, { "epoch": 104.41, "learning_rate": 9.563227953410982e-06, "loss": 0.0126, "step": 251000 }, { "epoch": 104.62, "learning_rate": 9.542429284525792e-06, "loss": 0.0109, "step": 251500 }, { "epoch": 104.83, "learning_rate": 9.5216306156406e-06, "loss": 0.0135, "step": 252000 }, { "epoch": 105.0, "eval_cer": 0.2472, "eval_gen_len": 13.7277, "eval_loss": 0.44164207577705383, "eval_runtime": 274.8419, "eval_samples_per_second": 11.665, "eval_steps_per_second": 2.918, "step": 252420 }, { "epoch": 105.03, "learning_rate": 9.500831946755409e-06, "loss": 0.0119, "step": 252500 }, { "epoch": 105.24, "learning_rate": 9.480033277870218e-06, "loss": 0.0095, "step": 253000 }, { "epoch": 105.45, "learning_rate": 9.459276206322796e-06, "loss": 0.0115, "step": 253500 }, { "epoch": 105.66, "learning_rate": 9.438477537437604e-06, "loss": 0.0135, "step": 254000 }, { "epoch": 105.87, "learning_rate": 9.417678868552413e-06, "loss": 0.012, "step": 254500 }, { "epoch": 106.0, "eval_cer": 0.2416, "eval_gen_len": 13.4994, "eval_loss": 0.41192150115966797, "eval_runtime": 275.6326, "eval_samples_per_second": 11.631, "eval_steps_per_second": 2.91, "step": 254824 }, { "epoch": 106.07, "learning_rate": 9.396880199667223e-06, "loss": 0.0114, "step": 255000 }, { "epoch": 106.28, "learning_rate": 9.376123128119801e-06, "loss": 0.0116, "step": 255500 }, { "epoch": 106.49, "learning_rate": 9.355324459234609e-06, "loss": 0.0093, "step": 256000 }, { "epoch": 106.7, "learning_rate": 9.334525790349418e-06, "loss": 0.0128, "step": 256500 }, { "epoch": 106.91, "learning_rate": 9.313768718801998e-06, "loss": 0.0133, "step": 257000 }, { "epoch": 107.0, "eval_cer": 0.2476, "eval_gen_len": 13.6494, "eval_loss": 0.42318016290664673, "eval_runtime": 280.8295, "eval_samples_per_second": 11.416, "eval_steps_per_second": 2.856, "step": 257228 }, { "epoch": 107.11, "learning_rate": 9.292970049916805e-06, "loss": 0.0097, "step": 257500 }, { "epoch": 107.32, "learning_rate": 9.272171381031615e-06, "loss": 0.012, "step": 258000 }, { "epoch": 107.53, "learning_rate": 9.251372712146424e-06, "loss": 0.0093, "step": 258500 }, { "epoch": 107.74, "learning_rate": 9.230574043261232e-06, "loss": 0.0103, "step": 259000 }, { "epoch": 107.95, "learning_rate": 9.209775374376041e-06, "loss": 0.0103, "step": 259500 }, { "epoch": 108.0, "eval_cer": 0.2434, "eval_gen_len": 13.4925, "eval_loss": 0.425822377204895, "eval_runtime": 284.9328, "eval_samples_per_second": 11.252, "eval_steps_per_second": 2.815, "step": 259632 }, { "epoch": 108.15, "learning_rate": 9.188976705490849e-06, "loss": 0.0108, "step": 260000 }, { "epoch": 108.36, "learning_rate": 9.168178036605658e-06, "loss": 0.0109, "step": 260500 }, { "epoch": 108.57, "learning_rate": 9.147420965058236e-06, "loss": 0.0125, "step": 261000 }, { "epoch": 108.78, "learning_rate": 9.126622296173046e-06, "loss": 0.0112, "step": 261500 }, { "epoch": 108.99, "learning_rate": 9.105823627287854e-06, "loss": 0.0116, "step": 262000 }, { "epoch": 109.0, "eval_cer": 0.2462, "eval_gen_len": 13.7115, "eval_loss": 0.43990305066108704, "eval_runtime": 285.2346, "eval_samples_per_second": 11.24, "eval_steps_per_second": 2.812, "step": 262036 }, { "epoch": 109.19, "learning_rate": 9.085066555740433e-06, "loss": 0.0088, "step": 262500 }, { "epoch": 109.4, "learning_rate": 9.064267886855243e-06, "loss": 0.0098, "step": 263000 }, { "epoch": 109.61, "learning_rate": 9.04346921797005e-06, "loss": 0.0106, "step": 263500 }, { "epoch": 109.82, "learning_rate": 9.02267054908486e-06, "loss": 0.0123, "step": 264000 }, { "epoch": 110.0, "eval_cer": 0.2462, "eval_gen_len": 13.6023, "eval_loss": 0.41700801253318787, "eval_runtime": 287.8449, "eval_samples_per_second": 11.138, "eval_steps_per_second": 2.786, "step": 264440 }, { "epoch": 110.02, "learning_rate": 9.00187188019967e-06, "loss": 0.0125, "step": 264500 }, { "epoch": 110.23, "learning_rate": 8.981073211314477e-06, "loss": 0.01, "step": 265000 }, { "epoch": 110.44, "learning_rate": 8.960316139767055e-06, "loss": 0.0099, "step": 265500 }, { "epoch": 110.65, "learning_rate": 8.939517470881864e-06, "loss": 0.0108, "step": 266000 }, { "epoch": 110.86, "learning_rate": 8.918718801996674e-06, "loss": 0.0109, "step": 266500 }, { "epoch": 111.0, "eval_cer": 0.2476, "eval_gen_len": 13.6859, "eval_loss": 0.4497167766094208, "eval_runtime": 326.8835, "eval_samples_per_second": 9.808, "eval_steps_per_second": 2.453, "step": 266844 }, { "epoch": 111.06, "learning_rate": 8.897920133111481e-06, "loss": 0.0106, "step": 267000 }, { "epoch": 111.27, "learning_rate": 8.87712146422629e-06, "loss": 0.0103, "step": 267500 }, { "epoch": 111.48, "learning_rate": 8.856322795341098e-06, "loss": 0.0104, "step": 268000 }, { "epoch": 111.69, "learning_rate": 8.835524126455908e-06, "loss": 0.0126, "step": 268500 }, { "epoch": 111.9, "learning_rate": 8.814725457570717e-06, "loss": 0.0115, "step": 269000 }, { "epoch": 112.0, "eval_cer": 0.2528, "eval_gen_len": 13.9145, "eval_loss": 0.4540727734565735, "eval_runtime": 317.49, "eval_samples_per_second": 10.098, "eval_steps_per_second": 2.526, "step": 269248 }, { "epoch": 112.1, "learning_rate": 8.794009983361066e-06, "loss": 0.0108, "step": 269500 }, { "epoch": 112.31, "learning_rate": 8.773211314475875e-06, "loss": 0.0092, "step": 270000 }, { "epoch": 112.52, "learning_rate": 8.752412645590683e-06, "loss": 0.0104, "step": 270500 }, { "epoch": 112.73, "learning_rate": 8.731613976705492e-06, "loss": 0.0091, "step": 271000 }, { "epoch": 112.94, "learning_rate": 8.7108153078203e-06, "loss": 0.0115, "step": 271500 }, { "epoch": 113.0, "eval_cer": 0.2449, "eval_gen_len": 13.4545, "eval_loss": 0.4440736770629883, "eval_runtime": 280.5689, "eval_samples_per_second": 11.427, "eval_steps_per_second": 2.858, "step": 271652 }, { "epoch": 113.14, "learning_rate": 8.69001663893511e-06, "loss": 0.0092, "step": 272000 }, { "epoch": 113.35, "learning_rate": 8.669217970049919e-06, "loss": 0.0092, "step": 272500 }, { "epoch": 113.56, "learning_rate": 8.648419301164726e-06, "loss": 0.0109, "step": 273000 }, { "epoch": 113.77, "learning_rate": 8.627620632279536e-06, "loss": 0.0098, "step": 273500 }, { "epoch": 113.98, "learning_rate": 8.606821963394343e-06, "loss": 0.0113, "step": 274000 }, { "epoch": 114.0, "eval_cer": 0.2471, "eval_gen_len": 13.5477, "eval_loss": 0.4469629228115082, "eval_runtime": 276.6766, "eval_samples_per_second": 11.588, "eval_steps_per_second": 2.899, "step": 274056 }, { "epoch": 114.18, "learning_rate": 8.586064891846923e-06, "loss": 0.0115, "step": 274500 }, { "epoch": 114.39, "learning_rate": 8.565266222961731e-06, "loss": 0.0077, "step": 275000 }, { "epoch": 114.6, "learning_rate": 8.54446755407654e-06, "loss": 0.0103, "step": 275500 }, { "epoch": 114.81, "learning_rate": 8.523668885191348e-06, "loss": 0.01, "step": 276000 }, { "epoch": 115.0, "eval_cer": 0.2537, "eval_gen_len": 13.9704, "eval_loss": 0.47477516531944275, "eval_runtime": 291.6272, "eval_samples_per_second": 10.993, "eval_steps_per_second": 2.75, "step": 276460 }, { "epoch": 115.02, "learning_rate": 8.502911813643926e-06, "loss": 0.0116, "step": 276500 }, { "epoch": 115.22, "learning_rate": 8.482113144758736e-06, "loss": 0.0102, "step": 277000 }, { "epoch": 115.43, "learning_rate": 8.461314475873545e-06, "loss": 0.0085, "step": 277500 }, { "epoch": 115.64, "learning_rate": 8.440515806988353e-06, "loss": 0.0104, "step": 278000 }, { "epoch": 115.85, "learning_rate": 8.419758735440932e-06, "loss": 0.0125, "step": 278500 }, { "epoch": 116.0, "eval_cer": 0.2438, "eval_gen_len": 13.4816, "eval_loss": 0.4395754933357239, "eval_runtime": 273.4039, "eval_samples_per_second": 11.726, "eval_steps_per_second": 2.933, "step": 278864 }, { "epoch": 116.06, "learning_rate": 8.398960066555742e-06, "loss": 0.0093, "step": 279000 }, { "epoch": 116.26, "learning_rate": 8.37816139767055e-06, "loss": 0.01, "step": 279500 }, { "epoch": 116.47, "learning_rate": 8.357362728785359e-06, "loss": 0.0087, "step": 280000 }, { "epoch": 116.68, "learning_rate": 8.336564059900167e-06, "loss": 0.0099, "step": 280500 }, { "epoch": 116.89, "learning_rate": 8.315765391014976e-06, "loss": 0.0101, "step": 281000 }, { "epoch": 117.0, "eval_cer": 0.2464, "eval_gen_len": 13.7723, "eval_loss": 0.44487103819847107, "eval_runtime": 278.0443, "eval_samples_per_second": 11.531, "eval_steps_per_second": 2.884, "step": 281268 }, { "epoch": 117.1, "learning_rate": 8.295008319467554e-06, "loss": 0.0094, "step": 281500 }, { "epoch": 117.3, "learning_rate": 8.274209650582364e-06, "loss": 0.0077, "step": 282000 }, { "epoch": 117.51, "learning_rate": 8.253410981697171e-06, "loss": 0.01, "step": 282500 }, { "epoch": 117.72, "learning_rate": 8.23261231281198e-06, "loss": 0.0113, "step": 283000 }, { "epoch": 117.93, "learning_rate": 8.21181364392679e-06, "loss": 0.0108, "step": 283500 }, { "epoch": 118.0, "eval_cer": 0.249, "eval_gen_len": 13.7711, "eval_loss": 0.4563674330711365, "eval_runtime": 286.5388, "eval_samples_per_second": 11.189, "eval_steps_per_second": 2.799, "step": 283672 }, { "epoch": 118.14, "learning_rate": 8.191014975041598e-06, "loss": 0.0093, "step": 284000 }, { "epoch": 118.34, "learning_rate": 8.170216306156407e-06, "loss": 0.0097, "step": 284500 }, { "epoch": 118.55, "learning_rate": 8.149459234608985e-06, "loss": 0.0088, "step": 285000 }, { "epoch": 118.76, "learning_rate": 8.128660565723795e-06, "loss": 0.0087, "step": 285500 }, { "epoch": 118.97, "learning_rate": 8.107861896838602e-06, "loss": 0.0121, "step": 286000 }, { "epoch": 119.0, "eval_cer": 0.2484, "eval_gen_len": 13.7848, "eval_loss": 0.4589692950248718, "eval_runtime": 285.9698, "eval_samples_per_second": 11.211, "eval_steps_per_second": 2.804, "step": 286076 }, { "epoch": 119.18, "learning_rate": 8.087063227953412e-06, "loss": 0.0092, "step": 286500 }, { "epoch": 119.38, "learning_rate": 8.06626455906822e-06, "loss": 0.0104, "step": 287000 }, { "epoch": 119.59, "learning_rate": 8.045465890183029e-06, "loss": 0.0094, "step": 287500 }, { "epoch": 119.8, "learning_rate": 8.024667221297838e-06, "loss": 0.0111, "step": 288000 }, { "epoch": 120.0, "eval_cer": 0.2488, "eval_gen_len": 13.8621, "eval_loss": 0.46863117814064026, "eval_runtime": 282.0826, "eval_samples_per_second": 11.365, "eval_steps_per_second": 2.843, "step": 288480 }, { "epoch": 120.01, "learning_rate": 8.003868552412646e-06, "loss": 0.0105, "step": 288500 }, { "epoch": 120.22, "learning_rate": 7.983111480865226e-06, "loss": 0.0086, "step": 289000 }, { "epoch": 120.42, "learning_rate": 7.962312811980035e-06, "loss": 0.0101, "step": 289500 }, { "epoch": 120.63, "learning_rate": 7.941514143094843e-06, "loss": 0.0103, "step": 290000 }, { "epoch": 120.84, "learning_rate": 7.920715474209652e-06, "loss": 0.009, "step": 290500 }, { "epoch": 121.0, "eval_cer": 0.2451, "eval_gen_len": 13.7648, "eval_loss": 0.44192788004875183, "eval_runtime": 276.6356, "eval_samples_per_second": 11.589, "eval_steps_per_second": 2.899, "step": 290884 }, { "epoch": 121.05, "learning_rate": 7.89991680532446e-06, "loss": 0.0103, "step": 291000 }, { "epoch": 121.26, "learning_rate": 7.87915973377704e-06, "loss": 0.0109, "step": 291500 }, { "epoch": 121.46, "learning_rate": 7.858361064891847e-06, "loss": 0.0096, "step": 292000 }, { "epoch": 121.67, "learning_rate": 7.837603993344427e-06, "loss": 0.0104, "step": 292500 }, { "epoch": 121.88, "learning_rate": 7.816805324459236e-06, "loss": 0.0093, "step": 293000 }, { "epoch": 122.0, "eval_cer": 0.242, "eval_gen_len": 13.6089, "eval_loss": 0.43688440322875977, "eval_runtime": 279.7745, "eval_samples_per_second": 11.459, "eval_steps_per_second": 2.867, "step": 293288 }, { "epoch": 122.09, "learning_rate": 7.796006655574044e-06, "loss": 0.0096, "step": 293500 }, { "epoch": 122.3, "learning_rate": 7.775207986688853e-06, "loss": 0.0083, "step": 294000 }, { "epoch": 122.5, "learning_rate": 7.754409317803661e-06, "loss": 0.0092, "step": 294500 }, { "epoch": 122.71, "learning_rate": 7.73361064891847e-06, "loss": 0.0119, "step": 295000 }, { "epoch": 122.92, "learning_rate": 7.71281198003328e-06, "loss": 0.0092, "step": 295500 }, { "epoch": 123.0, "eval_cer": 0.2475, "eval_gen_len": 13.6304, "eval_loss": 0.4489113390445709, "eval_runtime": 292.1033, "eval_samples_per_second": 10.976, "eval_steps_per_second": 2.746, "step": 295692 }, { "epoch": 123.13, "learning_rate": 7.692054908485858e-06, "loss": 0.0096, "step": 296000 }, { "epoch": 123.34, "learning_rate": 7.671256239600666e-06, "loss": 0.0093, "step": 296500 }, { "epoch": 123.54, "learning_rate": 7.650457570715475e-06, "loss": 0.0094, "step": 297000 }, { "epoch": 123.75, "learning_rate": 7.629658901830283e-06, "loss": 0.0111, "step": 297500 }, { "epoch": 123.96, "learning_rate": 7.608860232945092e-06, "loss": 0.0099, "step": 298000 }, { "epoch": 124.0, "eval_cer": 0.2447, "eval_gen_len": 13.6413, "eval_loss": 0.45137402415275574, "eval_runtime": 273.6913, "eval_samples_per_second": 11.714, "eval_steps_per_second": 2.93, "step": 298096 }, { "epoch": 124.17, "learning_rate": 7.588061564059901e-06, "loss": 0.0078, "step": 298500 }, { "epoch": 124.38, "learning_rate": 7.56726289517471e-06, "loss": 0.0093, "step": 299000 }, { "epoch": 124.58, "learning_rate": 7.5464642262895185e-06, "loss": 0.0089, "step": 299500 }, { "epoch": 124.79, "learning_rate": 7.525707154742097e-06, "loss": 0.01, "step": 300000 }, { "epoch": 125.0, "learning_rate": 7.5049500831946766e-06, "loss": 0.0103, "step": 300500 }, { "epoch": 125.0, "eval_cer": 0.2467, "eval_gen_len": 13.8041, "eval_loss": 0.45419880747795105, "eval_runtime": 277.7542, "eval_samples_per_second": 11.543, "eval_steps_per_second": 2.887, "step": 300500 }, { "epoch": 125.21, "learning_rate": 7.484151414309485e-06, "loss": 0.0076, "step": 301000 }, { "epoch": 125.42, "learning_rate": 7.463352745424294e-06, "loss": 0.0089, "step": 301500 }, { "epoch": 125.62, "learning_rate": 7.442554076539102e-06, "loss": 0.0089, "step": 302000 }, { "epoch": 125.83, "learning_rate": 7.4217554076539115e-06, "loss": 0.0121, "step": 302500 }, { "epoch": 126.0, "eval_cer": 0.2496, "eval_gen_len": 13.8525, "eval_loss": 0.4686892330646515, "eval_runtime": 288.7714, "eval_samples_per_second": 11.102, "eval_steps_per_second": 2.777, "step": 302904 }, { "epoch": 126.04, "learning_rate": 7.40095673876872e-06, "loss": 0.0091, "step": 303000 }, { "epoch": 126.25, "learning_rate": 7.3801580698835285e-06, "loss": 0.0089, "step": 303500 }, { "epoch": 126.46, "learning_rate": 7.359359400998337e-06, "loss": 0.0092, "step": 304000 }, { "epoch": 126.66, "learning_rate": 7.338560732113146e-06, "loss": 0.0081, "step": 304500 }, { "epoch": 126.87, "learning_rate": 7.317845257903495e-06, "loss": 0.0116, "step": 305000 }, { "epoch": 127.0, "eval_cer": 0.2443, "eval_gen_len": 13.6432, "eval_loss": 0.4484730660915375, "eval_runtime": 273.2803, "eval_samples_per_second": 11.732, "eval_steps_per_second": 2.935, "step": 305308 }, { "epoch": 127.08, "learning_rate": 7.297046589018303e-06, "loss": 0.0077, "step": 305500 }, { "epoch": 127.29, "learning_rate": 7.276247920133111e-06, "loss": 0.009, "step": 306000 }, { "epoch": 127.5, "learning_rate": 7.25544925124792e-06, "loss": 0.0094, "step": 306500 }, { "epoch": 127.7, "learning_rate": 7.234650582362729e-06, "loss": 0.0081, "step": 307000 }, { "epoch": 127.91, "learning_rate": 7.213851913477538e-06, "loss": 0.0105, "step": 307500 }, { "epoch": 128.0, "eval_cer": 0.2437, "eval_gen_len": 13.7661, "eval_loss": 0.4494189918041229, "eval_runtime": 282.0309, "eval_samples_per_second": 11.368, "eval_steps_per_second": 2.844, "step": 307712 }, { "epoch": 128.12, "learning_rate": 7.193094841930118e-06, "loss": 0.0085, "step": 308000 }, { "epoch": 128.33, "learning_rate": 7.172296173044926e-06, "loss": 0.0086, "step": 308500 }, { "epoch": 128.54, "learning_rate": 7.151497504159735e-06, "loss": 0.0091, "step": 309000 }, { "epoch": 128.74, "learning_rate": 7.130698835274543e-06, "loss": 0.0094, "step": 309500 }, { "epoch": 128.95, "learning_rate": 7.109900166389352e-06, "loss": 0.0087, "step": 310000 }, { "epoch": 129.0, "eval_cer": 0.2465, "eval_gen_len": 13.5352, "eval_loss": 0.46537643671035767, "eval_runtime": 272.0294, "eval_samples_per_second": 11.785, "eval_steps_per_second": 2.948, "step": 310116 }, { "epoch": 129.16, "learning_rate": 7.08910149750416e-06, "loss": 0.0088, "step": 310500 }, { "epoch": 129.37, "learning_rate": 7.0683028286189696e-06, "loss": 0.0076, "step": 311000 }, { "epoch": 129.58, "learning_rate": 7.047504159733778e-06, "loss": 0.0088, "step": 311500 }, { "epoch": 129.78, "learning_rate": 7.026705490848587e-06, "loss": 0.0092, "step": 312000 }, { "epoch": 129.99, "learning_rate": 7.005948419301165e-06, "loss": 0.0106, "step": 312500 }, { "epoch": 130.0, "eval_cer": 0.2457, "eval_gen_len": 13.5667, "eval_loss": 0.44364768266677856, "eval_runtime": 269.0291, "eval_samples_per_second": 11.917, "eval_steps_per_second": 2.981, "step": 312520 }, { "epoch": 130.2, "learning_rate": 6.985149750415974e-06, "loss": 0.008, "step": 313000 }, { "epoch": 130.41, "learning_rate": 6.964351081530783e-06, "loss": 0.0072, "step": 313500 }, { "epoch": 130.62, "learning_rate": 6.943552412645591e-06, "loss": 0.0091, "step": 314000 }, { "epoch": 130.82, "learning_rate": 6.9227537437604e-06, "loss": 0.0087, "step": 314500 }, { "epoch": 131.0, "eval_cer": 0.2451, "eval_gen_len": 13.7358, "eval_loss": 0.4612971544265747, "eval_runtime": 274.1913, "eval_samples_per_second": 11.693, "eval_steps_per_second": 2.925, "step": 314924 }, { "epoch": 131.03, "learning_rate": 6.90199667221298e-06, "loss": 0.0107, "step": 315000 }, { "epoch": 131.24, "learning_rate": 6.881239600665558e-06, "loss": 0.0082, "step": 315500 }, { "epoch": 131.45, "learning_rate": 6.860440931780366e-06, "loss": 0.0089, "step": 316000 }, { "epoch": 131.66, "learning_rate": 6.839642262895176e-06, "loss": 0.009, "step": 316500 }, { "epoch": 131.86, "learning_rate": 6.818843594009984e-06, "loss": 0.0104, "step": 317000 }, { "epoch": 132.0, "eval_cer": 0.2468, "eval_gen_len": 13.5936, "eval_loss": 0.4653697907924652, "eval_runtime": 277.9546, "eval_samples_per_second": 11.534, "eval_steps_per_second": 2.885, "step": 317328 }, { "epoch": 132.07, "learning_rate": 6.798044925124793e-06, "loss": 0.0094, "step": 317500 }, { "epoch": 132.28, "learning_rate": 6.777246256239601e-06, "loss": 0.0079, "step": 318000 }, { "epoch": 132.49, "learning_rate": 6.75644758735441e-06, "loss": 0.0098, "step": 318500 }, { "epoch": 132.7, "learning_rate": 6.735648918469218e-06, "loss": 0.0081, "step": 319000 }, { "epoch": 132.9, "learning_rate": 6.714891846921797e-06, "loss": 0.0089, "step": 319500 }, { "epoch": 133.0, "eval_cer": 0.2455, "eval_gen_len": 13.6875, "eval_loss": 0.45620593428611755, "eval_runtime": 284.3664, "eval_samples_per_second": 11.274, "eval_steps_per_second": 2.82, "step": 319732 }, { "epoch": 133.11, "learning_rate": 6.694093178036606e-06, "loss": 0.0085, "step": 320000 }, { "epoch": 133.32, "learning_rate": 6.673294509151414e-06, "loss": 0.0091, "step": 320500 }, { "epoch": 133.53, "learning_rate": 6.652495840266223e-06, "loss": 0.0073, "step": 321000 }, { "epoch": 133.74, "learning_rate": 6.631738768718803e-06, "loss": 0.0091, "step": 321500 }, { "epoch": 133.94, "learning_rate": 6.610940099833611e-06, "loss": 0.0088, "step": 322000 }, { "epoch": 134.0, "eval_cer": 0.2455, "eval_gen_len": 13.6653, "eval_loss": 0.4463290274143219, "eval_runtime": 265.7595, "eval_samples_per_second": 12.064, "eval_steps_per_second": 3.018, "step": 322136 }, { "epoch": 134.15, "learning_rate": 6.59014143094842e-06, "loss": 0.0082, "step": 322500 }, { "epoch": 134.36, "learning_rate": 6.569342762063229e-06, "loss": 0.0087, "step": 323000 }, { "epoch": 134.57, "learning_rate": 6.548544093178038e-06, "loss": 0.009, "step": 323500 }, { "epoch": 134.78, "learning_rate": 6.527745424292846e-06, "loss": 0.0093, "step": 324000 }, { "epoch": 134.98, "learning_rate": 6.506946755407655e-06, "loss": 0.0088, "step": 324500 }, { "epoch": 135.0, "eval_cer": 0.245, "eval_gen_len": 13.5749, "eval_loss": 0.4489509165287018, "eval_runtime": 256.739, "eval_samples_per_second": 12.487, "eval_steps_per_second": 3.124, "step": 324540 }, { "epoch": 135.19, "learning_rate": 6.486148086522463e-06, "loss": 0.0076, "step": 325000 }, { "epoch": 135.4, "learning_rate": 6.465349417637273e-06, "loss": 0.0078, "step": 325500 }, { "epoch": 135.61, "learning_rate": 6.444550748752081e-06, "loss": 0.007, "step": 326000 }, { "epoch": 135.82, "learning_rate": 6.423793677204659e-06, "loss": 0.0086, "step": 326500 }, { "epoch": 136.0, "eval_cer": 0.2458, "eval_gen_len": 13.8032, "eval_loss": 0.45637834072113037, "eval_runtime": 275.2079, "eval_samples_per_second": 11.649, "eval_steps_per_second": 2.914, "step": 326944 }, { "epoch": 136.02, "learning_rate": 6.402995008319468e-06, "loss": 0.0097, "step": 327000 }, { "epoch": 136.23, "learning_rate": 6.382237936772047e-06, "loss": 0.0084, "step": 327500 }, { "epoch": 136.44, "learning_rate": 6.361439267886855e-06, "loss": 0.0084, "step": 328000 }, { "epoch": 136.65, "learning_rate": 6.340640599001664e-06, "loss": 0.0083, "step": 328500 }, { "epoch": 136.86, "learning_rate": 6.319841930116472e-06, "loss": 0.0083, "step": 329000 }, { "epoch": 137.0, "eval_cer": 0.2471, "eval_gen_len": 13.6478, "eval_loss": 0.4573554992675781, "eval_runtime": 269.5066, "eval_samples_per_second": 11.896, "eval_steps_per_second": 2.976, "step": 329348 }, { "epoch": 137.06, "learning_rate": 6.299043261231281e-06, "loss": 0.0089, "step": 329500 }, { "epoch": 137.27, "learning_rate": 6.27824459234609e-06, "loss": 0.0087, "step": 330000 }, { "epoch": 137.48, "learning_rate": 6.257445923460899e-06, "loss": 0.0086, "step": 330500 }, { "epoch": 137.69, "learning_rate": 6.236647254575707e-06, "loss": 0.0085, "step": 331000 }, { "epoch": 137.9, "learning_rate": 6.215890183028287e-06, "loss": 0.0092, "step": 331500 }, { "epoch": 138.0, "eval_cer": 0.2487, "eval_gen_len": 13.8503, "eval_loss": 0.46958354115486145, "eval_runtime": 265.4531, "eval_samples_per_second": 12.077, "eval_steps_per_second": 3.021, "step": 331752 }, { "epoch": 138.1, "learning_rate": 6.195091514143096e-06, "loss": 0.0079, "step": 332000 }, { "epoch": 138.31, "learning_rate": 6.174292845257904e-06, "loss": 0.0073, "step": 332500 }, { "epoch": 138.52, "learning_rate": 6.153494176372713e-06, "loss": 0.0074, "step": 333000 }, { "epoch": 138.73, "learning_rate": 6.132695507487521e-06, "loss": 0.0089, "step": 333500 }, { "epoch": 138.94, "learning_rate": 6.111896838602331e-06, "loss": 0.0082, "step": 334000 }, { "epoch": 139.0, "eval_cer": 0.2476, "eval_gen_len": 13.7916, "eval_loss": 0.4610365629196167, "eval_runtime": 273.061, "eval_samples_per_second": 11.741, "eval_steps_per_second": 2.937, "step": 334156 }, { "epoch": 139.14, "learning_rate": 6.091098169717139e-06, "loss": 0.0076, "step": 334500 }, { "epoch": 139.35, "learning_rate": 6.070299500831948e-06, "loss": 0.0083, "step": 335000 }, { "epoch": 139.56, "learning_rate": 6.049542429284526e-06, "loss": 0.0088, "step": 335500 }, { "epoch": 139.77, "learning_rate": 6.0287437603993344e-06, "loss": 0.0078, "step": 336000 }, { "epoch": 139.98, "learning_rate": 6.007945091514144e-06, "loss": 0.0092, "step": 336500 }, { "epoch": 140.0, "eval_cer": 0.2432, "eval_gen_len": 13.6344, "eval_loss": 0.43919724225997925, "eval_runtime": 265.4301, "eval_samples_per_second": 12.079, "eval_steps_per_second": 3.022, "step": 336560 }, { "epoch": 140.18, "learning_rate": 5.987146422628952e-06, "loss": 0.007, "step": 337000 }, { "epoch": 140.39, "learning_rate": 5.966347753743761e-06, "loss": 0.0078, "step": 337500 }, { "epoch": 140.6, "learning_rate": 5.945590682196339e-06, "loss": 0.0087, "step": 338000 }, { "epoch": 140.81, "learning_rate": 5.924792013311148e-06, "loss": 0.0083, "step": 338500 }, { "epoch": 141.0, "eval_cer": 0.2461, "eval_gen_len": 13.733, "eval_loss": 0.4848983883857727, "eval_runtime": 269.6844, "eval_samples_per_second": 11.888, "eval_steps_per_second": 2.974, "step": 338964 }, { "epoch": 141.01, "learning_rate": 5.904034941763727e-06, "loss": 0.0083, "step": 339000 }, { "epoch": 141.22, "learning_rate": 5.883236272878537e-06, "loss": 0.0079, "step": 339500 }, { "epoch": 141.43, "learning_rate": 5.862437603993345e-06, "loss": 0.007, "step": 340000 }, { "epoch": 141.64, "learning_rate": 5.841638935108154e-06, "loss": 0.0085, "step": 340500 }, { "epoch": 141.85, "learning_rate": 5.820840266222962e-06, "loss": 0.0085, "step": 341000 }, { "epoch": 142.0, "eval_cer": 0.2475, "eval_gen_len": 13.8278, "eval_loss": 0.46004167199134827, "eval_runtime": 262.9827, "eval_samples_per_second": 12.191, "eval_steps_per_second": 3.05, "step": 341368 }, { "epoch": 142.05, "learning_rate": 5.800041597337771e-06, "loss": 0.0079, "step": 341500 }, { "epoch": 142.26, "learning_rate": 5.779242928452579e-06, "loss": 0.0078, "step": 342000 }, { "epoch": 142.47, "learning_rate": 5.758444259567389e-06, "loss": 0.0083, "step": 342500 }, { "epoch": 142.68, "learning_rate": 5.737645590682197e-06, "loss": 0.0072, "step": 343000 }, { "epoch": 142.89, "learning_rate": 5.7168885191347755e-06, "loss": 0.008, "step": 343500 }, { "epoch": 143.0, "eval_cer": 0.2455, "eval_gen_len": 13.7137, "eval_loss": 0.4594569206237793, "eval_runtime": 270.269, "eval_samples_per_second": 11.862, "eval_steps_per_second": 2.967, "step": 343772 }, { "epoch": 143.09, "learning_rate": 5.696089850249584e-06, "loss": 0.0085, "step": 344000 }, { "epoch": 143.3, "learning_rate": 5.675291181364393e-06, "loss": 0.0082, "step": 344500 }, { "epoch": 143.51, "learning_rate": 5.654492512479202e-06, "loss": 0.0078, "step": 345000 }, { "epoch": 143.72, "learning_rate": 5.633735440931781e-06, "loss": 0.0091, "step": 345500 }, { "epoch": 143.93, "learning_rate": 5.61293677204659e-06, "loss": 0.0084, "step": 346000 }, { "epoch": 144.0, "eval_cer": 0.2419, "eval_gen_len": 13.684, "eval_loss": 0.44198158383369446, "eval_runtime": 272.3122, "eval_samples_per_second": 11.773, "eval_steps_per_second": 2.945, "step": 346176 }, { "epoch": 144.13, "learning_rate": 5.592138103161399e-06, "loss": 0.0073, "step": 346500 }, { "epoch": 144.34, "learning_rate": 5.571339434276207e-06, "loss": 0.0076, "step": 347000 }, { "epoch": 144.55, "learning_rate": 5.550540765391016e-06, "loss": 0.0075, "step": 347500 }, { "epoch": 144.76, "learning_rate": 5.529783693843595e-06, "loss": 0.0092, "step": 348000 }, { "epoch": 144.97, "learning_rate": 5.508985024958403e-06, "loss": 0.0087, "step": 348500 }, { "epoch": 145.0, "eval_cer": 0.2435, "eval_gen_len": 13.5817, "eval_loss": 0.4379993975162506, "eval_runtime": 266.6103, "eval_samples_per_second": 12.025, "eval_steps_per_second": 3.008, "step": 348580 }, { "epoch": 145.17, "learning_rate": 5.488186356073212e-06, "loss": 0.007, "step": 349000 }, { "epoch": 145.38, "learning_rate": 5.46738768718802e-06, "loss": 0.0071, "step": 349500 }, { "epoch": 145.59, "learning_rate": 5.4466306156405994e-06, "loss": 0.0086, "step": 350000 }, { "epoch": 145.8, "learning_rate": 5.425831946755408e-06, "loss": 0.0074, "step": 350500 }, { "epoch": 146.0, "eval_cer": 0.2438, "eval_gen_len": 13.747, "eval_loss": 0.4474620223045349, "eval_runtime": 259.9915, "eval_samples_per_second": 12.331, "eval_steps_per_second": 3.085, "step": 350984 }, { "epoch": 146.01, "learning_rate": 5.4050332778702165e-06, "loss": 0.0085, "step": 351000 }, { "epoch": 146.21, "learning_rate": 5.384234608985025e-06, "loss": 0.0071, "step": 351500 }, { "epoch": 146.42, "learning_rate": 5.3634359400998335e-06, "loss": 0.0065, "step": 352000 }, { "epoch": 146.63, "learning_rate": 5.342637271214642e-06, "loss": 0.0076, "step": 352500 }, { "epoch": 146.84, "learning_rate": 5.3218386023294514e-06, "loss": 0.0076, "step": 353000 }, { "epoch": 147.0, "eval_cer": 0.2433, "eval_gen_len": 13.5989, "eval_loss": 0.4507221579551697, "eval_runtime": 259.806, "eval_samples_per_second": 12.34, "eval_steps_per_second": 3.087, "step": 353388 }, { "epoch": 147.05, "learning_rate": 5.30103993344426e-06, "loss": 0.0078, "step": 353500 }, { "epoch": 147.25, "learning_rate": 5.2802412645590685e-06, "loss": 0.0077, "step": 354000 }, { "epoch": 147.46, "learning_rate": 5.259484193011648e-06, "loss": 0.0075, "step": 354500 }, { "epoch": 147.67, "learning_rate": 5.238685524126457e-06, "loss": 0.0068, "step": 355000 }, { "epoch": 147.88, "learning_rate": 5.217886855241265e-06, "loss": 0.0091, "step": 355500 }, { "epoch": 148.0, "eval_cer": 0.2461, "eval_gen_len": 13.8125, "eval_loss": 0.47150808572769165, "eval_runtime": 262.9368, "eval_samples_per_second": 12.193, "eval_steps_per_second": 3.05, "step": 355792 }, { "epoch": 148.09, "learning_rate": 5.1971297836938436e-06, "loss": 0.0082, "step": 356000 }, { "epoch": 148.29, "learning_rate": 5.176331114808653e-06, "loss": 0.0066, "step": 356500 }, { "epoch": 148.5, "learning_rate": 5.1555324459234614e-06, "loss": 0.0081, "step": 357000 }, { "epoch": 148.71, "learning_rate": 5.13473377703827e-06, "loss": 0.0074, "step": 357500 }, { "epoch": 148.92, "learning_rate": 5.1139351081530785e-06, "loss": 0.0078, "step": 358000 }, { "epoch": 149.0, "eval_cer": 0.2472, "eval_gen_len": 13.8253, "eval_loss": 0.46047914028167725, "eval_runtime": 271.5636, "eval_samples_per_second": 11.806, "eval_steps_per_second": 2.953, "step": 358196 }, { "epoch": 149.13, "learning_rate": 5.093136439267887e-06, "loss": 0.0076, "step": 358500 }, { "epoch": 149.33, "learning_rate": 5.0723377703826955e-06, "loss": 0.0084, "step": 359000 }, { "epoch": 149.54, "learning_rate": 5.051539101497505e-06, "loss": 0.0081, "step": 359500 }, { "epoch": 149.75, "learning_rate": 5.0307404326123134e-06, "loss": 0.0075, "step": 360000 }, { "epoch": 149.96, "learning_rate": 5.009941763727122e-06, "loss": 0.0075, "step": 360500 }, { "epoch": 150.0, "eval_cer": 0.2416, "eval_gen_len": 13.4729, "eval_loss": 0.4448852837085724, "eval_runtime": 262.3745, "eval_samples_per_second": 12.219, "eval_steps_per_second": 3.057, "step": 360600 }, { "epoch": 150.17, "learning_rate": 4.989184692179701e-06, "loss": 0.0078, "step": 361000 }, { "epoch": 150.37, "learning_rate": 4.9683860232945095e-06, "loss": 0.0065, "step": 361500 }, { "epoch": 150.58, "learning_rate": 4.947587354409318e-06, "loss": 0.0077, "step": 362000 }, { "epoch": 150.79, "learning_rate": 4.926788685524127e-06, "loss": 0.008, "step": 362500 }, { "epoch": 151.0, "learning_rate": 4.905990016638936e-06, "loss": 0.0076, "step": 363000 }, { "epoch": 151.0, "eval_cer": 0.2458, "eval_gen_len": 13.7838, "eval_loss": 0.47447100281715393, "eval_runtime": 262.0741, "eval_samples_per_second": 12.233, "eval_steps_per_second": 3.06, "step": 363004 }, { "epoch": 151.21, "learning_rate": 4.885232945091515e-06, "loss": 0.0075, "step": 363500 }, { "epoch": 151.41, "learning_rate": 4.8644342762063235e-06, "loss": 0.0074, "step": 364000 }, { "epoch": 151.62, "learning_rate": 4.843635607321132e-06, "loss": 0.0077, "step": 364500 }, { "epoch": 151.83, "learning_rate": 4.822878535773711e-06, "loss": 0.0076, "step": 365000 }, { "epoch": 152.0, "eval_cer": 0.2461, "eval_gen_len": 13.7093, "eval_loss": 0.4570690095424652, "eval_runtime": 264.709, "eval_samples_per_second": 12.111, "eval_steps_per_second": 3.03, "step": 365408 }, { "epoch": 152.04, "learning_rate": 4.8020798668885195e-06, "loss": 0.0075, "step": 365500 }, { "epoch": 152.25, "learning_rate": 4.781281198003328e-06, "loss": 0.0066, "step": 366000 }, { "epoch": 152.45, "learning_rate": 4.7604825291181366e-06, "loss": 0.0079, "step": 366500 }, { "epoch": 152.66, "learning_rate": 4.739683860232945e-06, "loss": 0.0076, "step": 367000 }, { "epoch": 152.87, "learning_rate": 4.718885191347754e-06, "loss": 0.0088, "step": 367500 }, { "epoch": 153.0, "eval_cer": 0.2414, "eval_gen_len": 13.5468, "eval_loss": 0.45958101749420166, "eval_runtime": 263.6085, "eval_samples_per_second": 12.162, "eval_steps_per_second": 3.042, "step": 367812 }, { "epoch": 153.08, "learning_rate": 4.6981281198003335e-06, "loss": 0.0081, "step": 368000 }, { "epoch": 153.29, "learning_rate": 4.677329450915142e-06, "loss": 0.0072, "step": 368500 }, { "epoch": 153.49, "learning_rate": 4.6565307820299505e-06, "loss": 0.007, "step": 369000 }, { "epoch": 153.7, "learning_rate": 4.635732113144759e-06, "loss": 0.0068, "step": 369500 }, { "epoch": 153.91, "learning_rate": 4.6149334442595676e-06, "loss": 0.0082, "step": 370000 }, { "epoch": 154.0, "eval_cer": 0.2475, "eval_gen_len": 13.8384, "eval_loss": 0.46518319845199585, "eval_runtime": 270.3025, "eval_samples_per_second": 11.861, "eval_steps_per_second": 2.967, "step": 370216 }, { "epoch": 154.12, "learning_rate": 4.594134775374376e-06, "loss": 0.0067, "step": 370500 }, { "epoch": 154.33, "learning_rate": 4.5733361064891855e-06, "loss": 0.0073, "step": 371000 }, { "epoch": 154.53, "learning_rate": 4.552537437603994e-06, "loss": 0.0083, "step": 371500 }, { "epoch": 154.74, "learning_rate": 4.5317387687188025e-06, "loss": 0.0074, "step": 372000 }, { "epoch": 154.95, "learning_rate": 4.510940099833611e-06, "loss": 0.0077, "step": 372500 }, { "epoch": 155.0, "eval_cer": 0.2426, "eval_gen_len": 13.6457, "eval_loss": 0.46483084559440613, "eval_runtime": 258.2815, "eval_samples_per_second": 12.413, "eval_steps_per_second": 3.105, "step": 372620 }, { "epoch": 155.16, "learning_rate": 4.49018302828619e-06, "loss": 0.0072, "step": 373000 }, { "epoch": 155.37, "learning_rate": 4.469384359400999e-06, "loss": 0.0066, "step": 373500 }, { "epoch": 155.57, "learning_rate": 4.448627287853578e-06, "loss": 0.0074, "step": 374000 }, { "epoch": 155.78, "learning_rate": 4.427828618968386e-06, "loss": 0.0075, "step": 374500 }, { "epoch": 155.99, "learning_rate": 4.407029950083195e-06, "loss": 0.0074, "step": 375000 }, { "epoch": 156.0, "eval_cer": 0.2422, "eval_gen_len": 13.5889, "eval_loss": 0.4521370232105255, "eval_runtime": 266.1373, "eval_samples_per_second": 12.046, "eval_steps_per_second": 3.013, "step": 375024 }, { "epoch": 156.2, "learning_rate": 4.386231281198003e-06, "loss": 0.0072, "step": 375500 }, { "epoch": 156.41, "learning_rate": 4.3654326123128125e-06, "loss": 0.0073, "step": 376000 }, { "epoch": 156.61, "learning_rate": 4.344633943427621e-06, "loss": 0.0076, "step": 376500 }, { "epoch": 156.82, "learning_rate": 4.32383527454243e-06, "loss": 0.0073, "step": 377000 }, { "epoch": 157.0, "eval_cer": 0.2465, "eval_gen_len": 13.7867, "eval_loss": 0.47169268131256104, "eval_runtime": 264.268, "eval_samples_per_second": 12.132, "eval_steps_per_second": 3.035, "step": 377428 }, { "epoch": 157.03, "learning_rate": 4.303036605657238e-06, "loss": 0.0075, "step": 377500 }, { "epoch": 157.24, "learning_rate": 4.282237936772047e-06, "loss": 0.0063, "step": 378000 }, { "epoch": 157.45, "learning_rate": 4.261480865224626e-06, "loss": 0.0069, "step": 378500 }, { "epoch": 157.65, "learning_rate": 4.240682196339434e-06, "loss": 0.0078, "step": 379000 }, { "epoch": 157.86, "learning_rate": 4.2198835274542435e-06, "loss": 0.0078, "step": 379500 }, { "epoch": 158.0, "eval_cer": 0.243, "eval_gen_len": 13.7143, "eval_loss": 0.46363481879234314, "eval_runtime": 263.4944, "eval_samples_per_second": 12.167, "eval_steps_per_second": 3.044, "step": 379832 }, { "epoch": 158.07, "learning_rate": 4.199084858569052e-06, "loss": 0.0066, "step": 380000 }, { "epoch": 158.28, "learning_rate": 4.178286189683861e-06, "loss": 0.0073, "step": 380500 }, { "epoch": 158.49, "learning_rate": 4.15752911813644e-06, "loss": 0.0064, "step": 381000 }, { "epoch": 158.69, "learning_rate": 4.136730449251248e-06, "loss": 0.008, "step": 381500 }, { "epoch": 158.9, "learning_rate": 4.115931780366057e-06, "loss": 0.007, "step": 382000 }, { "epoch": 159.0, "eval_cer": 0.2413, "eval_gen_len": 13.6556, "eval_loss": 0.4499606192111969, "eval_runtime": 265.7001, "eval_samples_per_second": 12.066, "eval_steps_per_second": 3.018, "step": 382236 }, { "epoch": 159.11, "learning_rate": 4.095133111480866e-06, "loss": 0.0074, "step": 382500 }, { "epoch": 159.32, "learning_rate": 4.0743344425956745e-06, "loss": 0.0064, "step": 383000 }, { "epoch": 159.53, "learning_rate": 4.053535773710483e-06, "loss": 0.0068, "step": 383500 }, { "epoch": 159.73, "learning_rate": 4.032778702163062e-06, "loss": 0.0068, "step": 384000 }, { "epoch": 159.94, "learning_rate": 4.011980033277871e-06, "loss": 0.0079, "step": 384500 }, { "epoch": 160.0, "eval_cer": 0.241, "eval_gen_len": 13.622, "eval_loss": 0.4524941146373749, "eval_runtime": 263.8331, "eval_samples_per_second": 12.152, "eval_steps_per_second": 3.04, "step": 384640 }, { "epoch": 160.15, "learning_rate": 3.991181364392679e-06, "loss": 0.0076, "step": 385000 }, { "epoch": 160.36, "learning_rate": 3.9703826955074885e-06, "loss": 0.0062, "step": 385500 }, { "epoch": 160.57, "learning_rate": 3.949584026622296e-06, "loss": 0.0066, "step": 386000 }, { "epoch": 160.77, "learning_rate": 3.928785357737105e-06, "loss": 0.0067, "step": 386500 }, { "epoch": 160.98, "learning_rate": 3.908069883527454e-06, "loss": 0.0074, "step": 387000 }, { "epoch": 161.0, "eval_cer": 0.2439, "eval_gen_len": 13.7757, "eval_loss": 0.4422759711742401, "eval_runtime": 268.4253, "eval_samples_per_second": 11.944, "eval_steps_per_second": 2.988, "step": 387044 }, { "epoch": 161.19, "learning_rate": 3.887271214642263e-06, "loss": 0.008, "step": 387500 }, { "epoch": 161.4, "learning_rate": 3.866472545757072e-06, "loss": 0.0068, "step": 388000 }, { "epoch": 161.61, "learning_rate": 3.845673876871881e-06, "loss": 0.0066, "step": 388500 }, { "epoch": 161.81, "learning_rate": 3.824875207986689e-06, "loss": 0.0081, "step": 389000 }, { "epoch": 162.0, "eval_cer": 0.2433, "eval_gen_len": 13.7096, "eval_loss": 0.4686408042907715, "eval_runtime": 266.054, "eval_samples_per_second": 12.05, "eval_steps_per_second": 3.014, "step": 389448 }, { "epoch": 162.02, "learning_rate": 3.804118136439268e-06, "loss": 0.0073, "step": 389500 }, { "epoch": 162.23, "learning_rate": 3.7833194675540767e-06, "loss": 0.0057, "step": 390000 }, { "epoch": 162.44, "learning_rate": 3.7625207986688856e-06, "loss": 0.007, "step": 390500 }, { "epoch": 162.65, "learning_rate": 3.741722129783694e-06, "loss": 0.0074, "step": 391000 }, { "epoch": 162.85, "learning_rate": 3.7209234608985027e-06, "loss": 0.0067, "step": 391500 }, { "epoch": 163.0, "eval_cer": 0.2422, "eval_gen_len": 13.68, "eval_loss": 0.4667229950428009, "eval_runtime": 268.59, "eval_samples_per_second": 11.936, "eval_steps_per_second": 2.986, "step": 391852 }, { "epoch": 163.06, "learning_rate": 3.7001247920133116e-06, "loss": 0.0081, "step": 392000 }, { "epoch": 163.27, "learning_rate": 3.67932612312812e-06, "loss": 0.0067, "step": 392500 }, { "epoch": 163.48, "learning_rate": 3.6585274542429287e-06, "loss": 0.0064, "step": 393000 }, { "epoch": 163.69, "learning_rate": 3.6377287853577376e-06, "loss": 0.0073, "step": 393500 }, { "epoch": 163.89, "learning_rate": 3.6169717138103167e-06, "loss": 0.0074, "step": 394000 }, { "epoch": 164.0, "eval_cer": 0.2418, "eval_gen_len": 13.6482, "eval_loss": 0.4521939754486084, "eval_runtime": 273.0322, "eval_samples_per_second": 11.742, "eval_steps_per_second": 2.937, "step": 394256 }, { "epoch": 164.1, "learning_rate": 3.596173044925125e-06, "loss": 0.007, "step": 394500 }, { "epoch": 164.31, "learning_rate": 3.5754159733777038e-06, "loss": 0.0072, "step": 395000 }, { "epoch": 164.52, "learning_rate": 3.5546173044925127e-06, "loss": 0.0064, "step": 395500 }, { "epoch": 164.73, "learning_rate": 3.5338186356073212e-06, "loss": 0.0071, "step": 396000 }, { "epoch": 164.93, "learning_rate": 3.5130199667221298e-06, "loss": 0.0085, "step": 396500 }, { "epoch": 165.0, "eval_cer": 0.2414, "eval_gen_len": 13.6282, "eval_loss": 0.45710650086402893, "eval_runtime": 255.6061, "eval_samples_per_second": 12.543, "eval_steps_per_second": 3.138, "step": 396660 }, { "epoch": 165.14, "learning_rate": 3.4922212978369387e-06, "loss": 0.0063, "step": 397000 }, { "epoch": 165.35, "learning_rate": 3.4714226289517472e-06, "loss": 0.0068, "step": 397500 }, { "epoch": 165.56, "learning_rate": 3.4506239600665558e-06, "loss": 0.0072, "step": 398000 }, { "epoch": 165.77, "learning_rate": 3.4298668885191348e-06, "loss": 0.0065, "step": 398500 }, { "epoch": 165.97, "learning_rate": 3.4090682196339437e-06, "loss": 0.0074, "step": 399000 }, { "epoch": 166.0, "eval_cer": 0.241, "eval_gen_len": 13.6073, "eval_loss": 0.45994168519973755, "eval_runtime": 256.591, "eval_samples_per_second": 12.495, "eval_steps_per_second": 3.126, "step": 399064 }, { "epoch": 166.18, "learning_rate": 3.3882695507487522e-06, "loss": 0.0072, "step": 399500 }, { "epoch": 166.39, "learning_rate": 3.3674708818635608e-06, "loss": 0.0061, "step": 400000 }, { "epoch": 166.6, "learning_rate": 3.3466722129783697e-06, "loss": 0.0062, "step": 400500 }, { "epoch": 166.81, "learning_rate": 3.3258735440931782e-06, "loss": 0.0071, "step": 401000 }, { "epoch": 167.0, "eval_cer": 0.2427, "eval_gen_len": 13.5515, "eval_loss": 0.4614485502243042, "eval_runtime": 261.146, "eval_samples_per_second": 12.277, "eval_steps_per_second": 3.071, "step": 401468 }, { "epoch": 167.01, "learning_rate": 3.305074875207987e-06, "loss": 0.0076, "step": 401500 }, { "epoch": 167.22, "learning_rate": 3.2842762063227957e-06, "loss": 0.0068, "step": 402000 }, { "epoch": 167.43, "learning_rate": 3.2634775374376042e-06, "loss": 0.0075, "step": 402500 }, { "epoch": 167.64, "learning_rate": 3.2427204658901832e-06, "loss": 0.007, "step": 403000 }, { "epoch": 167.85, "learning_rate": 3.221921797004992e-06, "loss": 0.0054, "step": 403500 }, { "epoch": 168.0, "eval_cer": 0.2471, "eval_gen_len": 13.8865, "eval_loss": 0.4741056263446808, "eval_runtime": 274.0865, "eval_samples_per_second": 11.697, "eval_steps_per_second": 2.926, "step": 403872 }, { "epoch": 168.05, "learning_rate": 3.2011231281198007e-06, "loss": 0.0066, "step": 404000 }, { "epoch": 168.26, "learning_rate": 3.1803244592346092e-06, "loss": 0.0058, "step": 404500 }, { "epoch": 168.47, "learning_rate": 3.1595673876871887e-06, "loss": 0.0074, "step": 405000 }, { "epoch": 168.68, "learning_rate": 3.138768718801997e-06, "loss": 0.007, "step": 405500 }, { "epoch": 168.89, "learning_rate": 3.1179700499168057e-06, "loss": 0.0076, "step": 406000 }, { "epoch": 169.0, "eval_cer": 0.2432, "eval_gen_len": 13.6996, "eval_loss": 0.46461164951324463, "eval_runtime": 266.1722, "eval_samples_per_second": 12.045, "eval_steps_per_second": 3.013, "step": 406276 }, { "epoch": 169.09, "learning_rate": 3.097171381031614e-06, "loss": 0.0065, "step": 406500 }, { "epoch": 169.3, "learning_rate": 3.0763727121464228e-06, "loss": 0.0061, "step": 407000 }, { "epoch": 169.51, "learning_rate": 3.0555740432612313e-06, "loss": 0.0067, "step": 407500 }, { "epoch": 169.72, "learning_rate": 3.03477537437604e-06, "loss": 0.0075, "step": 408000 }, { "epoch": 169.93, "learning_rate": 3.0140183028286193e-06, "loss": 0.0064, "step": 408500 }, { "epoch": 170.0, "eval_cer": 0.2407, "eval_gen_len": 13.6525, "eval_loss": 0.45978671312332153, "eval_runtime": 264.0025, "eval_samples_per_second": 12.144, "eval_steps_per_second": 3.038, "step": 408680 }, { "epoch": 170.13, "learning_rate": 2.993219633943428e-06, "loss": 0.0058, "step": 409000 }, { "epoch": 170.34, "learning_rate": 2.9724209650582363e-06, "loss": 0.0072, "step": 409500 }, { "epoch": 170.55, "learning_rate": 2.9516222961730453e-06, "loss": 0.0073, "step": 410000 }, { "epoch": 170.76, "learning_rate": 2.9308236272878538e-06, "loss": 0.0067, "step": 410500 }, { "epoch": 170.97, "learning_rate": 2.9100249584026623e-06, "loss": 0.0066, "step": 411000 }, { "epoch": 171.0, "eval_cer": 0.2463, "eval_gen_len": 13.8612, "eval_loss": 0.4791421890258789, "eval_runtime": 266.2729, "eval_samples_per_second": 12.04, "eval_steps_per_second": 3.012, "step": 411084 }, { "epoch": 171.17, "learning_rate": 2.8892262895174713e-06, "loss": 0.0066, "step": 411500 }, { "epoch": 171.38, "learning_rate": 2.8684276206322798e-06, "loss": 0.0055, "step": 412000 }, { "epoch": 171.59, "learning_rate": 2.847670549084859e-06, "loss": 0.0072, "step": 412500 }, { "epoch": 171.8, "learning_rate": 2.8268718801996673e-06, "loss": 0.0067, "step": 413000 }, { "epoch": 172.0, "eval_cer": 0.2408, "eval_gen_len": 13.6622, "eval_loss": 0.4588078260421753, "eval_runtime": 266.7256, "eval_samples_per_second": 12.02, "eval_steps_per_second": 3.007, "step": 413488 }, { "epoch": 172.0, "learning_rate": 2.8060732113144763e-06, "loss": 0.0074, "step": 413500 }, { "epoch": 172.21, "learning_rate": 2.7852745424292848e-06, "loss": 0.0066, "step": 414000 }, { "epoch": 172.42, "learning_rate": 2.7644758735440937e-06, "loss": 0.0058, "step": 414500 }, { "epoch": 172.63, "learning_rate": 2.7436772046589023e-06, "loss": 0.0067, "step": 415000 }, { "epoch": 172.84, "learning_rate": 2.7228785357737108e-06, "loss": 0.0074, "step": 415500 }, { "epoch": 173.0, "eval_cer": 0.2411, "eval_gen_len": 13.7199, "eval_loss": 0.45347917079925537, "eval_runtime": 266.7945, "eval_samples_per_second": 12.017, "eval_steps_per_second": 3.006, "step": 415892 }, { "epoch": 173.04, "learning_rate": 2.70212146422629e-06, "loss": 0.0065, "step": 416000 }, { "epoch": 173.25, "learning_rate": 2.6813227953410987e-06, "loss": 0.0066, "step": 416500 }, { "epoch": 173.46, "learning_rate": 2.6605241264559073e-06, "loss": 0.006, "step": 417000 }, { "epoch": 173.67, "learning_rate": 2.639725457570716e-06, "loss": 0.0065, "step": 417500 }, { "epoch": 173.88, "learning_rate": 2.618926788685524e-06, "loss": 0.0073, "step": 418000 }, { "epoch": 174.0, "eval_cer": 0.2413, "eval_gen_len": 13.7389, "eval_loss": 0.4470750093460083, "eval_runtime": 261.7235, "eval_samples_per_second": 12.25, "eval_steps_per_second": 3.064, "step": 418296 }, { "epoch": 174.08, "learning_rate": 2.598128119800333e-06, "loss": 0.007, "step": 418500 }, { "epoch": 174.29, "learning_rate": 2.5773294509151414e-06, "loss": 0.0068, "step": 419000 }, { "epoch": 174.5, "learning_rate": 2.55653078202995e-06, "loss": 0.0073, "step": 419500 }, { "epoch": 174.71, "learning_rate": 2.5358153078203e-06, "loss": 0.0057, "step": 420000 }, { "epoch": 174.92, "learning_rate": 2.5150166389351083e-06, "loss": 0.0066, "step": 420500 }, { "epoch": 175.0, "eval_cer": 0.2421, "eval_gen_len": 13.7676, "eval_loss": 0.46230319142341614, "eval_runtime": 274.1203, "eval_samples_per_second": 11.696, "eval_steps_per_second": 2.926, "step": 420700 }, { "epoch": 175.12, "learning_rate": 2.494217970049917e-06, "loss": 0.0072, "step": 421000 }, { "epoch": 175.33, "learning_rate": 2.473419301164726e-06, "loss": 0.006, "step": 421500 }, { "epoch": 175.54, "learning_rate": 2.4526206322795343e-06, "loss": 0.0068, "step": 422000 }, { "epoch": 175.75, "learning_rate": 2.4318635607321134e-06, "loss": 0.0068, "step": 422500 }, { "epoch": 175.96, "learning_rate": 2.411064891846922e-06, "loss": 0.0067, "step": 423000 }, { "epoch": 176.0, "eval_cer": 0.2427, "eval_gen_len": 13.7302, "eval_loss": 0.46741247177124023, "eval_runtime": 271.0852, "eval_samples_per_second": 11.827, "eval_steps_per_second": 2.958, "step": 423104 }, { "epoch": 176.16, "learning_rate": 2.390266222961731e-06, "loss": 0.0065, "step": 423500 }, { "epoch": 176.37, "learning_rate": 2.3694675540765393e-06, "loss": 0.0062, "step": 424000 }, { "epoch": 176.58, "learning_rate": 2.348668885191348e-06, "loss": 0.0057, "step": 424500 }, { "epoch": 176.79, "learning_rate": 2.327870216306157e-06, "loss": 0.007, "step": 425000 }, { "epoch": 177.0, "learning_rate": 2.307071547420965e-06, "loss": 0.0077, "step": 425500 }, { "epoch": 177.0, "eval_cer": 0.2399, "eval_gen_len": 13.5359, "eval_loss": 0.45084038376808167, "eval_runtime": 262.6698, "eval_samples_per_second": 12.205, "eval_steps_per_second": 3.053, "step": 425508 }, { "epoch": 177.2, "learning_rate": 2.286272878535774e-06, "loss": 0.0059, "step": 426000 }, { "epoch": 177.41, "learning_rate": 2.2654742096505824e-06, "loss": 0.0067, "step": 426500 }, { "epoch": 177.62, "learning_rate": 2.2447171381031614e-06, "loss": 0.006, "step": 427000 }, { "epoch": 177.83, "learning_rate": 2.2239184692179704e-06, "loss": 0.0066, "step": 427500 }, { "epoch": 178.0, "eval_cer": 0.2408, "eval_gen_len": 13.7302, "eval_loss": 0.4653932452201843, "eval_runtime": 268.3858, "eval_samples_per_second": 11.945, "eval_steps_per_second": 2.988, "step": 427912 }, { "epoch": 178.04, "learning_rate": 2.203119800332779e-06, "loss": 0.0064, "step": 428000 }, { "epoch": 178.24, "learning_rate": 2.1823211314475874e-06, "loss": 0.0055, "step": 428500 }, { "epoch": 178.45, "learning_rate": 2.161564059900167e-06, "loss": 0.0068, "step": 429000 }, { "epoch": 178.66, "learning_rate": 2.1407653910149754e-06, "loss": 0.0068, "step": 429500 }, { "epoch": 178.87, "learning_rate": 2.119966722129784e-06, "loss": 0.0067, "step": 430000 }, { "epoch": 179.0, "eval_cer": 0.2408, "eval_gen_len": 13.6925, "eval_loss": 0.462500661611557, "eval_runtime": 268.4086, "eval_samples_per_second": 11.944, "eval_steps_per_second": 2.988, "step": 430316 }, { "epoch": 179.08, "learning_rate": 2.0991680532445924e-06, "loss": 0.0067, "step": 430500 }, { "epoch": 179.28, "learning_rate": 2.078369384359401e-06, "loss": 0.0059, "step": 431000 }, { "epoch": 179.49, "learning_rate": 2.05761231281198e-06, "loss": 0.0064, "step": 431500 }, { "epoch": 179.7, "learning_rate": 2.036813643926789e-06, "loss": 0.0065, "step": 432000 }, { "epoch": 179.91, "learning_rate": 2.0160149750415974e-06, "loss": 0.0072, "step": 432500 }, { "epoch": 180.0, "eval_cer": 0.242, "eval_gen_len": 13.7545, "eval_loss": 0.46416959166526794, "eval_runtime": 267.9605, "eval_samples_per_second": 11.964, "eval_steps_per_second": 2.993, "step": 432720 }, { "epoch": 180.12, "learning_rate": 1.995216306156406e-06, "loss": 0.0064, "step": 433000 }, { "epoch": 180.32, "learning_rate": 1.9744592346089854e-06, "loss": 0.0058, "step": 433500 }, { "epoch": 180.53, "learning_rate": 1.953660565723794e-06, "loss": 0.0063, "step": 434000 }, { "epoch": 180.74, "learning_rate": 1.9328618968386024e-06, "loss": 0.0065, "step": 434500 }, { "epoch": 180.95, "learning_rate": 1.9120632279534114e-06, "loss": 0.0066, "step": 435000 }, { "epoch": 181.0, "eval_cer": 0.2401, "eval_gen_len": 13.6538, "eval_loss": 0.4692617356777191, "eval_runtime": 260.4012, "eval_samples_per_second": 12.312, "eval_steps_per_second": 3.08, "step": 435124 }, { "epoch": 181.16, "learning_rate": 1.8912645590682197e-06, "loss": 0.0076, "step": 435500 }, { "epoch": 181.36, "learning_rate": 1.8705074875207987e-06, "loss": 0.0059, "step": 436000 }, { "epoch": 181.57, "learning_rate": 1.8497088186356074e-06, "loss": 0.0069, "step": 436500 }, { "epoch": 181.78, "learning_rate": 1.8289101497504162e-06, "loss": 0.0061, "step": 437000 }, { "epoch": 181.99, "learning_rate": 1.8081114808652247e-06, "loss": 0.0064, "step": 437500 }, { "epoch": 182.0, "eval_cer": 0.2413, "eval_gen_len": 13.7302, "eval_loss": 0.4686383605003357, "eval_runtime": 260.1458, "eval_samples_per_second": 12.324, "eval_steps_per_second": 3.083, "step": 437528 }, { "epoch": 182.2, "learning_rate": 1.787354409317804e-06, "loss": 0.0062, "step": 438000 }, { "epoch": 182.4, "learning_rate": 1.7665557404326125e-06, "loss": 0.0066, "step": 438500 }, { "epoch": 182.61, "learning_rate": 1.7457570715474212e-06, "loss": 0.0066, "step": 439000 }, { "epoch": 182.82, "learning_rate": 1.725e-06, "loss": 0.0067, "step": 439500 }, { "epoch": 183.0, "eval_cer": 0.2415, "eval_gen_len": 13.7371, "eval_loss": 0.46899163722991943, "eval_runtime": 270.7897, "eval_samples_per_second": 11.839, "eval_steps_per_second": 2.962, "step": 439932 }, { "epoch": 183.03, "learning_rate": 1.7042013311148087e-06, "loss": 0.0059, "step": 440000 }, { "epoch": 183.24, "learning_rate": 1.6834026622296173e-06, "loss": 0.0057, "step": 440500 }, { "epoch": 183.44, "learning_rate": 1.662603993344426e-06, "loss": 0.0062, "step": 441000 }, { "epoch": 183.65, "learning_rate": 1.6418053244592347e-06, "loss": 0.0061, "step": 441500 }, { "epoch": 183.86, "learning_rate": 1.6210066555740433e-06, "loss": 0.0067, "step": 442000 }, { "epoch": 184.0, "eval_cer": 0.2392, "eval_gen_len": 13.7442, "eval_loss": 0.47148939967155457, "eval_runtime": 271.9757, "eval_samples_per_second": 11.788, "eval_steps_per_second": 2.949, "step": 442336 }, { "epoch": 184.07, "learning_rate": 1.600207986688852e-06, "loss": 0.0066, "step": 442500 }, { "epoch": 184.28, "learning_rate": 1.5794093178036607e-06, "loss": 0.0063, "step": 443000 }, { "epoch": 184.48, "learning_rate": 1.5586106489184695e-06, "loss": 0.0063, "step": 443500 }, { "epoch": 184.69, "learning_rate": 1.5378535773710485e-06, "loss": 0.0063, "step": 444000 }, { "epoch": 184.9, "learning_rate": 1.5170965058236273e-06, "loss": 0.0062, "step": 444500 }, { "epoch": 185.0, "eval_cer": 0.2395, "eval_gen_len": 13.6572, "eval_loss": 0.46137315034866333, "eval_runtime": 269.0871, "eval_samples_per_second": 11.914, "eval_steps_per_second": 2.98, "step": 444740 }, { "epoch": 185.11, "learning_rate": 1.496297836938436e-06, "loss": 0.0066, "step": 445000 }, { "epoch": 185.32, "learning_rate": 1.4754991680532445e-06, "loss": 0.006, "step": 445500 }, { "epoch": 185.52, "learning_rate": 1.4547004991680533e-06, "loss": 0.0053, "step": 446000 }, { "epoch": 185.73, "learning_rate": 1.433901830282862e-06, "loss": 0.0075, "step": 446500 }, { "epoch": 185.94, "learning_rate": 1.4131031613976705e-06, "loss": 0.0068, "step": 447000 }, { "epoch": 186.0, "eval_cer": 0.2396, "eval_gen_len": 13.6185, "eval_loss": 0.46076661348342896, "eval_runtime": 256.715, "eval_samples_per_second": 12.489, "eval_steps_per_second": 3.124, "step": 447144 }, { "epoch": 186.15, "learning_rate": 1.3923044925124793e-06, "loss": 0.0062, "step": 447500 }, { "epoch": 186.36, "learning_rate": 1.371505823627288e-06, "loss": 0.0058, "step": 448000 }, { "epoch": 186.56, "learning_rate": 1.3507071547420965e-06, "loss": 0.0065, "step": 448500 }, { "epoch": 186.77, "learning_rate": 1.3299500831946758e-06, "loss": 0.0065, "step": 449000 }, { "epoch": 186.98, "learning_rate": 1.3091514143094845e-06, "loss": 0.0064, "step": 449500 }, { "epoch": 187.0, "eval_cer": 0.2391, "eval_gen_len": 13.7558, "eval_loss": 0.47285133600234985, "eval_runtime": 259.8275, "eval_samples_per_second": 12.339, "eval_steps_per_second": 3.087, "step": 449548 }, { "epoch": 187.19, "learning_rate": 1.288352745424293e-06, "loss": 0.0054, "step": 450000 }, { "epoch": 187.4, "learning_rate": 1.2675540765391017e-06, "loss": 0.0064, "step": 450500 }, { "epoch": 187.6, "learning_rate": 1.2467554076539103e-06, "loss": 0.0059, "step": 451000 }, { "epoch": 187.81, "learning_rate": 1.225956738768719e-06, "loss": 0.0059, "step": 451500 }, { "epoch": 188.0, "eval_cer": 0.2397, "eval_gen_len": 13.6946, "eval_loss": 0.47259289026260376, "eval_runtime": 273.3002, "eval_samples_per_second": 11.731, "eval_steps_per_second": 2.935, "step": 451952 }, { "epoch": 188.02, "learning_rate": 1.2051996672212978e-06, "loss": 0.0073, "step": 452000 }, { "epoch": 188.23, "learning_rate": 1.1844009983361065e-06, "loss": 0.0065, "step": 452500 }, { "epoch": 188.44, "learning_rate": 1.1636439267886856e-06, "loss": 0.0069, "step": 453000 }, { "epoch": 188.64, "learning_rate": 1.1428452579034943e-06, "loss": 0.0062, "step": 453500 }, { "epoch": 188.85, "learning_rate": 1.1220881863560733e-06, "loss": 0.0052, "step": 454000 }, { "epoch": 189.0, "eval_cer": 0.239, "eval_gen_len": 13.6185, "eval_loss": 0.4666392505168915, "eval_runtime": 268.978, "eval_samples_per_second": 11.919, "eval_steps_per_second": 2.982, "step": 454356 }, { "epoch": 189.06, "learning_rate": 1.1012895174708818e-06, "loss": 0.0061, "step": 454500 }, { "epoch": 189.27, "learning_rate": 1.0804908485856906e-06, "loss": 0.0065, "step": 455000 }, { "epoch": 189.48, "learning_rate": 1.0596921797004993e-06, "loss": 0.0051, "step": 455500 }, { "epoch": 189.68, "learning_rate": 1.038893510815308e-06, "loss": 0.0066, "step": 456000 }, { "epoch": 189.89, "learning_rate": 1.0180948419301166e-06, "loss": 0.0066, "step": 456500 }, { "epoch": 190.0, "eval_cer": 0.2381, "eval_gen_len": 13.6825, "eval_loss": 0.4684942066669464, "eval_runtime": 269.3222, "eval_samples_per_second": 11.904, "eval_steps_per_second": 2.978, "step": 456760 }, { "epoch": 190.1, "learning_rate": 9.97296173044925e-07, "loss": 0.0061, "step": 457000 }, { "epoch": 190.31, "learning_rate": 9.765391014975043e-07, "loss": 0.0058, "step": 457500 }, { "epoch": 190.52, "learning_rate": 9.557404326123129e-07, "loss": 0.0058, "step": 458000 }, { "epoch": 190.72, "learning_rate": 9.349417637271216e-07, "loss": 0.0065, "step": 458500 }, { "epoch": 190.93, "learning_rate": 9.141430948419302e-07, "loss": 0.0061, "step": 459000 }, { "epoch": 191.0, "eval_cer": 0.238, "eval_gen_len": 13.6538, "eval_loss": 0.46481847763061523, "eval_runtime": 260.0695, "eval_samples_per_second": 12.327, "eval_steps_per_second": 3.084, "step": 459164 }, { "epoch": 191.14, "learning_rate": 8.933444259567387e-07, "loss": 0.006, "step": 459500 }, { "epoch": 191.35, "learning_rate": 8.725457570715475e-07, "loss": 0.0055, "step": 460000 }, { "epoch": 191.56, "learning_rate": 8.517470881863561e-07, "loss": 0.0058, "step": 460500 }, { "epoch": 191.76, "learning_rate": 8.309900166389352e-07, "loss": 0.0066, "step": 461000 }, { "epoch": 191.97, "learning_rate": 8.101913477537439e-07, "loss": 0.0063, "step": 461500 }, { "epoch": 192.0, "eval_cer": 0.2386, "eval_gen_len": 13.6301, "eval_loss": 0.46835771203041077, "eval_runtime": 258.927, "eval_samples_per_second": 12.382, "eval_steps_per_second": 3.097, "step": 461568 }, { "epoch": 192.18, "learning_rate": 7.893926788685524e-07, "loss": 0.0063, "step": 462000 }, { "epoch": 192.39, "learning_rate": 7.685940099833611e-07, "loss": 0.0059, "step": 462500 }, { "epoch": 192.6, "learning_rate": 7.477953410981697e-07, "loss": 0.0058, "step": 463000 }, { "epoch": 192.8, "learning_rate": 7.269966722129785e-07, "loss": 0.0064, "step": 463500 }, { "epoch": 193.0, "eval_cer": 0.2377, "eval_gen_len": 13.6687, "eval_loss": 0.4715929627418518, "eval_runtime": 259.4667, "eval_samples_per_second": 12.356, "eval_steps_per_second": 3.091, "step": 463972 }, { "epoch": 193.01, "learning_rate": 7.061980033277871e-07, "loss": 0.0051, "step": 464000 }, { "epoch": 193.22, "learning_rate": 6.853993344425957e-07, "loss": 0.0054, "step": 464500 } ], "max_steps": 480800, "num_train_epochs": 200, "total_flos": 2079248374038528.0, "trial_name": null, "trial_params": null }