KhushiDS's picture
Upload folder using huggingface_hub
c4f185c verified
{
"best_metric": 44.0582094924667,
"best_model_checkpoint": "./whisper-large-v3-Marathi-Version1/checkpoint-8000",
"epoch": 17.24137931034483,
"eval_steps": 2000,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10775862068965517,
"grad_norm": 0.3231394290924072,
"learning_rate": 1.5000000000000002e-07,
"loss": 0.5889,
"step": 50
},
{
"epoch": 0.21551724137931033,
"grad_norm": 0.3586913049221039,
"learning_rate": 3.0000000000000004e-07,
"loss": 0.5863,
"step": 100
},
{
"epoch": 0.3232758620689655,
"grad_norm": 0.415686696767807,
"learning_rate": 4.5e-07,
"loss": 0.5768,
"step": 150
},
{
"epoch": 0.43103448275862066,
"grad_norm": 0.4167291224002838,
"learning_rate": 6.000000000000001e-07,
"loss": 0.5821,
"step": 200
},
{
"epoch": 0.5387931034482759,
"grad_norm": 0.34522315859794617,
"learning_rate": 7.5e-07,
"loss": 0.5665,
"step": 250
},
{
"epoch": 0.646551724137931,
"grad_norm": 0.3691834807395935,
"learning_rate": 9e-07,
"loss": 0.5628,
"step": 300
},
{
"epoch": 0.7543103448275862,
"grad_norm": 0.344501793384552,
"learning_rate": 1.05e-06,
"loss": 0.5705,
"step": 350
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.2788464426994324,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.5616,
"step": 400
},
{
"epoch": 0.9698275862068966,
"grad_norm": 0.27220970392227173,
"learning_rate": 1.35e-06,
"loss": 0.5344,
"step": 450
},
{
"epoch": 1.0775862068965518,
"grad_norm": 0.2518285810947418,
"learning_rate": 1.5e-06,
"loss": 0.5566,
"step": 500
},
{
"epoch": 1.1853448275862069,
"grad_norm": 0.29989466071128845,
"learning_rate": 1.65e-06,
"loss": 0.5213,
"step": 550
},
{
"epoch": 1.293103448275862,
"grad_norm": 0.26115870475769043,
"learning_rate": 1.8e-06,
"loss": 0.5062,
"step": 600
},
{
"epoch": 1.4008620689655173,
"grad_norm": 0.2464788407087326,
"learning_rate": 1.95e-06,
"loss": 0.5022,
"step": 650
},
{
"epoch": 1.5086206896551724,
"grad_norm": 0.2869219183921814,
"learning_rate": 2.1e-06,
"loss": 0.4828,
"step": 700
},
{
"epoch": 1.6163793103448276,
"grad_norm": 0.2619161307811737,
"learning_rate": 2.25e-06,
"loss": 0.4896,
"step": 750
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.33810245990753174,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.4639,
"step": 800
},
{
"epoch": 1.831896551724138,
"grad_norm": 0.2645559012889862,
"learning_rate": 2.55e-06,
"loss": 0.4464,
"step": 850
},
{
"epoch": 1.9396551724137931,
"grad_norm": 0.2778465151786804,
"learning_rate": 2.7e-06,
"loss": 0.4273,
"step": 900
},
{
"epoch": 2.0474137931034484,
"grad_norm": 0.2977587580680847,
"learning_rate": 2.85e-06,
"loss": 0.401,
"step": 950
},
{
"epoch": 2.1551724137931036,
"grad_norm": 0.3463541567325592,
"learning_rate": 3e-06,
"loss": 0.4094,
"step": 1000
},
{
"epoch": 2.2629310344827585,
"grad_norm": 0.26817262172698975,
"learning_rate": 2.992105263157895e-06,
"loss": 0.3967,
"step": 1050
},
{
"epoch": 2.3706896551724137,
"grad_norm": 0.34868425130844116,
"learning_rate": 2.9842105263157896e-06,
"loss": 0.3987,
"step": 1100
},
{
"epoch": 2.478448275862069,
"grad_norm": 0.33089494705200195,
"learning_rate": 2.9763157894736843e-06,
"loss": 0.3953,
"step": 1150
},
{
"epoch": 2.586206896551724,
"grad_norm": 0.3616925776004791,
"learning_rate": 2.968421052631579e-06,
"loss": 0.3668,
"step": 1200
},
{
"epoch": 2.6939655172413794,
"grad_norm": 0.33365535736083984,
"learning_rate": 2.960526315789474e-06,
"loss": 0.3735,
"step": 1250
},
{
"epoch": 2.8017241379310347,
"grad_norm": 0.3913170099258423,
"learning_rate": 2.9526315789473685e-06,
"loss": 0.3632,
"step": 1300
},
{
"epoch": 2.9094827586206895,
"grad_norm": 0.2940693497657776,
"learning_rate": 2.9447368421052633e-06,
"loss": 0.359,
"step": 1350
},
{
"epoch": 3.0172413793103448,
"grad_norm": 0.3293132781982422,
"learning_rate": 2.936842105263158e-06,
"loss": 0.3824,
"step": 1400
},
{
"epoch": 3.125,
"grad_norm": 0.34137511253356934,
"learning_rate": 2.9289473684210528e-06,
"loss": 0.3616,
"step": 1450
},
{
"epoch": 3.2327586206896552,
"grad_norm": 0.34217217564582825,
"learning_rate": 2.9210526315789475e-06,
"loss": 0.3525,
"step": 1500
},
{
"epoch": 3.3405172413793105,
"grad_norm": 0.39114320278167725,
"learning_rate": 2.9131578947368423e-06,
"loss": 0.3413,
"step": 1550
},
{
"epoch": 3.4482758620689653,
"grad_norm": 0.3646846115589142,
"learning_rate": 2.905263157894737e-06,
"loss": 0.3385,
"step": 1600
},
{
"epoch": 3.5560344827586206,
"grad_norm": 0.4652530550956726,
"learning_rate": 2.8973684210526318e-06,
"loss": 0.3534,
"step": 1650
},
{
"epoch": 3.663793103448276,
"grad_norm": 0.3728253245353699,
"learning_rate": 2.8894736842105265e-06,
"loss": 0.3392,
"step": 1700
},
{
"epoch": 3.771551724137931,
"grad_norm": 0.4309278428554535,
"learning_rate": 2.8815789473684213e-06,
"loss": 0.3249,
"step": 1750
},
{
"epoch": 3.8793103448275863,
"grad_norm": 0.4407905042171478,
"learning_rate": 2.873684210526316e-06,
"loss": 0.3218,
"step": 1800
},
{
"epoch": 3.987068965517241,
"grad_norm": 0.41783958673477173,
"learning_rate": 2.8657894736842103e-06,
"loss": 0.3285,
"step": 1850
},
{
"epoch": 4.094827586206897,
"grad_norm": 0.38858509063720703,
"learning_rate": 2.857894736842105e-06,
"loss": 0.3292,
"step": 1900
},
{
"epoch": 4.202586206896552,
"grad_norm": 0.37383902072906494,
"learning_rate": 2.85e-06,
"loss": 0.3252,
"step": 1950
},
{
"epoch": 4.310344827586207,
"grad_norm": 0.5148123502731323,
"learning_rate": 2.8421052631578946e-06,
"loss": 0.315,
"step": 2000
},
{
"epoch": 4.310344827586207,
"eval_loss": 0.3307957649230957,
"eval_runtime": 5462.6499,
"eval_samples_per_second": 0.186,
"eval_steps_per_second": 0.023,
"eval_wer": 52.398827582660566,
"step": 2000
},
{
"epoch": 4.418103448275862,
"grad_norm": 0.4609113931655884,
"learning_rate": 2.8342105263157897e-06,
"loss": 0.318,
"step": 2050
},
{
"epoch": 4.525862068965517,
"grad_norm": 0.502840518951416,
"learning_rate": 2.8263157894736845e-06,
"loss": 0.3232,
"step": 2100
},
{
"epoch": 4.633620689655173,
"grad_norm": 0.4469599425792694,
"learning_rate": 2.8184210526315792e-06,
"loss": 0.3118,
"step": 2150
},
{
"epoch": 4.741379310344827,
"grad_norm": 0.4370408356189728,
"learning_rate": 2.810526315789474e-06,
"loss": 0.3068,
"step": 2200
},
{
"epoch": 4.849137931034483,
"grad_norm": 0.46128618717193604,
"learning_rate": 2.8026315789473687e-06,
"loss": 0.3154,
"step": 2250
},
{
"epoch": 4.956896551724138,
"grad_norm": 0.5146879553794861,
"learning_rate": 2.7947368421052635e-06,
"loss": 0.3102,
"step": 2300
},
{
"epoch": 5.064655172413793,
"grad_norm": 0.5018391609191895,
"learning_rate": 2.7868421052631578e-06,
"loss": 0.3113,
"step": 2350
},
{
"epoch": 5.172413793103448,
"grad_norm": 0.42680951952934265,
"learning_rate": 2.7789473684210525e-06,
"loss": 0.3143,
"step": 2400
},
{
"epoch": 5.280172413793103,
"grad_norm": 0.5996211767196655,
"learning_rate": 2.7710526315789473e-06,
"loss": 0.2932,
"step": 2450
},
{
"epoch": 5.387931034482759,
"grad_norm": 0.48398110270500183,
"learning_rate": 2.763157894736842e-06,
"loss": 0.287,
"step": 2500
},
{
"epoch": 5.495689655172414,
"grad_norm": 0.6341568231582642,
"learning_rate": 2.7552631578947368e-06,
"loss": 0.3083,
"step": 2550
},
{
"epoch": 5.603448275862069,
"grad_norm": 0.42023149132728577,
"learning_rate": 2.7473684210526315e-06,
"loss": 0.3064,
"step": 2600
},
{
"epoch": 5.711206896551724,
"grad_norm": 0.5808025598526001,
"learning_rate": 2.7394736842105263e-06,
"loss": 0.2893,
"step": 2650
},
{
"epoch": 5.818965517241379,
"grad_norm": 0.5769814252853394,
"learning_rate": 2.7315789473684214e-06,
"loss": 0.2837,
"step": 2700
},
{
"epoch": 5.926724137931035,
"grad_norm": 0.5669300556182861,
"learning_rate": 2.723684210526316e-06,
"loss": 0.2951,
"step": 2750
},
{
"epoch": 6.0344827586206895,
"grad_norm": 0.4516044855117798,
"learning_rate": 2.715789473684211e-06,
"loss": 0.3043,
"step": 2800
},
{
"epoch": 6.142241379310345,
"grad_norm": 0.4493812322616577,
"learning_rate": 2.7078947368421052e-06,
"loss": 0.2882,
"step": 2850
},
{
"epoch": 6.25,
"grad_norm": 0.5748199820518494,
"learning_rate": 2.7e-06,
"loss": 0.2935,
"step": 2900
},
{
"epoch": 6.357758620689655,
"grad_norm": 0.556123673915863,
"learning_rate": 2.6921052631578947e-06,
"loss": 0.2876,
"step": 2950
},
{
"epoch": 6.4655172413793105,
"grad_norm": 0.5574390292167664,
"learning_rate": 2.6842105263157895e-06,
"loss": 0.289,
"step": 3000
},
{
"epoch": 6.573275862068965,
"grad_norm": 0.6384408473968506,
"learning_rate": 2.6763157894736842e-06,
"loss": 0.2916,
"step": 3050
},
{
"epoch": 6.681034482758621,
"grad_norm": 0.5367217659950256,
"learning_rate": 2.668421052631579e-06,
"loss": 0.2747,
"step": 3100
},
{
"epoch": 6.788793103448276,
"grad_norm": 0.7160293459892273,
"learning_rate": 2.6605263157894737e-06,
"loss": 0.2938,
"step": 3150
},
{
"epoch": 6.896551724137931,
"grad_norm": 0.623241662979126,
"learning_rate": 2.6526315789473685e-06,
"loss": 0.2826,
"step": 3200
},
{
"epoch": 7.004310344827586,
"grad_norm": 0.5312695503234863,
"learning_rate": 2.644736842105263e-06,
"loss": 0.2774,
"step": 3250
},
{
"epoch": 7.112068965517241,
"grad_norm": 0.5798662900924683,
"learning_rate": 2.636842105263158e-06,
"loss": 0.2906,
"step": 3300
},
{
"epoch": 7.219827586206897,
"grad_norm": 0.5495738387107849,
"learning_rate": 2.6289473684210527e-06,
"loss": 0.2853,
"step": 3350
},
{
"epoch": 7.327586206896552,
"grad_norm": 0.6797431111335754,
"learning_rate": 2.6210526315789474e-06,
"loss": 0.2723,
"step": 3400
},
{
"epoch": 7.435344827586207,
"grad_norm": 0.6902515292167664,
"learning_rate": 2.613157894736842e-06,
"loss": 0.2785,
"step": 3450
},
{
"epoch": 7.543103448275862,
"grad_norm": 0.6549153327941895,
"learning_rate": 2.605263157894737e-06,
"loss": 0.2826,
"step": 3500
},
{
"epoch": 7.650862068965517,
"grad_norm": 0.6934303641319275,
"learning_rate": 2.5973684210526317e-06,
"loss": 0.279,
"step": 3550
},
{
"epoch": 7.758620689655173,
"grad_norm": 0.5600142478942871,
"learning_rate": 2.5894736842105264e-06,
"loss": 0.2755,
"step": 3600
},
{
"epoch": 7.866379310344827,
"grad_norm": 0.6655827164649963,
"learning_rate": 2.581578947368421e-06,
"loss": 0.2512,
"step": 3650
},
{
"epoch": 7.974137931034483,
"grad_norm": 0.6687513589859009,
"learning_rate": 2.573684210526316e-06,
"loss": 0.2817,
"step": 3700
},
{
"epoch": 8.081896551724139,
"grad_norm": 0.6291229724884033,
"learning_rate": 2.5657894736842107e-06,
"loss": 0.2637,
"step": 3750
},
{
"epoch": 8.189655172413794,
"grad_norm": 0.5600979328155518,
"learning_rate": 2.5578947368421054e-06,
"loss": 0.2808,
"step": 3800
},
{
"epoch": 8.297413793103448,
"grad_norm": 0.7537655830383301,
"learning_rate": 2.55e-06,
"loss": 0.2757,
"step": 3850
},
{
"epoch": 8.405172413793103,
"grad_norm": 0.6032606363296509,
"learning_rate": 2.542105263157895e-06,
"loss": 0.2719,
"step": 3900
},
{
"epoch": 8.512931034482758,
"grad_norm": 0.7251238822937012,
"learning_rate": 2.5342105263157892e-06,
"loss": 0.2828,
"step": 3950
},
{
"epoch": 8.620689655172415,
"grad_norm": 0.5968174338340759,
"learning_rate": 2.526315789473684e-06,
"loss": 0.2584,
"step": 4000
},
{
"epoch": 8.620689655172415,
"eval_loss": 0.2882911264896393,
"eval_runtime": 12265.8085,
"eval_samples_per_second": 0.083,
"eval_steps_per_second": 0.01,
"eval_wer": 47.524039697639736,
"step": 4000
},
{
"epoch": 8.72844827586207,
"grad_norm": 0.5381304621696472,
"learning_rate": 2.5184210526315787e-06,
"loss": 0.2712,
"step": 4050
},
{
"epoch": 8.836206896551724,
"grad_norm": 0.5945307612419128,
"learning_rate": 2.510526315789474e-06,
"loss": 0.2562,
"step": 4100
},
{
"epoch": 8.943965517241379,
"grad_norm": 0.6147953867912292,
"learning_rate": 2.5026315789473686e-06,
"loss": 0.2604,
"step": 4150
},
{
"epoch": 9.051724137931034,
"grad_norm": 0.6225630640983582,
"learning_rate": 2.4947368421052634e-06,
"loss": 0.2712,
"step": 4200
},
{
"epoch": 9.15948275862069,
"grad_norm": 0.6314706206321716,
"learning_rate": 2.486842105263158e-06,
"loss": 0.2522,
"step": 4250
},
{
"epoch": 9.267241379310345,
"grad_norm": 0.7159410119056702,
"learning_rate": 2.478947368421053e-06,
"loss": 0.2554,
"step": 4300
},
{
"epoch": 9.375,
"grad_norm": 0.7329779267311096,
"learning_rate": 2.4710526315789476e-06,
"loss": 0.2648,
"step": 4350
},
{
"epoch": 9.482758620689655,
"grad_norm": 0.6220468878746033,
"learning_rate": 2.4631578947368424e-06,
"loss": 0.2753,
"step": 4400
},
{
"epoch": 9.59051724137931,
"grad_norm": 0.697035551071167,
"learning_rate": 2.4552631578947367e-06,
"loss": 0.2636,
"step": 4450
},
{
"epoch": 9.698275862068966,
"grad_norm": 0.6941649913787842,
"learning_rate": 2.4473684210526314e-06,
"loss": 0.2547,
"step": 4500
},
{
"epoch": 9.806034482758621,
"grad_norm": 0.525019109249115,
"learning_rate": 2.439473684210526e-06,
"loss": 0.2574,
"step": 4550
},
{
"epoch": 9.913793103448276,
"grad_norm": 0.652886688709259,
"learning_rate": 2.431578947368421e-06,
"loss": 0.2629,
"step": 4600
},
{
"epoch": 10.02155172413793,
"grad_norm": 0.8766529560089111,
"learning_rate": 2.4236842105263157e-06,
"loss": 0.2546,
"step": 4650
},
{
"epoch": 10.129310344827585,
"grad_norm": 0.5778140425682068,
"learning_rate": 2.4157894736842104e-06,
"loss": 0.2675,
"step": 4700
},
{
"epoch": 10.237068965517242,
"grad_norm": 0.6798537373542786,
"learning_rate": 2.4078947368421056e-06,
"loss": 0.259,
"step": 4750
},
{
"epoch": 10.344827586206897,
"grad_norm": 0.5612985491752625,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.2539,
"step": 4800
},
{
"epoch": 10.452586206896552,
"grad_norm": 0.7032902240753174,
"learning_rate": 2.392105263157895e-06,
"loss": 0.2553,
"step": 4850
},
{
"epoch": 10.560344827586206,
"grad_norm": 0.7675493359565735,
"learning_rate": 2.38421052631579e-06,
"loss": 0.2558,
"step": 4900
},
{
"epoch": 10.668103448275861,
"grad_norm": 0.6644128561019897,
"learning_rate": 2.376315789473684e-06,
"loss": 0.2611,
"step": 4950
},
{
"epoch": 10.775862068965518,
"grad_norm": 0.7273836135864258,
"learning_rate": 2.368421052631579e-06,
"loss": 0.2448,
"step": 5000
},
{
"epoch": 10.883620689655173,
"grad_norm": 0.7355319857597351,
"learning_rate": 2.3605263157894736e-06,
"loss": 0.2516,
"step": 5050
},
{
"epoch": 10.991379310344827,
"grad_norm": 0.7023867964744568,
"learning_rate": 2.3526315789473684e-06,
"loss": 0.2486,
"step": 5100
},
{
"epoch": 11.099137931034482,
"grad_norm": 0.7921417355537415,
"learning_rate": 2.344736842105263e-06,
"loss": 0.2379,
"step": 5150
},
{
"epoch": 11.206896551724139,
"grad_norm": 0.7202710509300232,
"learning_rate": 2.336842105263158e-06,
"loss": 0.2474,
"step": 5200
},
{
"epoch": 11.314655172413794,
"grad_norm": 0.7898572087287903,
"learning_rate": 2.3289473684210526e-06,
"loss": 0.2724,
"step": 5250
},
{
"epoch": 11.422413793103448,
"grad_norm": 0.6319573521614075,
"learning_rate": 2.3210526315789473e-06,
"loss": 0.2532,
"step": 5300
},
{
"epoch": 11.530172413793103,
"grad_norm": 0.717001736164093,
"learning_rate": 2.313157894736842e-06,
"loss": 0.2448,
"step": 5350
},
{
"epoch": 11.637931034482758,
"grad_norm": 0.8957849740982056,
"learning_rate": 2.305263157894737e-06,
"loss": 0.2584,
"step": 5400
},
{
"epoch": 11.745689655172415,
"grad_norm": 0.6746819019317627,
"learning_rate": 2.2973684210526316e-06,
"loss": 0.2425,
"step": 5450
},
{
"epoch": 11.85344827586207,
"grad_norm": 0.7924005389213562,
"learning_rate": 2.2894736842105263e-06,
"loss": 0.245,
"step": 5500
},
{
"epoch": 11.961206896551724,
"grad_norm": 0.8761442303657532,
"learning_rate": 2.281578947368421e-06,
"loss": 0.253,
"step": 5550
},
{
"epoch": 12.068965517241379,
"grad_norm": 0.6118531823158264,
"learning_rate": 2.273684210526316e-06,
"loss": 0.2492,
"step": 5600
},
{
"epoch": 12.176724137931034,
"grad_norm": 0.698029637336731,
"learning_rate": 2.2657894736842106e-06,
"loss": 0.2538,
"step": 5650
},
{
"epoch": 12.28448275862069,
"grad_norm": 0.6611655354499817,
"learning_rate": 2.2578947368421053e-06,
"loss": 0.2442,
"step": 5700
},
{
"epoch": 12.392241379310345,
"grad_norm": 0.7785851955413818,
"learning_rate": 2.25e-06,
"loss": 0.2592,
"step": 5750
},
{
"epoch": 12.5,
"grad_norm": 0.7249414920806885,
"learning_rate": 2.242105263157895e-06,
"loss": 0.2394,
"step": 5800
},
{
"epoch": 12.607758620689655,
"grad_norm": 0.8529052734375,
"learning_rate": 2.2342105263157895e-06,
"loss": 0.2353,
"step": 5850
},
{
"epoch": 12.71551724137931,
"grad_norm": 0.825858473777771,
"learning_rate": 2.2263157894736843e-06,
"loss": 0.2391,
"step": 5900
},
{
"epoch": 12.823275862068966,
"grad_norm": 0.9452285170555115,
"learning_rate": 2.218421052631579e-06,
"loss": 0.2522,
"step": 5950
},
{
"epoch": 12.931034482758621,
"grad_norm": 0.744655430316925,
"learning_rate": 2.2105263157894738e-06,
"loss": 0.2474,
"step": 6000
},
{
"epoch": 12.931034482758621,
"eval_loss": 0.270245224237442,
"eval_runtime": 12887.0574,
"eval_samples_per_second": 0.079,
"eval_steps_per_second": 0.01,
"eval_wer": 45.25119555715534,
"step": 6000
},
{
"epoch": 13.038793103448276,
"grad_norm": 0.6413202285766602,
"learning_rate": 2.2026315789473685e-06,
"loss": 0.2392,
"step": 6050
},
{
"epoch": 13.14655172413793,
"grad_norm": 1.1068989038467407,
"learning_rate": 2.1947368421052633e-06,
"loss": 0.2394,
"step": 6100
},
{
"epoch": 13.254310344827585,
"grad_norm": 0.7797178030014038,
"learning_rate": 2.186842105263158e-06,
"loss": 0.2501,
"step": 6150
},
{
"epoch": 13.362068965517242,
"grad_norm": 0.7657078504562378,
"learning_rate": 2.1789473684210528e-06,
"loss": 0.2466,
"step": 6200
},
{
"epoch": 13.469827586206897,
"grad_norm": 0.6744815111160278,
"learning_rate": 2.1710526315789475e-06,
"loss": 0.2302,
"step": 6250
},
{
"epoch": 13.577586206896552,
"grad_norm": 0.662436842918396,
"learning_rate": 2.1631578947368423e-06,
"loss": 0.2485,
"step": 6300
},
{
"epoch": 13.685344827586206,
"grad_norm": 0.5807084441184998,
"learning_rate": 2.155263157894737e-06,
"loss": 0.2434,
"step": 6350
},
{
"epoch": 13.793103448275861,
"grad_norm": 0.7120242118835449,
"learning_rate": 2.1473684210526317e-06,
"loss": 0.2397,
"step": 6400
},
{
"epoch": 13.900862068965518,
"grad_norm": 0.7638838291168213,
"learning_rate": 2.1394736842105265e-06,
"loss": 0.235,
"step": 6450
},
{
"epoch": 14.008620689655173,
"grad_norm": 0.6631995439529419,
"learning_rate": 2.1315789473684212e-06,
"loss": 0.2431,
"step": 6500
},
{
"epoch": 14.116379310344827,
"grad_norm": 0.7738016247749329,
"learning_rate": 2.123684210526316e-06,
"loss": 0.2463,
"step": 6550
},
{
"epoch": 14.224137931034482,
"grad_norm": 0.6762473583221436,
"learning_rate": 2.1157894736842103e-06,
"loss": 0.2333,
"step": 6600
},
{
"epoch": 14.331896551724139,
"grad_norm": 0.5386962890625,
"learning_rate": 2.107894736842105e-06,
"loss": 0.2224,
"step": 6650
},
{
"epoch": 14.439655172413794,
"grad_norm": 0.7853811383247375,
"learning_rate": 2.1e-06,
"loss": 0.2522,
"step": 6700
},
{
"epoch": 14.547413793103448,
"grad_norm": 0.849454402923584,
"learning_rate": 2.0921052631578945e-06,
"loss": 0.2288,
"step": 6750
},
{
"epoch": 14.655172413793103,
"grad_norm": 0.8840324878692627,
"learning_rate": 2.0842105263157897e-06,
"loss": 0.2347,
"step": 6800
},
{
"epoch": 14.762931034482758,
"grad_norm": 0.753038227558136,
"learning_rate": 2.0763157894736845e-06,
"loss": 0.2346,
"step": 6850
},
{
"epoch": 14.870689655172415,
"grad_norm": 0.5537305474281311,
"learning_rate": 2.068421052631579e-06,
"loss": 0.2377,
"step": 6900
},
{
"epoch": 14.97844827586207,
"grad_norm": 0.7764011025428772,
"learning_rate": 2.060526315789474e-06,
"loss": 0.2396,
"step": 6950
},
{
"epoch": 15.086206896551724,
"grad_norm": 0.8520766496658325,
"learning_rate": 2.0526315789473687e-06,
"loss": 0.2345,
"step": 7000
},
{
"epoch": 15.193965517241379,
"grad_norm": 0.6806856989860535,
"learning_rate": 2.0447368421052634e-06,
"loss": 0.2323,
"step": 7050
},
{
"epoch": 15.301724137931034,
"grad_norm": 0.8488845825195312,
"learning_rate": 2.0368421052631578e-06,
"loss": 0.2315,
"step": 7100
},
{
"epoch": 15.40948275862069,
"grad_norm": 0.7247233390808105,
"learning_rate": 2.0289473684210525e-06,
"loss": 0.226,
"step": 7150
},
{
"epoch": 15.517241379310345,
"grad_norm": 0.9292821288108826,
"learning_rate": 2.0210526315789473e-06,
"loss": 0.2517,
"step": 7200
},
{
"epoch": 15.625,
"grad_norm": 0.7515670657157898,
"learning_rate": 2.013157894736842e-06,
"loss": 0.2407,
"step": 7250
},
{
"epoch": 15.732758620689655,
"grad_norm": 0.6184987425804138,
"learning_rate": 2.0052631578947367e-06,
"loss": 0.2267,
"step": 7300
},
{
"epoch": 15.84051724137931,
"grad_norm": 0.9107720255851746,
"learning_rate": 1.9973684210526315e-06,
"loss": 0.2251,
"step": 7350
},
{
"epoch": 15.948275862068966,
"grad_norm": 0.6925145387649536,
"learning_rate": 1.9894736842105262e-06,
"loss": 0.2433,
"step": 7400
},
{
"epoch": 16.05603448275862,
"grad_norm": 0.8509221076965332,
"learning_rate": 1.9815789473684214e-06,
"loss": 0.2255,
"step": 7450
},
{
"epoch": 16.163793103448278,
"grad_norm": 0.835840106010437,
"learning_rate": 1.973684210526316e-06,
"loss": 0.24,
"step": 7500
},
{
"epoch": 16.271551724137932,
"grad_norm": 0.8754560351371765,
"learning_rate": 1.965789473684211e-06,
"loss": 0.2315,
"step": 7550
},
{
"epoch": 16.379310344827587,
"grad_norm": 0.8880627751350403,
"learning_rate": 1.9578947368421052e-06,
"loss": 0.2281,
"step": 7600
},
{
"epoch": 16.487068965517242,
"grad_norm": 0.8073711395263672,
"learning_rate": 1.95e-06,
"loss": 0.2347,
"step": 7650
},
{
"epoch": 16.594827586206897,
"grad_norm": 0.839345395565033,
"learning_rate": 1.9421052631578947e-06,
"loss": 0.2295,
"step": 7700
},
{
"epoch": 16.70258620689655,
"grad_norm": 0.8276099562644958,
"learning_rate": 1.9342105263157895e-06,
"loss": 0.2331,
"step": 7750
},
{
"epoch": 16.810344827586206,
"grad_norm": 0.9326604008674622,
"learning_rate": 1.926315789473684e-06,
"loss": 0.2257,
"step": 7800
},
{
"epoch": 16.91810344827586,
"grad_norm": 0.751990020275116,
"learning_rate": 1.918421052631579e-06,
"loss": 0.2355,
"step": 7850
},
{
"epoch": 17.025862068965516,
"grad_norm": 0.9207496643066406,
"learning_rate": 1.9105263157894737e-06,
"loss": 0.2289,
"step": 7900
},
{
"epoch": 17.13362068965517,
"grad_norm": 0.8190683126449585,
"learning_rate": 1.9026315789473684e-06,
"loss": 0.2304,
"step": 7950
},
{
"epoch": 17.24137931034483,
"grad_norm": 0.8016071319580078,
"learning_rate": 1.8947368421052632e-06,
"loss": 0.2292,
"step": 8000
},
{
"epoch": 17.24137931034483,
"eval_loss": 0.26053452491760254,
"eval_runtime": 12992.487,
"eval_samples_per_second": 0.078,
"eval_steps_per_second": 0.01,
"eval_wer": 44.0582094924667,
"step": 8000
}
],
"logging_steps": 50,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 44,
"save_steps": 2000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.1975917395968e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}