|
{ |
|
"best_metric": 44.0582094924667, |
|
"best_model_checkpoint": "./whisper-large-v3-Marathi-Version1/checkpoint-8000", |
|
"epoch": 17.24137931034483, |
|
"eval_steps": 2000, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10775862068965517, |
|
"grad_norm": 0.3231394290924072, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.5889, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21551724137931033, |
|
"grad_norm": 0.3586913049221039, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 0.5863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3232758620689655, |
|
"grad_norm": 0.415686696767807, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.5768, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.43103448275862066, |
|
"grad_norm": 0.4167291224002838, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.5821, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5387931034482759, |
|
"grad_norm": 0.34522315859794617, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.5665, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.646551724137931, |
|
"grad_norm": 0.3691834807395935, |
|
"learning_rate": 9e-07, |
|
"loss": 0.5628, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7543103448275862, |
|
"grad_norm": 0.344501793384552, |
|
"learning_rate": 1.05e-06, |
|
"loss": 0.5705, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 0.2788464426994324, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.5616, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9698275862068966, |
|
"grad_norm": 0.27220970392227173, |
|
"learning_rate": 1.35e-06, |
|
"loss": 0.5344, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0775862068965518, |
|
"grad_norm": 0.2518285810947418, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.5566, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1853448275862069, |
|
"grad_norm": 0.29989466071128845, |
|
"learning_rate": 1.65e-06, |
|
"loss": 0.5213, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.293103448275862, |
|
"grad_norm": 0.26115870475769043, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.5062, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4008620689655173, |
|
"grad_norm": 0.2464788407087326, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.5022, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5086206896551724, |
|
"grad_norm": 0.2869219183921814, |
|
"learning_rate": 2.1e-06, |
|
"loss": 0.4828, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.6163793103448276, |
|
"grad_norm": 0.2619161307811737, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.4896, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 0.33810245990753174, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.4639, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.831896551724138, |
|
"grad_norm": 0.2645559012889862, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.4464, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.9396551724137931, |
|
"grad_norm": 0.2778465151786804, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.4273, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.0474137931034484, |
|
"grad_norm": 0.2977587580680847, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.401, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.1551724137931036, |
|
"grad_norm": 0.3463541567325592, |
|
"learning_rate": 3e-06, |
|
"loss": 0.4094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.2629310344827585, |
|
"grad_norm": 0.26817262172698975, |
|
"learning_rate": 2.992105263157895e-06, |
|
"loss": 0.3967, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.3706896551724137, |
|
"grad_norm": 0.34868425130844116, |
|
"learning_rate": 2.9842105263157896e-06, |
|
"loss": 0.3987, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.478448275862069, |
|
"grad_norm": 0.33089494705200195, |
|
"learning_rate": 2.9763157894736843e-06, |
|
"loss": 0.3953, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 0.3616925776004791, |
|
"learning_rate": 2.968421052631579e-06, |
|
"loss": 0.3668, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.6939655172413794, |
|
"grad_norm": 0.33365535736083984, |
|
"learning_rate": 2.960526315789474e-06, |
|
"loss": 0.3735, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.8017241379310347, |
|
"grad_norm": 0.3913170099258423, |
|
"learning_rate": 2.9526315789473685e-06, |
|
"loss": 0.3632, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.9094827586206895, |
|
"grad_norm": 0.2940693497657776, |
|
"learning_rate": 2.9447368421052633e-06, |
|
"loss": 0.359, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.0172413793103448, |
|
"grad_norm": 0.3293132781982422, |
|
"learning_rate": 2.936842105263158e-06, |
|
"loss": 0.3824, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 0.34137511253356934, |
|
"learning_rate": 2.9289473684210528e-06, |
|
"loss": 0.3616, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.2327586206896552, |
|
"grad_norm": 0.34217217564582825, |
|
"learning_rate": 2.9210526315789475e-06, |
|
"loss": 0.3525, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.3405172413793105, |
|
"grad_norm": 0.39114320278167725, |
|
"learning_rate": 2.9131578947368423e-06, |
|
"loss": 0.3413, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.4482758620689653, |
|
"grad_norm": 0.3646846115589142, |
|
"learning_rate": 2.905263157894737e-06, |
|
"loss": 0.3385, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.5560344827586206, |
|
"grad_norm": 0.4652530550956726, |
|
"learning_rate": 2.8973684210526318e-06, |
|
"loss": 0.3534, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.663793103448276, |
|
"grad_norm": 0.3728253245353699, |
|
"learning_rate": 2.8894736842105265e-06, |
|
"loss": 0.3392, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.771551724137931, |
|
"grad_norm": 0.4309278428554535, |
|
"learning_rate": 2.8815789473684213e-06, |
|
"loss": 0.3249, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.8793103448275863, |
|
"grad_norm": 0.4407905042171478, |
|
"learning_rate": 2.873684210526316e-06, |
|
"loss": 0.3218, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.987068965517241, |
|
"grad_norm": 0.41783958673477173, |
|
"learning_rate": 2.8657894736842103e-06, |
|
"loss": 0.3285, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.094827586206897, |
|
"grad_norm": 0.38858509063720703, |
|
"learning_rate": 2.857894736842105e-06, |
|
"loss": 0.3292, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.202586206896552, |
|
"grad_norm": 0.37383902072906494, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.3252, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.310344827586207, |
|
"grad_norm": 0.5148123502731323, |
|
"learning_rate": 2.8421052631578946e-06, |
|
"loss": 0.315, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.310344827586207, |
|
"eval_loss": 0.3307957649230957, |
|
"eval_runtime": 5462.6499, |
|
"eval_samples_per_second": 0.186, |
|
"eval_steps_per_second": 0.023, |
|
"eval_wer": 52.398827582660566, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.418103448275862, |
|
"grad_norm": 0.4609113931655884, |
|
"learning_rate": 2.8342105263157897e-06, |
|
"loss": 0.318, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.525862068965517, |
|
"grad_norm": 0.502840518951416, |
|
"learning_rate": 2.8263157894736845e-06, |
|
"loss": 0.3232, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.633620689655173, |
|
"grad_norm": 0.4469599425792694, |
|
"learning_rate": 2.8184210526315792e-06, |
|
"loss": 0.3118, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.741379310344827, |
|
"grad_norm": 0.4370408356189728, |
|
"learning_rate": 2.810526315789474e-06, |
|
"loss": 0.3068, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.849137931034483, |
|
"grad_norm": 0.46128618717193604, |
|
"learning_rate": 2.8026315789473687e-06, |
|
"loss": 0.3154, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.956896551724138, |
|
"grad_norm": 0.5146879553794861, |
|
"learning_rate": 2.7947368421052635e-06, |
|
"loss": 0.3102, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.064655172413793, |
|
"grad_norm": 0.5018391609191895, |
|
"learning_rate": 2.7868421052631578e-06, |
|
"loss": 0.3113, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 5.172413793103448, |
|
"grad_norm": 0.42680951952934265, |
|
"learning_rate": 2.7789473684210525e-06, |
|
"loss": 0.3143, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.280172413793103, |
|
"grad_norm": 0.5996211767196655, |
|
"learning_rate": 2.7710526315789473e-06, |
|
"loss": 0.2932, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.387931034482759, |
|
"grad_norm": 0.48398110270500183, |
|
"learning_rate": 2.763157894736842e-06, |
|
"loss": 0.287, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.495689655172414, |
|
"grad_norm": 0.6341568231582642, |
|
"learning_rate": 2.7552631578947368e-06, |
|
"loss": 0.3083, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.603448275862069, |
|
"grad_norm": 0.42023149132728577, |
|
"learning_rate": 2.7473684210526315e-06, |
|
"loss": 0.3064, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.711206896551724, |
|
"grad_norm": 0.5808025598526001, |
|
"learning_rate": 2.7394736842105263e-06, |
|
"loss": 0.2893, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.818965517241379, |
|
"grad_norm": 0.5769814252853394, |
|
"learning_rate": 2.7315789473684214e-06, |
|
"loss": 0.2837, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.926724137931035, |
|
"grad_norm": 0.5669300556182861, |
|
"learning_rate": 2.723684210526316e-06, |
|
"loss": 0.2951, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 6.0344827586206895, |
|
"grad_norm": 0.4516044855117798, |
|
"learning_rate": 2.715789473684211e-06, |
|
"loss": 0.3043, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.142241379310345, |
|
"grad_norm": 0.4493812322616577, |
|
"learning_rate": 2.7078947368421052e-06, |
|
"loss": 0.2882, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 0.5748199820518494, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.2935, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.357758620689655, |
|
"grad_norm": 0.556123673915863, |
|
"learning_rate": 2.6921052631578947e-06, |
|
"loss": 0.2876, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.4655172413793105, |
|
"grad_norm": 0.5574390292167664, |
|
"learning_rate": 2.6842105263157895e-06, |
|
"loss": 0.289, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.573275862068965, |
|
"grad_norm": 0.6384408473968506, |
|
"learning_rate": 2.6763157894736842e-06, |
|
"loss": 0.2916, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.681034482758621, |
|
"grad_norm": 0.5367217659950256, |
|
"learning_rate": 2.668421052631579e-06, |
|
"loss": 0.2747, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.788793103448276, |
|
"grad_norm": 0.7160293459892273, |
|
"learning_rate": 2.6605263157894737e-06, |
|
"loss": 0.2938, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 6.896551724137931, |
|
"grad_norm": 0.623241662979126, |
|
"learning_rate": 2.6526315789473685e-06, |
|
"loss": 0.2826, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.004310344827586, |
|
"grad_norm": 0.5312695503234863, |
|
"learning_rate": 2.644736842105263e-06, |
|
"loss": 0.2774, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 7.112068965517241, |
|
"grad_norm": 0.5798662900924683, |
|
"learning_rate": 2.636842105263158e-06, |
|
"loss": 0.2906, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.219827586206897, |
|
"grad_norm": 0.5495738387107849, |
|
"learning_rate": 2.6289473684210527e-06, |
|
"loss": 0.2853, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.327586206896552, |
|
"grad_norm": 0.6797431111335754, |
|
"learning_rate": 2.6210526315789474e-06, |
|
"loss": 0.2723, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.435344827586207, |
|
"grad_norm": 0.6902515292167664, |
|
"learning_rate": 2.613157894736842e-06, |
|
"loss": 0.2785, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 7.543103448275862, |
|
"grad_norm": 0.6549153327941895, |
|
"learning_rate": 2.605263157894737e-06, |
|
"loss": 0.2826, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.650862068965517, |
|
"grad_norm": 0.6934303641319275, |
|
"learning_rate": 2.5973684210526317e-06, |
|
"loss": 0.279, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 7.758620689655173, |
|
"grad_norm": 0.5600142478942871, |
|
"learning_rate": 2.5894736842105264e-06, |
|
"loss": 0.2755, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.866379310344827, |
|
"grad_norm": 0.6655827164649963, |
|
"learning_rate": 2.581578947368421e-06, |
|
"loss": 0.2512, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 7.974137931034483, |
|
"grad_norm": 0.6687513589859009, |
|
"learning_rate": 2.573684210526316e-06, |
|
"loss": 0.2817, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 8.081896551724139, |
|
"grad_norm": 0.6291229724884033, |
|
"learning_rate": 2.5657894736842107e-06, |
|
"loss": 0.2637, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 8.189655172413794, |
|
"grad_norm": 0.5600979328155518, |
|
"learning_rate": 2.5578947368421054e-06, |
|
"loss": 0.2808, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.297413793103448, |
|
"grad_norm": 0.7537655830383301, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.2757, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 8.405172413793103, |
|
"grad_norm": 0.6032606363296509, |
|
"learning_rate": 2.542105263157895e-06, |
|
"loss": 0.2719, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.512931034482758, |
|
"grad_norm": 0.7251238822937012, |
|
"learning_rate": 2.5342105263157892e-06, |
|
"loss": 0.2828, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"grad_norm": 0.5968174338340759, |
|
"learning_rate": 2.526315789473684e-06, |
|
"loss": 0.2584, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"eval_loss": 0.2882911264896393, |
|
"eval_runtime": 12265.8085, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.01, |
|
"eval_wer": 47.524039697639736, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.72844827586207, |
|
"grad_norm": 0.5381304621696472, |
|
"learning_rate": 2.5184210526315787e-06, |
|
"loss": 0.2712, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 8.836206896551724, |
|
"grad_norm": 0.5945307612419128, |
|
"learning_rate": 2.510526315789474e-06, |
|
"loss": 0.2562, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.943965517241379, |
|
"grad_norm": 0.6147953867912292, |
|
"learning_rate": 2.5026315789473686e-06, |
|
"loss": 0.2604, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 9.051724137931034, |
|
"grad_norm": 0.6225630640983582, |
|
"learning_rate": 2.4947368421052634e-06, |
|
"loss": 0.2712, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.15948275862069, |
|
"grad_norm": 0.6314706206321716, |
|
"learning_rate": 2.486842105263158e-06, |
|
"loss": 0.2522, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 9.267241379310345, |
|
"grad_norm": 0.7159410119056702, |
|
"learning_rate": 2.478947368421053e-06, |
|
"loss": 0.2554, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 0.7329779267311096, |
|
"learning_rate": 2.4710526315789476e-06, |
|
"loss": 0.2648, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.482758620689655, |
|
"grad_norm": 0.6220468878746033, |
|
"learning_rate": 2.4631578947368424e-06, |
|
"loss": 0.2753, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.59051724137931, |
|
"grad_norm": 0.697035551071167, |
|
"learning_rate": 2.4552631578947367e-06, |
|
"loss": 0.2636, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 9.698275862068966, |
|
"grad_norm": 0.6941649913787842, |
|
"learning_rate": 2.4473684210526314e-06, |
|
"loss": 0.2547, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.806034482758621, |
|
"grad_norm": 0.525019109249115, |
|
"learning_rate": 2.439473684210526e-06, |
|
"loss": 0.2574, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 9.913793103448276, |
|
"grad_norm": 0.652886688709259, |
|
"learning_rate": 2.431578947368421e-06, |
|
"loss": 0.2629, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 10.02155172413793, |
|
"grad_norm": 0.8766529560089111, |
|
"learning_rate": 2.4236842105263157e-06, |
|
"loss": 0.2546, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 10.129310344827585, |
|
"grad_norm": 0.5778140425682068, |
|
"learning_rate": 2.4157894736842104e-06, |
|
"loss": 0.2675, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 10.237068965517242, |
|
"grad_norm": 0.6798537373542786, |
|
"learning_rate": 2.4078947368421056e-06, |
|
"loss": 0.259, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 10.344827586206897, |
|
"grad_norm": 0.5612985491752625, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.2539, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.452586206896552, |
|
"grad_norm": 0.7032902240753174, |
|
"learning_rate": 2.392105263157895e-06, |
|
"loss": 0.2553, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 10.560344827586206, |
|
"grad_norm": 0.7675493359565735, |
|
"learning_rate": 2.38421052631579e-06, |
|
"loss": 0.2558, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 10.668103448275861, |
|
"grad_norm": 0.6644128561019897, |
|
"learning_rate": 2.376315789473684e-06, |
|
"loss": 0.2611, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 10.775862068965518, |
|
"grad_norm": 0.7273836135864258, |
|
"learning_rate": 2.368421052631579e-06, |
|
"loss": 0.2448, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.883620689655173, |
|
"grad_norm": 0.7355319857597351, |
|
"learning_rate": 2.3605263157894736e-06, |
|
"loss": 0.2516, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 10.991379310344827, |
|
"grad_norm": 0.7023867964744568, |
|
"learning_rate": 2.3526315789473684e-06, |
|
"loss": 0.2486, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 11.099137931034482, |
|
"grad_norm": 0.7921417355537415, |
|
"learning_rate": 2.344736842105263e-06, |
|
"loss": 0.2379, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 11.206896551724139, |
|
"grad_norm": 0.7202710509300232, |
|
"learning_rate": 2.336842105263158e-06, |
|
"loss": 0.2474, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 11.314655172413794, |
|
"grad_norm": 0.7898572087287903, |
|
"learning_rate": 2.3289473684210526e-06, |
|
"loss": 0.2724, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 11.422413793103448, |
|
"grad_norm": 0.6319573521614075, |
|
"learning_rate": 2.3210526315789473e-06, |
|
"loss": 0.2532, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 11.530172413793103, |
|
"grad_norm": 0.717001736164093, |
|
"learning_rate": 2.313157894736842e-06, |
|
"loss": 0.2448, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 11.637931034482758, |
|
"grad_norm": 0.8957849740982056, |
|
"learning_rate": 2.305263157894737e-06, |
|
"loss": 0.2584, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 11.745689655172415, |
|
"grad_norm": 0.6746819019317627, |
|
"learning_rate": 2.2973684210526316e-06, |
|
"loss": 0.2425, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 11.85344827586207, |
|
"grad_norm": 0.7924005389213562, |
|
"learning_rate": 2.2894736842105263e-06, |
|
"loss": 0.245, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.961206896551724, |
|
"grad_norm": 0.8761442303657532, |
|
"learning_rate": 2.281578947368421e-06, |
|
"loss": 0.253, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 12.068965517241379, |
|
"grad_norm": 0.6118531823158264, |
|
"learning_rate": 2.273684210526316e-06, |
|
"loss": 0.2492, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 12.176724137931034, |
|
"grad_norm": 0.698029637336731, |
|
"learning_rate": 2.2657894736842106e-06, |
|
"loss": 0.2538, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 12.28448275862069, |
|
"grad_norm": 0.6611655354499817, |
|
"learning_rate": 2.2578947368421053e-06, |
|
"loss": 0.2442, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 12.392241379310345, |
|
"grad_norm": 0.7785851955413818, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.2592, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.7249414920806885, |
|
"learning_rate": 2.242105263157895e-06, |
|
"loss": 0.2394, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 12.607758620689655, |
|
"grad_norm": 0.8529052734375, |
|
"learning_rate": 2.2342105263157895e-06, |
|
"loss": 0.2353, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 12.71551724137931, |
|
"grad_norm": 0.825858473777771, |
|
"learning_rate": 2.2263157894736843e-06, |
|
"loss": 0.2391, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 12.823275862068966, |
|
"grad_norm": 0.9452285170555115, |
|
"learning_rate": 2.218421052631579e-06, |
|
"loss": 0.2522, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 12.931034482758621, |
|
"grad_norm": 0.744655430316925, |
|
"learning_rate": 2.2105263157894738e-06, |
|
"loss": 0.2474, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.931034482758621, |
|
"eval_loss": 0.270245224237442, |
|
"eval_runtime": 12887.0574, |
|
"eval_samples_per_second": 0.079, |
|
"eval_steps_per_second": 0.01, |
|
"eval_wer": 45.25119555715534, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.038793103448276, |
|
"grad_norm": 0.6413202285766602, |
|
"learning_rate": 2.2026315789473685e-06, |
|
"loss": 0.2392, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 13.14655172413793, |
|
"grad_norm": 1.1068989038467407, |
|
"learning_rate": 2.1947368421052633e-06, |
|
"loss": 0.2394, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 13.254310344827585, |
|
"grad_norm": 0.7797178030014038, |
|
"learning_rate": 2.186842105263158e-06, |
|
"loss": 0.2501, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 13.362068965517242, |
|
"grad_norm": 0.7657078504562378, |
|
"learning_rate": 2.1789473684210528e-06, |
|
"loss": 0.2466, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 13.469827586206897, |
|
"grad_norm": 0.6744815111160278, |
|
"learning_rate": 2.1710526315789475e-06, |
|
"loss": 0.2302, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 13.577586206896552, |
|
"grad_norm": 0.662436842918396, |
|
"learning_rate": 2.1631578947368423e-06, |
|
"loss": 0.2485, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 13.685344827586206, |
|
"grad_norm": 0.5807084441184998, |
|
"learning_rate": 2.155263157894737e-06, |
|
"loss": 0.2434, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 13.793103448275861, |
|
"grad_norm": 0.7120242118835449, |
|
"learning_rate": 2.1473684210526317e-06, |
|
"loss": 0.2397, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.900862068965518, |
|
"grad_norm": 0.7638838291168213, |
|
"learning_rate": 2.1394736842105265e-06, |
|
"loss": 0.235, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 14.008620689655173, |
|
"grad_norm": 0.6631995439529419, |
|
"learning_rate": 2.1315789473684212e-06, |
|
"loss": 0.2431, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 14.116379310344827, |
|
"grad_norm": 0.7738016247749329, |
|
"learning_rate": 2.123684210526316e-06, |
|
"loss": 0.2463, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 14.224137931034482, |
|
"grad_norm": 0.6762473583221436, |
|
"learning_rate": 2.1157894736842103e-06, |
|
"loss": 0.2333, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 14.331896551724139, |
|
"grad_norm": 0.5386962890625, |
|
"learning_rate": 2.107894736842105e-06, |
|
"loss": 0.2224, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 14.439655172413794, |
|
"grad_norm": 0.7853811383247375, |
|
"learning_rate": 2.1e-06, |
|
"loss": 0.2522, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 14.547413793103448, |
|
"grad_norm": 0.849454402923584, |
|
"learning_rate": 2.0921052631578945e-06, |
|
"loss": 0.2288, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 14.655172413793103, |
|
"grad_norm": 0.8840324878692627, |
|
"learning_rate": 2.0842105263157897e-06, |
|
"loss": 0.2347, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.762931034482758, |
|
"grad_norm": 0.753038227558136, |
|
"learning_rate": 2.0763157894736845e-06, |
|
"loss": 0.2346, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 14.870689655172415, |
|
"grad_norm": 0.5537305474281311, |
|
"learning_rate": 2.068421052631579e-06, |
|
"loss": 0.2377, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 14.97844827586207, |
|
"grad_norm": 0.7764011025428772, |
|
"learning_rate": 2.060526315789474e-06, |
|
"loss": 0.2396, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 15.086206896551724, |
|
"grad_norm": 0.8520766496658325, |
|
"learning_rate": 2.0526315789473687e-06, |
|
"loss": 0.2345, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.193965517241379, |
|
"grad_norm": 0.6806856989860535, |
|
"learning_rate": 2.0447368421052634e-06, |
|
"loss": 0.2323, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 15.301724137931034, |
|
"grad_norm": 0.8488845825195312, |
|
"learning_rate": 2.0368421052631578e-06, |
|
"loss": 0.2315, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 15.40948275862069, |
|
"grad_norm": 0.7247233390808105, |
|
"learning_rate": 2.0289473684210525e-06, |
|
"loss": 0.226, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 15.517241379310345, |
|
"grad_norm": 0.9292821288108826, |
|
"learning_rate": 2.0210526315789473e-06, |
|
"loss": 0.2517, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 0.7515670657157898, |
|
"learning_rate": 2.013157894736842e-06, |
|
"loss": 0.2407, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 15.732758620689655, |
|
"grad_norm": 0.6184987425804138, |
|
"learning_rate": 2.0052631578947367e-06, |
|
"loss": 0.2267, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 15.84051724137931, |
|
"grad_norm": 0.9107720255851746, |
|
"learning_rate": 1.9973684210526315e-06, |
|
"loss": 0.2251, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 15.948275862068966, |
|
"grad_norm": 0.6925145387649536, |
|
"learning_rate": 1.9894736842105262e-06, |
|
"loss": 0.2433, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 16.05603448275862, |
|
"grad_norm": 0.8509221076965332, |
|
"learning_rate": 1.9815789473684214e-06, |
|
"loss": 0.2255, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 16.163793103448278, |
|
"grad_norm": 0.835840106010437, |
|
"learning_rate": 1.973684210526316e-06, |
|
"loss": 0.24, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 16.271551724137932, |
|
"grad_norm": 0.8754560351371765, |
|
"learning_rate": 1.965789473684211e-06, |
|
"loss": 0.2315, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 16.379310344827587, |
|
"grad_norm": 0.8880627751350403, |
|
"learning_rate": 1.9578947368421052e-06, |
|
"loss": 0.2281, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.487068965517242, |
|
"grad_norm": 0.8073711395263672, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.2347, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 16.594827586206897, |
|
"grad_norm": 0.839345395565033, |
|
"learning_rate": 1.9421052631578947e-06, |
|
"loss": 0.2295, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 16.70258620689655, |
|
"grad_norm": 0.8276099562644958, |
|
"learning_rate": 1.9342105263157895e-06, |
|
"loss": 0.2331, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 16.810344827586206, |
|
"grad_norm": 0.9326604008674622, |
|
"learning_rate": 1.926315789473684e-06, |
|
"loss": 0.2257, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 16.91810344827586, |
|
"grad_norm": 0.751990020275116, |
|
"learning_rate": 1.918421052631579e-06, |
|
"loss": 0.2355, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 17.025862068965516, |
|
"grad_norm": 0.9207496643066406, |
|
"learning_rate": 1.9105263157894737e-06, |
|
"loss": 0.2289, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 17.13362068965517, |
|
"grad_norm": 0.8190683126449585, |
|
"learning_rate": 1.9026315789473684e-06, |
|
"loss": 0.2304, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 17.24137931034483, |
|
"grad_norm": 0.8016071319580078, |
|
"learning_rate": 1.8947368421052632e-06, |
|
"loss": 0.2292, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 17.24137931034483, |
|
"eval_loss": 0.26053452491760254, |
|
"eval_runtime": 12992.487, |
|
"eval_samples_per_second": 0.078, |
|
"eval_steps_per_second": 0.01, |
|
"eval_wer": 44.0582094924667, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 20000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 44, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1975917395968e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|