|
{ |
|
"best_global_step": 234872, |
|
"best_metric": 0.3474995039635673, |
|
"best_model_checkpoint": "facebook-mms-1b-all-common_voice_fleurs-rw-200hrs-v1/checkpoint-234872", |
|
"epoch": 26.0, |
|
"eval_steps": 500, |
|
"global_step": 277576, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.114748001098633, |
|
"learning_rate": 9.993443237167478e-06, |
|
"loss": 1.7863, |
|
"step": 10676 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.11142080162244088, |
|
"eval_loss": 0.4298412799835205, |
|
"eval_runtime": 370.3842, |
|
"eval_samples_per_second": 36.206, |
|
"eval_steps_per_second": 2.265, |
|
"eval_wer": 0.38987518778522096, |
|
"step": 10676 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 49.95594787597656, |
|
"learning_rate": 1.9989696515548896e-05, |
|
"loss": 0.2748, |
|
"step": 21352 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.11646517201831753, |
|
"eval_loss": 0.42387428879737854, |
|
"eval_runtime": 330.7981, |
|
"eval_samples_per_second": 40.538, |
|
"eval_steps_per_second": 2.536, |
|
"eval_wer": 0.4003628152193426, |
|
"step": 21352 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 12.085474967956543, |
|
"learning_rate": 2.998594979393031e-05, |
|
"loss": 0.2545, |
|
"step": 32028 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.1220423125205042, |
|
"eval_loss": 0.4541724622249603, |
|
"eval_runtime": 351.2843, |
|
"eval_samples_per_second": 38.174, |
|
"eval_steps_per_second": 2.388, |
|
"eval_wer": 0.4101890607432043, |
|
"step": 32028 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 8.780159950256348, |
|
"learning_rate": 3.998313975271638e-05, |
|
"loss": 0.2486, |
|
"step": 42704 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.12309158494361855, |
|
"eval_loss": 0.460758239030838, |
|
"eval_runtime": 312.2958, |
|
"eval_samples_per_second": 42.94, |
|
"eval_steps_per_second": 2.687, |
|
"eval_wer": 0.4010241971296025, |
|
"step": 42704 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.321907043457031, |
|
"learning_rate": 4.997939303109779e-05, |
|
"loss": 0.2513, |
|
"step": 53380 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.1371239090413663, |
|
"eval_loss": 0.4873207211494446, |
|
"eval_runtime": 302.0004, |
|
"eval_samples_per_second": 44.404, |
|
"eval_steps_per_second": 2.778, |
|
"eval_wer": 0.4268936781337692, |
|
"step": 53380 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.6101784706115723, |
|
"learning_rate": 4.947501528268029e-05, |
|
"loss": 0.2481, |
|
"step": 64056 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.1327763422959816, |
|
"eval_loss": 0.4789027273654938, |
|
"eval_runtime": 328.5795, |
|
"eval_samples_per_second": 40.812, |
|
"eval_steps_per_second": 2.553, |
|
"eval_wer": 0.41674618996778123, |
|
"step": 64056 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.900432109832764, |
|
"learning_rate": 4.89488473901126e-05, |
|
"loss": 0.2243, |
|
"step": 74732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.12506541007030397, |
|
"eval_loss": 0.4427885413169861, |
|
"eval_runtime": 373.1444, |
|
"eval_samples_per_second": 35.938, |
|
"eval_steps_per_second": 2.248, |
|
"eval_wer": 0.3932765804665577, |
|
"step": 74732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.762911558151245, |
|
"learning_rate": 4.842272879651358e-05, |
|
"loss": 0.2074, |
|
"step": 85408 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.1276913024263408, |
|
"eval_loss": 0.4647955298423767, |
|
"eval_runtime": 419.1893, |
|
"eval_samples_per_second": 31.99, |
|
"eval_steps_per_second": 2.001, |
|
"eval_wer": 0.39266244012131635, |
|
"step": 85408 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.190975666046143, |
|
"learning_rate": 4.789665950188322e-05, |
|
"loss": 0.1951, |
|
"step": 96084 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.12099710704476624, |
|
"eval_loss": 0.4297826886177063, |
|
"eval_runtime": 367.3195, |
|
"eval_samples_per_second": 36.508, |
|
"eval_steps_per_second": 2.284, |
|
"eval_wer": 0.3886941486597568, |
|
"step": 96084 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 12.867461204528809, |
|
"learning_rate": 4.7370491609315534e-05, |
|
"loss": 0.183, |
|
"step": 106760 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.12186065553769111, |
|
"eval_loss": 0.4488038122653961, |
|
"eval_runtime": 318.5142, |
|
"eval_samples_per_second": 42.102, |
|
"eval_steps_per_second": 2.634, |
|
"eval_wer": 0.3807292207976266, |
|
"step": 106760 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.9338295459747314, |
|
"learning_rate": 4.6844373015716514e-05, |
|
"loss": 0.1704, |
|
"step": 117436 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.11717146521268779, |
|
"eval_loss": 0.4291200637817383, |
|
"eval_runtime": 309.0823, |
|
"eval_samples_per_second": 43.386, |
|
"eval_steps_per_second": 2.714, |
|
"eval_wer": 0.36931565868914107, |
|
"step": 117436 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.193000316619873, |
|
"learning_rate": 4.631815582418016e-05, |
|
"loss": 0.1602, |
|
"step": 128112 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.11806890493374943, |
|
"eval_loss": 0.4337969422340393, |
|
"eval_runtime": 370.6877, |
|
"eval_samples_per_second": 36.176, |
|
"eval_steps_per_second": 2.263, |
|
"eval_wer": 0.3649127448294107, |
|
"step": 128112 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.6738057136535645, |
|
"learning_rate": 4.5792086529549806e-05, |
|
"loss": 0.1503, |
|
"step": 138788 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.11863963321557261, |
|
"eval_loss": 0.43087342381477356, |
|
"eval_runtime": 445.2671, |
|
"eval_samples_per_second": 30.117, |
|
"eval_steps_per_second": 1.884, |
|
"eval_wer": 0.36557412673967066, |
|
"step": 138788 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.588621616363525, |
|
"learning_rate": 4.526601723491945e-05, |
|
"loss": 0.1417, |
|
"step": 149464 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.11495497889254312, |
|
"eval_loss": 0.4210534691810608, |
|
"eval_runtime": 380.9506, |
|
"eval_samples_per_second": 35.201, |
|
"eval_steps_per_second": 2.202, |
|
"eval_wer": 0.36186093972921135, |
|
"step": 149464 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 20.85131072998047, |
|
"learning_rate": 4.473989864132043e-05, |
|
"loss": 0.136, |
|
"step": 160140 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.1191358007954949, |
|
"eval_loss": 0.44502612948417664, |
|
"eval_runtime": 358.3686, |
|
"eval_samples_per_second": 37.42, |
|
"eval_steps_per_second": 2.341, |
|
"eval_wer": 0.37130925273292453, |
|
"step": 160140 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.3467574417591095, |
|
"learning_rate": 4.421373074875274e-05, |
|
"loss": 0.1305, |
|
"step": 170816 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.11509596640159207, |
|
"eval_loss": 0.4436805248260498, |
|
"eval_runtime": 352.8374, |
|
"eval_samples_per_second": 38.006, |
|
"eval_steps_per_second": 2.378, |
|
"eval_wer": 0.36488439989039956, |
|
"step": 170816 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.450319290161133, |
|
"learning_rate": 4.368761215515372e-05, |
|
"loss": 0.122, |
|
"step": 181492 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.11497666927855064, |
|
"eval_loss": 0.45026466250419617, |
|
"eval_runtime": 361.6111, |
|
"eval_samples_per_second": 37.084, |
|
"eval_steps_per_second": 2.32, |
|
"eval_wer": 0.3567210574551914, |
|
"step": 181492 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.8293219804763794, |
|
"learning_rate": 4.316149356155469e-05, |
|
"loss": 0.1139, |
|
"step": 192168 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.11221792330821767, |
|
"eval_loss": 0.4319141209125519, |
|
"eval_runtime": 319.6496, |
|
"eval_samples_per_second": 41.952, |
|
"eval_steps_per_second": 2.625, |
|
"eval_wer": 0.3517795897542494, |
|
"step": 192168 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 121.26933288574219, |
|
"learning_rate": 4.263532566898701e-05, |
|
"loss": 0.1057, |
|
"step": 202844 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.11463504569893201, |
|
"eval_loss": 0.43899470567703247, |
|
"eval_runtime": 318.2195, |
|
"eval_samples_per_second": 42.141, |
|
"eval_steps_per_second": 2.637, |
|
"eval_wer": 0.35932879184421623, |
|
"step": 202844 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.178046703338623, |
|
"learning_rate": 4.2109157776419316e-05, |
|
"loss": 0.0997, |
|
"step": 213520 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.11560975742014548, |
|
"eval_loss": 0.45046666264533997, |
|
"eval_runtime": 309.141, |
|
"eval_samples_per_second": 43.378, |
|
"eval_steps_per_second": 2.714, |
|
"eval_wer": 0.35749581912149586, |
|
"step": 213520 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 82.85478973388672, |
|
"learning_rate": 4.158313778075763e-05, |
|
"loss": 0.0931, |
|
"step": 224196 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.11554197496387195, |
|
"eval_loss": 0.47222137451171875, |
|
"eval_runtime": 297.0885, |
|
"eval_samples_per_second": 45.138, |
|
"eval_steps_per_second": 2.824, |
|
"eval_wer": 0.35703285178431393, |
|
"step": 224196 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 4.658969402313232, |
|
"learning_rate": 4.105696988818994e-05, |
|
"loss": 0.0872, |
|
"step": 234872 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.1130882500467699, |
|
"eval_loss": 0.47128280997276306, |
|
"eval_runtime": 297.5307, |
|
"eval_samples_per_second": 45.071, |
|
"eval_steps_per_second": 2.82, |
|
"eval_wer": 0.3474995039635673, |
|
"step": 234872 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 31.182987213134766, |
|
"learning_rate": 4.053080199562225e-05, |
|
"loss": 0.0834, |
|
"step": 245548 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.11390977341680517, |
|
"eval_loss": 0.4635840058326721, |
|
"eval_runtime": 294.749, |
|
"eval_samples_per_second": 45.496, |
|
"eval_steps_per_second": 2.846, |
|
"eval_wer": 0.3590169975150937, |
|
"step": 245548 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 3.176016330718994, |
|
"learning_rate": 4.000478199996056e-05, |
|
"loss": 0.0815, |
|
"step": 256224 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.11560975742014548, |
|
"eval_loss": 0.46226194500923157, |
|
"eval_runtime": 299.7969, |
|
"eval_samples_per_second": 44.73, |
|
"eval_steps_per_second": 2.799, |
|
"eval_wer": 0.3642419146061471, |
|
"step": 256224 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 28.0598087310791, |
|
"learning_rate": 3.947851550945554e-05, |
|
"loss": 0.0764, |
|
"step": 266900 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.11135844176266922, |
|
"eval_loss": 0.45431631803512573, |
|
"eval_runtime": 289.8433, |
|
"eval_samples_per_second": 46.266, |
|
"eval_steps_per_second": 2.895, |
|
"eval_wer": 0.348793922845076, |
|
"step": 266900 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 5.333448886871338, |
|
"learning_rate": 3.895239691585652e-05, |
|
"loss": 0.0729, |
|
"step": 277576 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.11139911123643334, |
|
"eval_loss": 0.4733089804649353, |
|
"eval_runtime": 299.9429, |
|
"eval_samples_per_second": 44.709, |
|
"eval_steps_per_second": 2.797, |
|
"eval_wer": 0.34857661164599063, |
|
"step": 277576 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1067600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.748850144039913e+21, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|