KYAGABA's picture
Force upload all files (including large ones)
aeba0af verified
{
"best_global_step": 234872,
"best_metric": 0.3474995039635673,
"best_model_checkpoint": "facebook-mms-1b-all-common_voice_fleurs-rw-200hrs-v1/checkpoint-234872",
"epoch": 26.0,
"eval_steps": 500,
"global_step": 277576,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 8.114748001098633,
"learning_rate": 9.993443237167478e-06,
"loss": 1.7863,
"step": 10676
},
{
"epoch": 1.0,
"eval_cer": 0.11142080162244088,
"eval_loss": 0.4298412799835205,
"eval_runtime": 370.3842,
"eval_samples_per_second": 36.206,
"eval_steps_per_second": 2.265,
"eval_wer": 0.38987518778522096,
"step": 10676
},
{
"epoch": 2.0,
"grad_norm": 49.95594787597656,
"learning_rate": 1.9989696515548896e-05,
"loss": 0.2748,
"step": 21352
},
{
"epoch": 2.0,
"eval_cer": 0.11646517201831753,
"eval_loss": 0.42387428879737854,
"eval_runtime": 330.7981,
"eval_samples_per_second": 40.538,
"eval_steps_per_second": 2.536,
"eval_wer": 0.4003628152193426,
"step": 21352
},
{
"epoch": 3.0,
"grad_norm": 12.085474967956543,
"learning_rate": 2.998594979393031e-05,
"loss": 0.2545,
"step": 32028
},
{
"epoch": 3.0,
"eval_cer": 0.1220423125205042,
"eval_loss": 0.4541724622249603,
"eval_runtime": 351.2843,
"eval_samples_per_second": 38.174,
"eval_steps_per_second": 2.388,
"eval_wer": 0.4101890607432043,
"step": 32028
},
{
"epoch": 4.0,
"grad_norm": 8.780159950256348,
"learning_rate": 3.998313975271638e-05,
"loss": 0.2486,
"step": 42704
},
{
"epoch": 4.0,
"eval_cer": 0.12309158494361855,
"eval_loss": 0.460758239030838,
"eval_runtime": 312.2958,
"eval_samples_per_second": 42.94,
"eval_steps_per_second": 2.687,
"eval_wer": 0.4010241971296025,
"step": 42704
},
{
"epoch": 5.0,
"grad_norm": 4.321907043457031,
"learning_rate": 4.997939303109779e-05,
"loss": 0.2513,
"step": 53380
},
{
"epoch": 5.0,
"eval_cer": 0.1371239090413663,
"eval_loss": 0.4873207211494446,
"eval_runtime": 302.0004,
"eval_samples_per_second": 44.404,
"eval_steps_per_second": 2.778,
"eval_wer": 0.4268936781337692,
"step": 53380
},
{
"epoch": 6.0,
"grad_norm": 3.6101784706115723,
"learning_rate": 4.947501528268029e-05,
"loss": 0.2481,
"step": 64056
},
{
"epoch": 6.0,
"eval_cer": 0.1327763422959816,
"eval_loss": 0.4789027273654938,
"eval_runtime": 328.5795,
"eval_samples_per_second": 40.812,
"eval_steps_per_second": 2.553,
"eval_wer": 0.41674618996778123,
"step": 64056
},
{
"epoch": 7.0,
"grad_norm": 4.900432109832764,
"learning_rate": 4.89488473901126e-05,
"loss": 0.2243,
"step": 74732
},
{
"epoch": 7.0,
"eval_cer": 0.12506541007030397,
"eval_loss": 0.4427885413169861,
"eval_runtime": 373.1444,
"eval_samples_per_second": 35.938,
"eval_steps_per_second": 2.248,
"eval_wer": 0.3932765804665577,
"step": 74732
},
{
"epoch": 8.0,
"grad_norm": 3.762911558151245,
"learning_rate": 4.842272879651358e-05,
"loss": 0.2074,
"step": 85408
},
{
"epoch": 8.0,
"eval_cer": 0.1276913024263408,
"eval_loss": 0.4647955298423767,
"eval_runtime": 419.1893,
"eval_samples_per_second": 31.99,
"eval_steps_per_second": 2.001,
"eval_wer": 0.39266244012131635,
"step": 85408
},
{
"epoch": 9.0,
"grad_norm": 4.190975666046143,
"learning_rate": 4.789665950188322e-05,
"loss": 0.1951,
"step": 96084
},
{
"epoch": 9.0,
"eval_cer": 0.12099710704476624,
"eval_loss": 0.4297826886177063,
"eval_runtime": 367.3195,
"eval_samples_per_second": 36.508,
"eval_steps_per_second": 2.284,
"eval_wer": 0.3886941486597568,
"step": 96084
},
{
"epoch": 10.0,
"grad_norm": 12.867461204528809,
"learning_rate": 4.7370491609315534e-05,
"loss": 0.183,
"step": 106760
},
{
"epoch": 10.0,
"eval_cer": 0.12186065553769111,
"eval_loss": 0.4488038122653961,
"eval_runtime": 318.5142,
"eval_samples_per_second": 42.102,
"eval_steps_per_second": 2.634,
"eval_wer": 0.3807292207976266,
"step": 106760
},
{
"epoch": 11.0,
"grad_norm": 2.9338295459747314,
"learning_rate": 4.6844373015716514e-05,
"loss": 0.1704,
"step": 117436
},
{
"epoch": 11.0,
"eval_cer": 0.11717146521268779,
"eval_loss": 0.4291200637817383,
"eval_runtime": 309.0823,
"eval_samples_per_second": 43.386,
"eval_steps_per_second": 2.714,
"eval_wer": 0.36931565868914107,
"step": 117436
},
{
"epoch": 12.0,
"grad_norm": 1.193000316619873,
"learning_rate": 4.631815582418016e-05,
"loss": 0.1602,
"step": 128112
},
{
"epoch": 12.0,
"eval_cer": 0.11806890493374943,
"eval_loss": 0.4337969422340393,
"eval_runtime": 370.6877,
"eval_samples_per_second": 36.176,
"eval_steps_per_second": 2.263,
"eval_wer": 0.3649127448294107,
"step": 128112
},
{
"epoch": 13.0,
"grad_norm": 4.6738057136535645,
"learning_rate": 4.5792086529549806e-05,
"loss": 0.1503,
"step": 138788
},
{
"epoch": 13.0,
"eval_cer": 0.11863963321557261,
"eval_loss": 0.43087342381477356,
"eval_runtime": 445.2671,
"eval_samples_per_second": 30.117,
"eval_steps_per_second": 1.884,
"eval_wer": 0.36557412673967066,
"step": 138788
},
{
"epoch": 14.0,
"grad_norm": 4.588621616363525,
"learning_rate": 4.526601723491945e-05,
"loss": 0.1417,
"step": 149464
},
{
"epoch": 14.0,
"eval_cer": 0.11495497889254312,
"eval_loss": 0.4210534691810608,
"eval_runtime": 380.9506,
"eval_samples_per_second": 35.201,
"eval_steps_per_second": 2.202,
"eval_wer": 0.36186093972921135,
"step": 149464
},
{
"epoch": 15.0,
"grad_norm": 20.85131072998047,
"learning_rate": 4.473989864132043e-05,
"loss": 0.136,
"step": 160140
},
{
"epoch": 15.0,
"eval_cer": 0.1191358007954949,
"eval_loss": 0.44502612948417664,
"eval_runtime": 358.3686,
"eval_samples_per_second": 37.42,
"eval_steps_per_second": 2.341,
"eval_wer": 0.37130925273292453,
"step": 160140
},
{
"epoch": 16.0,
"grad_norm": 0.3467574417591095,
"learning_rate": 4.421373074875274e-05,
"loss": 0.1305,
"step": 170816
},
{
"epoch": 16.0,
"eval_cer": 0.11509596640159207,
"eval_loss": 0.4436805248260498,
"eval_runtime": 352.8374,
"eval_samples_per_second": 38.006,
"eval_steps_per_second": 2.378,
"eval_wer": 0.36488439989039956,
"step": 170816
},
{
"epoch": 17.0,
"grad_norm": 2.450319290161133,
"learning_rate": 4.368761215515372e-05,
"loss": 0.122,
"step": 181492
},
{
"epoch": 17.0,
"eval_cer": 0.11497666927855064,
"eval_loss": 0.45026466250419617,
"eval_runtime": 361.6111,
"eval_samples_per_second": 37.084,
"eval_steps_per_second": 2.32,
"eval_wer": 0.3567210574551914,
"step": 181492
},
{
"epoch": 18.0,
"grad_norm": 1.8293219804763794,
"learning_rate": 4.316149356155469e-05,
"loss": 0.1139,
"step": 192168
},
{
"epoch": 18.0,
"eval_cer": 0.11221792330821767,
"eval_loss": 0.4319141209125519,
"eval_runtime": 319.6496,
"eval_samples_per_second": 41.952,
"eval_steps_per_second": 2.625,
"eval_wer": 0.3517795897542494,
"step": 192168
},
{
"epoch": 19.0,
"grad_norm": 121.26933288574219,
"learning_rate": 4.263532566898701e-05,
"loss": 0.1057,
"step": 202844
},
{
"epoch": 19.0,
"eval_cer": 0.11463504569893201,
"eval_loss": 0.43899470567703247,
"eval_runtime": 318.2195,
"eval_samples_per_second": 42.141,
"eval_steps_per_second": 2.637,
"eval_wer": 0.35932879184421623,
"step": 202844
},
{
"epoch": 20.0,
"grad_norm": 2.178046703338623,
"learning_rate": 4.2109157776419316e-05,
"loss": 0.0997,
"step": 213520
},
{
"epoch": 20.0,
"eval_cer": 0.11560975742014548,
"eval_loss": 0.45046666264533997,
"eval_runtime": 309.141,
"eval_samples_per_second": 43.378,
"eval_steps_per_second": 2.714,
"eval_wer": 0.35749581912149586,
"step": 213520
},
{
"epoch": 21.0,
"grad_norm": 82.85478973388672,
"learning_rate": 4.158313778075763e-05,
"loss": 0.0931,
"step": 224196
},
{
"epoch": 21.0,
"eval_cer": 0.11554197496387195,
"eval_loss": 0.47222137451171875,
"eval_runtime": 297.0885,
"eval_samples_per_second": 45.138,
"eval_steps_per_second": 2.824,
"eval_wer": 0.35703285178431393,
"step": 224196
},
{
"epoch": 22.0,
"grad_norm": 4.658969402313232,
"learning_rate": 4.105696988818994e-05,
"loss": 0.0872,
"step": 234872
},
{
"epoch": 22.0,
"eval_cer": 0.1130882500467699,
"eval_loss": 0.47128280997276306,
"eval_runtime": 297.5307,
"eval_samples_per_second": 45.071,
"eval_steps_per_second": 2.82,
"eval_wer": 0.3474995039635673,
"step": 234872
},
{
"epoch": 23.0,
"grad_norm": 31.182987213134766,
"learning_rate": 4.053080199562225e-05,
"loss": 0.0834,
"step": 245548
},
{
"epoch": 23.0,
"eval_cer": 0.11390977341680517,
"eval_loss": 0.4635840058326721,
"eval_runtime": 294.749,
"eval_samples_per_second": 45.496,
"eval_steps_per_second": 2.846,
"eval_wer": 0.3590169975150937,
"step": 245548
},
{
"epoch": 24.0,
"grad_norm": 3.176016330718994,
"learning_rate": 4.000478199996056e-05,
"loss": 0.0815,
"step": 256224
},
{
"epoch": 24.0,
"eval_cer": 0.11560975742014548,
"eval_loss": 0.46226194500923157,
"eval_runtime": 299.7969,
"eval_samples_per_second": 44.73,
"eval_steps_per_second": 2.799,
"eval_wer": 0.3642419146061471,
"step": 256224
},
{
"epoch": 25.0,
"grad_norm": 28.0598087310791,
"learning_rate": 3.947851550945554e-05,
"loss": 0.0764,
"step": 266900
},
{
"epoch": 25.0,
"eval_cer": 0.11135844176266922,
"eval_loss": 0.45431631803512573,
"eval_runtime": 289.8433,
"eval_samples_per_second": 46.266,
"eval_steps_per_second": 2.895,
"eval_wer": 0.348793922845076,
"step": 266900
},
{
"epoch": 26.0,
"grad_norm": 5.333448886871338,
"learning_rate": 3.895239691585652e-05,
"loss": 0.0729,
"step": 277576
},
{
"epoch": 26.0,
"eval_cer": 0.11139911123643334,
"eval_loss": 0.4733089804649353,
"eval_runtime": 299.9429,
"eval_samples_per_second": 44.709,
"eval_steps_per_second": 2.797,
"eval_wer": 0.34857661164599063,
"step": 277576
}
],
"logging_steps": 100,
"max_steps": 1067600,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 4
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.748850144039913e+21,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}