c14kevincardenas's picture
End of training
6351ea2 verified
{
"best_metric": 0.3964671194553375,
"best_model_checkpoint": "limb_classification_person_crop_seq/t2_4heads_1layers_5e-4lr/checkpoint-2520",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 2700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1388888888888889,
"grad_norm": 275926.53125,
"learning_rate": 5e-05,
"loss": 1.2686,
"step": 25
},
{
"epoch": 0.2777777777777778,
"grad_norm": 328248.0,
"learning_rate": 0.0001,
"loss": 0.9051,
"step": 50
},
{
"epoch": 0.4166666666666667,
"grad_norm": 368871.40625,
"learning_rate": 0.00015,
"loss": 0.7478,
"step": 75
},
{
"epoch": 0.5555555555555556,
"grad_norm": 98371.5390625,
"learning_rate": 0.0002,
"loss": 0.5482,
"step": 100
},
{
"epoch": 0.6944444444444444,
"grad_norm": 116783.9921875,
"learning_rate": 0.00025,
"loss": 0.5521,
"step": 125
},
{
"epoch": 0.8333333333333334,
"grad_norm": 390853.9375,
"learning_rate": 0.0003,
"loss": 0.4814,
"step": 150
},
{
"epoch": 0.9722222222222222,
"grad_norm": 156001.8125,
"learning_rate": 0.00035,
"loss": 0.5406,
"step": 175
},
{
"epoch": 1.0,
"eval_accuracy": 0.8883399209486166,
"eval_loss": 0.48044389486312866,
"eval_runtime": 37.7276,
"eval_samples_per_second": 26.824,
"eval_steps_per_second": 0.848,
"step": 180
},
{
"epoch": 1.1111111111111112,
"grad_norm": 191408.703125,
"learning_rate": 0.0004,
"loss": 0.5133,
"step": 200
},
{
"epoch": 1.25,
"grad_norm": 313042.46875,
"learning_rate": 0.00045000000000000004,
"loss": 0.4485,
"step": 225
},
{
"epoch": 1.3888888888888888,
"grad_norm": 164934.65625,
"learning_rate": 0.0005,
"loss": 0.5473,
"step": 250
},
{
"epoch": 1.5277777777777777,
"grad_norm": 266101.15625,
"learning_rate": 0.0004948979591836735,
"loss": 0.4893,
"step": 275
},
{
"epoch": 1.6666666666666665,
"grad_norm": 216736.703125,
"learning_rate": 0.0004897959183673469,
"loss": 0.4952,
"step": 300
},
{
"epoch": 1.8055555555555556,
"grad_norm": 130781.34375,
"learning_rate": 0.0004846938775510204,
"loss": 0.5304,
"step": 325
},
{
"epoch": 1.9444444444444444,
"grad_norm": 83755.078125,
"learning_rate": 0.00047959183673469387,
"loss": 0.4852,
"step": 350
},
{
"epoch": 2.0,
"eval_accuracy": 0.8794466403162056,
"eval_loss": 0.5456948280334473,
"eval_runtime": 36.26,
"eval_samples_per_second": 27.91,
"eval_steps_per_second": 0.883,
"step": 360
},
{
"epoch": 2.0833333333333335,
"grad_norm": 140542.96875,
"learning_rate": 0.0004744897959183674,
"loss": 0.5491,
"step": 375
},
{
"epoch": 2.2222222222222223,
"grad_norm": 275681.375,
"learning_rate": 0.00046938775510204083,
"loss": 0.4691,
"step": 400
},
{
"epoch": 2.361111111111111,
"grad_norm": 119873.5625,
"learning_rate": 0.00046428571428571433,
"loss": 0.4607,
"step": 425
},
{
"epoch": 2.5,
"grad_norm": 147333.703125,
"learning_rate": 0.0004591836734693878,
"loss": 0.5189,
"step": 450
},
{
"epoch": 2.638888888888889,
"grad_norm": 152648.078125,
"learning_rate": 0.00045408163265306124,
"loss": 0.4508,
"step": 475
},
{
"epoch": 2.7777777777777777,
"grad_norm": 129466.5078125,
"learning_rate": 0.0004489795918367347,
"loss": 0.4649,
"step": 500
},
{
"epoch": 2.9166666666666665,
"grad_norm": 39402.28515625,
"learning_rate": 0.00044387755102040814,
"loss": 0.4664,
"step": 525
},
{
"epoch": 3.0,
"eval_accuracy": 0.9051383399209486,
"eval_loss": 0.42033523321151733,
"eval_runtime": 36.9286,
"eval_samples_per_second": 27.404,
"eval_steps_per_second": 0.867,
"step": 540
},
{
"epoch": 3.0555555555555554,
"grad_norm": 167756.265625,
"learning_rate": 0.00043877551020408165,
"loss": 0.4957,
"step": 550
},
{
"epoch": 3.1944444444444446,
"grad_norm": 178619.546875,
"learning_rate": 0.0004336734693877551,
"loss": 0.4743,
"step": 575
},
{
"epoch": 3.3333333333333335,
"grad_norm": 109380.5,
"learning_rate": 0.00042857142857142855,
"loss": 0.5037,
"step": 600
},
{
"epoch": 3.4722222222222223,
"grad_norm": 161733.875,
"learning_rate": 0.00042346938775510206,
"loss": 0.4998,
"step": 625
},
{
"epoch": 3.611111111111111,
"grad_norm": 139706.953125,
"learning_rate": 0.00041836734693877556,
"loss": 0.4484,
"step": 650
},
{
"epoch": 3.75,
"grad_norm": 150799.125,
"learning_rate": 0.000413265306122449,
"loss": 0.5218,
"step": 675
},
{
"epoch": 3.888888888888889,
"grad_norm": 233782.0625,
"learning_rate": 0.00040816326530612246,
"loss": 0.4929,
"step": 700
},
{
"epoch": 4.0,
"eval_accuracy": 0.900197628458498,
"eval_loss": 0.43486830592155457,
"eval_runtime": 35.8273,
"eval_samples_per_second": 28.247,
"eval_steps_per_second": 0.893,
"step": 720
},
{
"epoch": 4.027777777777778,
"grad_norm": 124010.9140625,
"learning_rate": 0.0004030612244897959,
"loss": 0.4506,
"step": 725
},
{
"epoch": 4.166666666666667,
"grad_norm": 142697.78125,
"learning_rate": 0.00039795918367346937,
"loss": 0.4781,
"step": 750
},
{
"epoch": 4.305555555555555,
"grad_norm": 167024.359375,
"learning_rate": 0.0003928571428571429,
"loss": 0.4571,
"step": 775
},
{
"epoch": 4.444444444444445,
"grad_norm": 91725.46875,
"learning_rate": 0.0003877551020408163,
"loss": 0.45,
"step": 800
},
{
"epoch": 4.583333333333333,
"grad_norm": 99782.9921875,
"learning_rate": 0.0003826530612244898,
"loss": 0.4521,
"step": 825
},
{
"epoch": 4.722222222222222,
"grad_norm": 178208.5625,
"learning_rate": 0.00037755102040816323,
"loss": 0.4598,
"step": 850
},
{
"epoch": 4.861111111111111,
"grad_norm": 189875.09375,
"learning_rate": 0.0003724489795918368,
"loss": 0.4492,
"step": 875
},
{
"epoch": 5.0,
"grad_norm": 31828.625,
"learning_rate": 0.00036734693877551024,
"loss": 0.4334,
"step": 900
},
{
"epoch": 5.0,
"eval_accuracy": 0.8764822134387352,
"eval_loss": 0.4815811514854431,
"eval_runtime": 36.4103,
"eval_samples_per_second": 27.794,
"eval_steps_per_second": 0.879,
"step": 900
},
{
"epoch": 5.138888888888889,
"grad_norm": 131415.0,
"learning_rate": 0.0003622448979591837,
"loss": 0.4699,
"step": 925
},
{
"epoch": 5.277777777777778,
"grad_norm": 42832.09765625,
"learning_rate": 0.00035714285714285714,
"loss": 0.4448,
"step": 950
},
{
"epoch": 5.416666666666667,
"grad_norm": 202659.328125,
"learning_rate": 0.00035204081632653065,
"loss": 0.4595,
"step": 975
},
{
"epoch": 5.555555555555555,
"grad_norm": 95209.203125,
"learning_rate": 0.0003469387755102041,
"loss": 0.4968,
"step": 1000
},
{
"epoch": 5.694444444444445,
"grad_norm": 121417.4140625,
"learning_rate": 0.00034183673469387755,
"loss": 0.4662,
"step": 1025
},
{
"epoch": 5.833333333333333,
"grad_norm": 103243.7734375,
"learning_rate": 0.000336734693877551,
"loss": 0.4726,
"step": 1050
},
{
"epoch": 5.972222222222222,
"grad_norm": 79883.8671875,
"learning_rate": 0.00033163265306122445,
"loss": 0.4709,
"step": 1075
},
{
"epoch": 6.0,
"eval_accuracy": 0.8932806324110671,
"eval_loss": 0.45736971497535706,
"eval_runtime": 36.8482,
"eval_samples_per_second": 27.464,
"eval_steps_per_second": 0.868,
"step": 1080
},
{
"epoch": 6.111111111111111,
"grad_norm": 70588.4609375,
"learning_rate": 0.00032653061224489796,
"loss": 0.4736,
"step": 1100
},
{
"epoch": 6.25,
"grad_norm": 235716.375,
"learning_rate": 0.00032142857142857147,
"loss": 0.4877,
"step": 1125
},
{
"epoch": 6.388888888888889,
"grad_norm": 118860.515625,
"learning_rate": 0.0003163265306122449,
"loss": 0.4801,
"step": 1150
},
{
"epoch": 6.527777777777778,
"grad_norm": 96496.296875,
"learning_rate": 0.00031122448979591837,
"loss": 0.4387,
"step": 1175
},
{
"epoch": 6.666666666666667,
"grad_norm": 122115.0859375,
"learning_rate": 0.0003061224489795919,
"loss": 0.3712,
"step": 1200
},
{
"epoch": 6.805555555555555,
"grad_norm": 87567.2265625,
"learning_rate": 0.0003010204081632653,
"loss": 0.443,
"step": 1225
},
{
"epoch": 6.944444444444445,
"grad_norm": 102648.7265625,
"learning_rate": 0.0002959183673469388,
"loss": 0.4525,
"step": 1250
},
{
"epoch": 7.0,
"eval_accuracy": 0.8883399209486166,
"eval_loss": 0.465226411819458,
"eval_runtime": 36.2286,
"eval_samples_per_second": 27.934,
"eval_steps_per_second": 0.883,
"step": 1260
},
{
"epoch": 7.083333333333333,
"grad_norm": 54489.8828125,
"learning_rate": 0.00029081632653061223,
"loss": 0.4306,
"step": 1275
},
{
"epoch": 7.222222222222222,
"grad_norm": 81611.7890625,
"learning_rate": 0.0002857142857142857,
"loss": 0.4381,
"step": 1300
},
{
"epoch": 7.361111111111111,
"grad_norm": 139663.171875,
"learning_rate": 0.0002806122448979592,
"loss": 0.457,
"step": 1325
},
{
"epoch": 7.5,
"grad_norm": 73111.8828125,
"learning_rate": 0.00027551020408163264,
"loss": 0.4403,
"step": 1350
},
{
"epoch": 7.638888888888889,
"grad_norm": 56448.75,
"learning_rate": 0.00027040816326530614,
"loss": 0.4308,
"step": 1375
},
{
"epoch": 7.777777777777778,
"grad_norm": 59582.625,
"learning_rate": 0.0002653061224489796,
"loss": 0.4565,
"step": 1400
},
{
"epoch": 7.916666666666667,
"grad_norm": 167981.71875,
"learning_rate": 0.0002602040816326531,
"loss": 0.4601,
"step": 1425
},
{
"epoch": 8.0,
"eval_accuracy": 0.900197628458498,
"eval_loss": 0.4387129545211792,
"eval_runtime": 36.2307,
"eval_samples_per_second": 27.932,
"eval_steps_per_second": 0.883,
"step": 1440
},
{
"epoch": 8.055555555555555,
"grad_norm": 146485.015625,
"learning_rate": 0.00025510204081632655,
"loss": 0.4442,
"step": 1450
},
{
"epoch": 8.194444444444445,
"grad_norm": 77854.203125,
"learning_rate": 0.00025,
"loss": 0.4185,
"step": 1475
},
{
"epoch": 8.333333333333334,
"grad_norm": 123192.34375,
"learning_rate": 0.00024489795918367346,
"loss": 0.4692,
"step": 1500
},
{
"epoch": 8.472222222222221,
"grad_norm": 74983.546875,
"learning_rate": 0.00023979591836734694,
"loss": 0.4189,
"step": 1525
},
{
"epoch": 8.61111111111111,
"grad_norm": 136541.65625,
"learning_rate": 0.00023469387755102041,
"loss": 0.4135,
"step": 1550
},
{
"epoch": 8.75,
"grad_norm": 90828.046875,
"learning_rate": 0.0002295918367346939,
"loss": 0.419,
"step": 1575
},
{
"epoch": 8.88888888888889,
"grad_norm": 92607.2109375,
"learning_rate": 0.00022448979591836734,
"loss": 0.4361,
"step": 1600
},
{
"epoch": 9.0,
"eval_accuracy": 0.9071146245059288,
"eval_loss": 0.4137505292892456,
"eval_runtime": 36.7818,
"eval_samples_per_second": 27.514,
"eval_steps_per_second": 0.87,
"step": 1620
},
{
"epoch": 9.027777777777779,
"grad_norm": 206206.59375,
"learning_rate": 0.00021938775510204082,
"loss": 0.4229,
"step": 1625
},
{
"epoch": 9.166666666666666,
"grad_norm": 61973.49609375,
"learning_rate": 0.00021428571428571427,
"loss": 0.4488,
"step": 1650
},
{
"epoch": 9.305555555555555,
"grad_norm": 134002.28125,
"learning_rate": 0.00020918367346938778,
"loss": 0.4099,
"step": 1675
},
{
"epoch": 9.444444444444445,
"grad_norm": 162290.21875,
"learning_rate": 0.00020408163265306123,
"loss": 0.4352,
"step": 1700
},
{
"epoch": 9.583333333333334,
"grad_norm": 108405.265625,
"learning_rate": 0.00019897959183673468,
"loss": 0.4241,
"step": 1725
},
{
"epoch": 9.722222222222221,
"grad_norm": 66163.1875,
"learning_rate": 0.00019387755102040816,
"loss": 0.4053,
"step": 1750
},
{
"epoch": 9.86111111111111,
"grad_norm": 35489.41796875,
"learning_rate": 0.00018877551020408161,
"loss": 0.4023,
"step": 1775
},
{
"epoch": 10.0,
"grad_norm": 148451.015625,
"learning_rate": 0.00018367346938775512,
"loss": 0.4297,
"step": 1800
},
{
"epoch": 10.0,
"eval_accuracy": 0.9140316205533597,
"eval_loss": 0.4089234173297882,
"eval_runtime": 36.486,
"eval_samples_per_second": 27.737,
"eval_steps_per_second": 0.877,
"step": 1800
},
{
"epoch": 10.13888888888889,
"grad_norm": 132313.015625,
"learning_rate": 0.00017857142857142857,
"loss": 0.4157,
"step": 1825
},
{
"epoch": 10.277777777777779,
"grad_norm": 79190.890625,
"learning_rate": 0.00017346938775510205,
"loss": 0.4155,
"step": 1850
},
{
"epoch": 10.416666666666666,
"grad_norm": 59022.73828125,
"learning_rate": 0.0001683673469387755,
"loss": 0.3802,
"step": 1875
},
{
"epoch": 10.555555555555555,
"grad_norm": 113138.1640625,
"learning_rate": 0.00016326530612244898,
"loss": 0.4633,
"step": 1900
},
{
"epoch": 10.694444444444445,
"grad_norm": 80191.265625,
"learning_rate": 0.00015816326530612246,
"loss": 0.4034,
"step": 1925
},
{
"epoch": 10.833333333333334,
"grad_norm": 80256.8984375,
"learning_rate": 0.00015306122448979594,
"loss": 0.4033,
"step": 1950
},
{
"epoch": 10.972222222222221,
"grad_norm": 92320.3359375,
"learning_rate": 0.0001479591836734694,
"loss": 0.4,
"step": 1975
},
{
"epoch": 11.0,
"eval_accuracy": 0.9199604743083004,
"eval_loss": 0.3999524712562561,
"eval_runtime": 36.413,
"eval_samples_per_second": 27.792,
"eval_steps_per_second": 0.879,
"step": 1980
},
{
"epoch": 11.11111111111111,
"grad_norm": 83646.3828125,
"learning_rate": 0.00014285714285714284,
"loss": 0.4017,
"step": 2000
},
{
"epoch": 11.25,
"grad_norm": 47865.28515625,
"learning_rate": 0.00013775510204081632,
"loss": 0.4172,
"step": 2025
},
{
"epoch": 11.38888888888889,
"grad_norm": 100859.5859375,
"learning_rate": 0.0001326530612244898,
"loss": 0.3601,
"step": 2050
},
{
"epoch": 11.527777777777779,
"grad_norm": 89678.1796875,
"learning_rate": 0.00012755102040816328,
"loss": 0.459,
"step": 2075
},
{
"epoch": 11.666666666666666,
"grad_norm": 97468.703125,
"learning_rate": 0.00012244897959183673,
"loss": 0.3659,
"step": 2100
},
{
"epoch": 11.805555555555555,
"grad_norm": 116296.359375,
"learning_rate": 0.00011734693877551021,
"loss": 0.4134,
"step": 2125
},
{
"epoch": 11.944444444444445,
"grad_norm": 66697.9296875,
"learning_rate": 0.00011224489795918367,
"loss": 0.4035,
"step": 2150
},
{
"epoch": 12.0,
"eval_accuracy": 0.9071146245059288,
"eval_loss": 0.42599722743034363,
"eval_runtime": 36.752,
"eval_samples_per_second": 27.536,
"eval_steps_per_second": 0.871,
"step": 2160
},
{
"epoch": 12.083333333333334,
"grad_norm": 139186.21875,
"learning_rate": 0.00010714285714285714,
"loss": 0.3609,
"step": 2175
},
{
"epoch": 12.222222222222221,
"grad_norm": 69709.2109375,
"learning_rate": 0.00010204081632653062,
"loss": 0.4146,
"step": 2200
},
{
"epoch": 12.36111111111111,
"grad_norm": 84500.0859375,
"learning_rate": 9.693877551020408e-05,
"loss": 0.4013,
"step": 2225
},
{
"epoch": 12.5,
"grad_norm": 45239.5703125,
"learning_rate": 9.183673469387756e-05,
"loss": 0.3918,
"step": 2250
},
{
"epoch": 12.63888888888889,
"grad_norm": 49387.7421875,
"learning_rate": 8.673469387755102e-05,
"loss": 0.388,
"step": 2275
},
{
"epoch": 12.777777777777779,
"grad_norm": 98527.546875,
"learning_rate": 8.163265306122449e-05,
"loss": 0.3941,
"step": 2300
},
{
"epoch": 12.916666666666666,
"grad_norm": 75106.1640625,
"learning_rate": 7.653061224489797e-05,
"loss": 0.3875,
"step": 2325
},
{
"epoch": 13.0,
"eval_accuracy": 0.9100790513833992,
"eval_loss": 0.40881994366645813,
"eval_runtime": 36.7096,
"eval_samples_per_second": 27.568,
"eval_steps_per_second": 0.872,
"step": 2340
},
{
"epoch": 13.055555555555555,
"grad_norm": 94333.46875,
"learning_rate": 7.142857142857142e-05,
"loss": 0.395,
"step": 2350
},
{
"epoch": 13.194444444444445,
"grad_norm": 150090.71875,
"learning_rate": 6.63265306122449e-05,
"loss": 0.3972,
"step": 2375
},
{
"epoch": 13.333333333333334,
"grad_norm": 86562.6015625,
"learning_rate": 6.122448979591836e-05,
"loss": 0.3347,
"step": 2400
},
{
"epoch": 13.472222222222221,
"grad_norm": 205886.484375,
"learning_rate": 5.6122448979591836e-05,
"loss": 0.4316,
"step": 2425
},
{
"epoch": 13.61111111111111,
"grad_norm": 90394.1640625,
"learning_rate": 5.102040816326531e-05,
"loss": 0.3521,
"step": 2450
},
{
"epoch": 13.75,
"grad_norm": 118663.3359375,
"learning_rate": 4.591836734693878e-05,
"loss": 0.4071,
"step": 2475
},
{
"epoch": 13.88888888888889,
"grad_norm": 91543.4765625,
"learning_rate": 4.0816326530612245e-05,
"loss": 0.4117,
"step": 2500
},
{
"epoch": 14.0,
"eval_accuracy": 0.9179841897233202,
"eval_loss": 0.3964671194553375,
"eval_runtime": 36.2893,
"eval_samples_per_second": 27.887,
"eval_steps_per_second": 0.882,
"step": 2520
},
{
"epoch": 14.027777777777779,
"grad_norm": 48705.08203125,
"learning_rate": 3.571428571428571e-05,
"loss": 0.3965,
"step": 2525
},
{
"epoch": 14.166666666666666,
"grad_norm": 71733.8046875,
"learning_rate": 3.061224489795918e-05,
"loss": 0.4043,
"step": 2550
},
{
"epoch": 14.305555555555555,
"grad_norm": 113618.6484375,
"learning_rate": 2.5510204081632654e-05,
"loss": 0.3868,
"step": 2575
},
{
"epoch": 14.444444444444445,
"grad_norm": 90760.4609375,
"learning_rate": 2.0408163265306123e-05,
"loss": 0.3633,
"step": 2600
},
{
"epoch": 14.583333333333334,
"grad_norm": 58063.44921875,
"learning_rate": 1.530612244897959e-05,
"loss": 0.3651,
"step": 2625
},
{
"epoch": 14.722222222222221,
"grad_norm": 66486.3984375,
"learning_rate": 1.0204081632653061e-05,
"loss": 0.3904,
"step": 2650
},
{
"epoch": 14.86111111111111,
"grad_norm": 68429.1484375,
"learning_rate": 5.102040816326531e-06,
"loss": 0.4017,
"step": 2675
},
{
"epoch": 15.0,
"grad_norm": 489841.0625,
"learning_rate": 0.0,
"loss": 0.3518,
"step": 2700
},
{
"epoch": 15.0,
"eval_accuracy": 0.91600790513834,
"eval_loss": 0.3987027406692505,
"eval_runtime": 36.8038,
"eval_samples_per_second": 27.497,
"eval_steps_per_second": 0.869,
"step": 2700
},
{
"epoch": 15.0,
"step": 2700,
"total_flos": 0.0,
"train_loss": 0.45526096591243037,
"train_runtime": 5324.9693,
"train_samples_per_second": 16.141,
"train_steps_per_second": 0.507
}
],
"logging_steps": 25,
"max_steps": 2700,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}