|
{ |
|
"best_metric": 0.3964671194553375, |
|
"best_model_checkpoint": "limb_classification_person_crop_seq/t2_4heads_1layers_5e-4lr/checkpoint-2520", |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 275926.53125, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2686, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 328248.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9051, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 368871.40625, |
|
"learning_rate": 0.00015, |
|
"loss": 0.7478, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 98371.5390625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5482, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 116783.9921875, |
|
"learning_rate": 0.00025, |
|
"loss": 0.5521, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 390853.9375, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4814, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 156001.8125, |
|
"learning_rate": 0.00035, |
|
"loss": 0.5406, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8883399209486166, |
|
"eval_loss": 0.48044389486312866, |
|
"eval_runtime": 37.7276, |
|
"eval_samples_per_second": 26.824, |
|
"eval_steps_per_second": 0.848, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 191408.703125, |
|
"learning_rate": 0.0004, |
|
"loss": 0.5133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 313042.46875, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 0.4485, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 164934.65625, |
|
"learning_rate": 0.0005, |
|
"loss": 0.5473, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 266101.15625, |
|
"learning_rate": 0.0004948979591836735, |
|
"loss": 0.4893, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 216736.703125, |
|
"learning_rate": 0.0004897959183673469, |
|
"loss": 0.4952, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 130781.34375, |
|
"learning_rate": 0.0004846938775510204, |
|
"loss": 0.5304, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 83755.078125, |
|
"learning_rate": 0.00047959183673469387, |
|
"loss": 0.4852, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8794466403162056, |
|
"eval_loss": 0.5456948280334473, |
|
"eval_runtime": 36.26, |
|
"eval_samples_per_second": 27.91, |
|
"eval_steps_per_second": 0.883, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 140542.96875, |
|
"learning_rate": 0.0004744897959183674, |
|
"loss": 0.5491, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 275681.375, |
|
"learning_rate": 0.00046938775510204083, |
|
"loss": 0.4691, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 119873.5625, |
|
"learning_rate": 0.00046428571428571433, |
|
"loss": 0.4607, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 147333.703125, |
|
"learning_rate": 0.0004591836734693878, |
|
"loss": 0.5189, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 152648.078125, |
|
"learning_rate": 0.00045408163265306124, |
|
"loss": 0.4508, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 129466.5078125, |
|
"learning_rate": 0.0004489795918367347, |
|
"loss": 0.4649, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 39402.28515625, |
|
"learning_rate": 0.00044387755102040814, |
|
"loss": 0.4664, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9051383399209486, |
|
"eval_loss": 0.42033523321151733, |
|
"eval_runtime": 36.9286, |
|
"eval_samples_per_second": 27.404, |
|
"eval_steps_per_second": 0.867, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 167756.265625, |
|
"learning_rate": 0.00043877551020408165, |
|
"loss": 0.4957, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.1944444444444446, |
|
"grad_norm": 178619.546875, |
|
"learning_rate": 0.0004336734693877551, |
|
"loss": 0.4743, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 109380.5, |
|
"learning_rate": 0.00042857142857142855, |
|
"loss": 0.5037, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 161733.875, |
|
"learning_rate": 0.00042346938775510206, |
|
"loss": 0.4998, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 139706.953125, |
|
"learning_rate": 0.00041836734693877556, |
|
"loss": 0.4484, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 150799.125, |
|
"learning_rate": 0.000413265306122449, |
|
"loss": 0.5218, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 233782.0625, |
|
"learning_rate": 0.00040816326530612246, |
|
"loss": 0.4929, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.900197628458498, |
|
"eval_loss": 0.43486830592155457, |
|
"eval_runtime": 35.8273, |
|
"eval_samples_per_second": 28.247, |
|
"eval_steps_per_second": 0.893, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.027777777777778, |
|
"grad_norm": 124010.9140625, |
|
"learning_rate": 0.0004030612244897959, |
|
"loss": 0.4506, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 142697.78125, |
|
"learning_rate": 0.00039795918367346937, |
|
"loss": 0.4781, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.305555555555555, |
|
"grad_norm": 167024.359375, |
|
"learning_rate": 0.0003928571428571429, |
|
"loss": 0.4571, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 91725.46875, |
|
"learning_rate": 0.0003877551020408163, |
|
"loss": 0.45, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.583333333333333, |
|
"grad_norm": 99782.9921875, |
|
"learning_rate": 0.0003826530612244898, |
|
"loss": 0.4521, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 178208.5625, |
|
"learning_rate": 0.00037755102040816323, |
|
"loss": 0.4598, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.861111111111111, |
|
"grad_norm": 189875.09375, |
|
"learning_rate": 0.0003724489795918368, |
|
"loss": 0.4492, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 31828.625, |
|
"learning_rate": 0.00036734693877551024, |
|
"loss": 0.4334, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8764822134387352, |
|
"eval_loss": 0.4815811514854431, |
|
"eval_runtime": 36.4103, |
|
"eval_samples_per_second": 27.794, |
|
"eval_steps_per_second": 0.879, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.138888888888889, |
|
"grad_norm": 131415.0, |
|
"learning_rate": 0.0003622448979591837, |
|
"loss": 0.4699, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"grad_norm": 42832.09765625, |
|
"learning_rate": 0.00035714285714285714, |
|
"loss": 0.4448, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.416666666666667, |
|
"grad_norm": 202659.328125, |
|
"learning_rate": 0.00035204081632653065, |
|
"loss": 0.4595, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 95209.203125, |
|
"learning_rate": 0.0003469387755102041, |
|
"loss": 0.4968, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.694444444444445, |
|
"grad_norm": 121417.4140625, |
|
"learning_rate": 0.00034183673469387755, |
|
"loss": 0.4662, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"grad_norm": 103243.7734375, |
|
"learning_rate": 0.000336734693877551, |
|
"loss": 0.4726, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.972222222222222, |
|
"grad_norm": 79883.8671875, |
|
"learning_rate": 0.00033163265306122445, |
|
"loss": 0.4709, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8932806324110671, |
|
"eval_loss": 0.45736971497535706, |
|
"eval_runtime": 36.8482, |
|
"eval_samples_per_second": 27.464, |
|
"eval_steps_per_second": 0.868, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 70588.4609375, |
|
"learning_rate": 0.00032653061224489796, |
|
"loss": 0.4736, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 235716.375, |
|
"learning_rate": 0.00032142857142857147, |
|
"loss": 0.4877, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"grad_norm": 118860.515625, |
|
"learning_rate": 0.0003163265306122449, |
|
"loss": 0.4801, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.527777777777778, |
|
"grad_norm": 96496.296875, |
|
"learning_rate": 0.00031122448979591837, |
|
"loss": 0.4387, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 122115.0859375, |
|
"learning_rate": 0.0003061224489795919, |
|
"loss": 0.3712, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.805555555555555, |
|
"grad_norm": 87567.2265625, |
|
"learning_rate": 0.0003010204081632653, |
|
"loss": 0.443, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 102648.7265625, |
|
"learning_rate": 0.0002959183673469388, |
|
"loss": 0.4525, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8883399209486166, |
|
"eval_loss": 0.465226411819458, |
|
"eval_runtime": 36.2286, |
|
"eval_samples_per_second": 27.934, |
|
"eval_steps_per_second": 0.883, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 7.083333333333333, |
|
"grad_norm": 54489.8828125, |
|
"learning_rate": 0.00029081632653061223, |
|
"loss": 0.4306, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 81611.7890625, |
|
"learning_rate": 0.0002857142857142857, |
|
"loss": 0.4381, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.361111111111111, |
|
"grad_norm": 139663.171875, |
|
"learning_rate": 0.0002806122448979592, |
|
"loss": 0.457, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 73111.8828125, |
|
"learning_rate": 0.00027551020408163264, |
|
"loss": 0.4403, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.638888888888889, |
|
"grad_norm": 56448.75, |
|
"learning_rate": 0.00027040816326530614, |
|
"loss": 0.4308, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 59582.625, |
|
"learning_rate": 0.0002653061224489796, |
|
"loss": 0.4565, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.916666666666667, |
|
"grad_norm": 167981.71875, |
|
"learning_rate": 0.0002602040816326531, |
|
"loss": 0.4601, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.900197628458498, |
|
"eval_loss": 0.4387129545211792, |
|
"eval_runtime": 36.2307, |
|
"eval_samples_per_second": 27.932, |
|
"eval_steps_per_second": 0.883, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"grad_norm": 146485.015625, |
|
"learning_rate": 0.00025510204081632655, |
|
"loss": 0.4442, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 8.194444444444445, |
|
"grad_norm": 77854.203125, |
|
"learning_rate": 0.00025, |
|
"loss": 0.4185, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 123192.34375, |
|
"learning_rate": 0.00024489795918367346, |
|
"loss": 0.4692, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.472222222222221, |
|
"grad_norm": 74983.546875, |
|
"learning_rate": 0.00023979591836734694, |
|
"loss": 0.4189, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 8.61111111111111, |
|
"grad_norm": 136541.65625, |
|
"learning_rate": 0.00023469387755102041, |
|
"loss": 0.4135, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 90828.046875, |
|
"learning_rate": 0.0002295918367346939, |
|
"loss": 0.419, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 92607.2109375, |
|
"learning_rate": 0.00022448979591836734, |
|
"loss": 0.4361, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9071146245059288, |
|
"eval_loss": 0.4137505292892456, |
|
"eval_runtime": 36.7818, |
|
"eval_samples_per_second": 27.514, |
|
"eval_steps_per_second": 0.87, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 9.027777777777779, |
|
"grad_norm": 206206.59375, |
|
"learning_rate": 0.00021938775510204082, |
|
"loss": 0.4229, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 9.166666666666666, |
|
"grad_norm": 61973.49609375, |
|
"learning_rate": 0.00021428571428571427, |
|
"loss": 0.4488, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.305555555555555, |
|
"grad_norm": 134002.28125, |
|
"learning_rate": 0.00020918367346938778, |
|
"loss": 0.4099, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 9.444444444444445, |
|
"grad_norm": 162290.21875, |
|
"learning_rate": 0.00020408163265306123, |
|
"loss": 0.4352, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.583333333333334, |
|
"grad_norm": 108405.265625, |
|
"learning_rate": 0.00019897959183673468, |
|
"loss": 0.4241, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 9.722222222222221, |
|
"grad_norm": 66163.1875, |
|
"learning_rate": 0.00019387755102040816, |
|
"loss": 0.4053, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 9.86111111111111, |
|
"grad_norm": 35489.41796875, |
|
"learning_rate": 0.00018877551020408161, |
|
"loss": 0.4023, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 148451.015625, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 0.4297, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9140316205533597, |
|
"eval_loss": 0.4089234173297882, |
|
"eval_runtime": 36.486, |
|
"eval_samples_per_second": 27.737, |
|
"eval_steps_per_second": 0.877, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.13888888888889, |
|
"grad_norm": 132313.015625, |
|
"learning_rate": 0.00017857142857142857, |
|
"loss": 0.4157, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 10.277777777777779, |
|
"grad_norm": 79190.890625, |
|
"learning_rate": 0.00017346938775510205, |
|
"loss": 0.4155, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 10.416666666666666, |
|
"grad_norm": 59022.73828125, |
|
"learning_rate": 0.0001683673469387755, |
|
"loss": 0.3802, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.555555555555555, |
|
"grad_norm": 113138.1640625, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 0.4633, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.694444444444445, |
|
"grad_norm": 80191.265625, |
|
"learning_rate": 0.00015816326530612246, |
|
"loss": 0.4034, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 10.833333333333334, |
|
"grad_norm": 80256.8984375, |
|
"learning_rate": 0.00015306122448979594, |
|
"loss": 0.4033, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 10.972222222222221, |
|
"grad_norm": 92320.3359375, |
|
"learning_rate": 0.0001479591836734694, |
|
"loss": 0.4, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9199604743083004, |
|
"eval_loss": 0.3999524712562561, |
|
"eval_runtime": 36.413, |
|
"eval_samples_per_second": 27.792, |
|
"eval_steps_per_second": 0.879, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 83646.3828125, |
|
"learning_rate": 0.00014285714285714284, |
|
"loss": 0.4017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"grad_norm": 47865.28515625, |
|
"learning_rate": 0.00013775510204081632, |
|
"loss": 0.4172, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 11.38888888888889, |
|
"grad_norm": 100859.5859375, |
|
"learning_rate": 0.0001326530612244898, |
|
"loss": 0.3601, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 11.527777777777779, |
|
"grad_norm": 89678.1796875, |
|
"learning_rate": 0.00012755102040816328, |
|
"loss": 0.459, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 97468.703125, |
|
"learning_rate": 0.00012244897959183673, |
|
"loss": 0.3659, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.805555555555555, |
|
"grad_norm": 116296.359375, |
|
"learning_rate": 0.00011734693877551021, |
|
"loss": 0.4134, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 11.944444444444445, |
|
"grad_norm": 66697.9296875, |
|
"learning_rate": 0.00011224489795918367, |
|
"loss": 0.4035, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9071146245059288, |
|
"eval_loss": 0.42599722743034363, |
|
"eval_runtime": 36.752, |
|
"eval_samples_per_second": 27.536, |
|
"eval_steps_per_second": 0.871, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 12.083333333333334, |
|
"grad_norm": 139186.21875, |
|
"learning_rate": 0.00010714285714285714, |
|
"loss": 0.3609, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 12.222222222222221, |
|
"grad_norm": 69709.2109375, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 0.4146, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.36111111111111, |
|
"grad_norm": 84500.0859375, |
|
"learning_rate": 9.693877551020408e-05, |
|
"loss": 0.4013, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 45239.5703125, |
|
"learning_rate": 9.183673469387756e-05, |
|
"loss": 0.3918, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.63888888888889, |
|
"grad_norm": 49387.7421875, |
|
"learning_rate": 8.673469387755102e-05, |
|
"loss": 0.388, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 12.777777777777779, |
|
"grad_norm": 98527.546875, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 0.3941, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.916666666666666, |
|
"grad_norm": 75106.1640625, |
|
"learning_rate": 7.653061224489797e-05, |
|
"loss": 0.3875, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9100790513833992, |
|
"eval_loss": 0.40881994366645813, |
|
"eval_runtime": 36.7096, |
|
"eval_samples_per_second": 27.568, |
|
"eval_steps_per_second": 0.872, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 13.055555555555555, |
|
"grad_norm": 94333.46875, |
|
"learning_rate": 7.142857142857142e-05, |
|
"loss": 0.395, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 13.194444444444445, |
|
"grad_norm": 150090.71875, |
|
"learning_rate": 6.63265306122449e-05, |
|
"loss": 0.3972, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 86562.6015625, |
|
"learning_rate": 6.122448979591836e-05, |
|
"loss": 0.3347, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.472222222222221, |
|
"grad_norm": 205886.484375, |
|
"learning_rate": 5.6122448979591836e-05, |
|
"loss": 0.4316, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 13.61111111111111, |
|
"grad_norm": 90394.1640625, |
|
"learning_rate": 5.102040816326531e-05, |
|
"loss": 0.3521, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"grad_norm": 118663.3359375, |
|
"learning_rate": 4.591836734693878e-05, |
|
"loss": 0.4071, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 91543.4765625, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 0.4117, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9179841897233202, |
|
"eval_loss": 0.3964671194553375, |
|
"eval_runtime": 36.2893, |
|
"eval_samples_per_second": 27.887, |
|
"eval_steps_per_second": 0.882, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 14.027777777777779, |
|
"grad_norm": 48705.08203125, |
|
"learning_rate": 3.571428571428571e-05, |
|
"loss": 0.3965, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 14.166666666666666, |
|
"grad_norm": 71733.8046875, |
|
"learning_rate": 3.061224489795918e-05, |
|
"loss": 0.4043, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 14.305555555555555, |
|
"grad_norm": 113618.6484375, |
|
"learning_rate": 2.5510204081632654e-05, |
|
"loss": 0.3868, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 14.444444444444445, |
|
"grad_norm": 90760.4609375, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 0.3633, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.583333333333334, |
|
"grad_norm": 58063.44921875, |
|
"learning_rate": 1.530612244897959e-05, |
|
"loss": 0.3651, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.722222222222221, |
|
"grad_norm": 66486.3984375, |
|
"learning_rate": 1.0204081632653061e-05, |
|
"loss": 0.3904, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 14.86111111111111, |
|
"grad_norm": 68429.1484375, |
|
"learning_rate": 5.102040816326531e-06, |
|
"loss": 0.4017, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 489841.0625, |
|
"learning_rate": 0.0, |
|
"loss": 0.3518, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.91600790513834, |
|
"eval_loss": 0.3987027406692505, |
|
"eval_runtime": 36.8038, |
|
"eval_samples_per_second": 27.497, |
|
"eval_steps_per_second": 0.869, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 2700, |
|
"total_flos": 0.0, |
|
"train_loss": 0.45526096591243037, |
|
"train_runtime": 5324.9693, |
|
"train_samples_per_second": 16.141, |
|
"train_steps_per_second": 0.507 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 2700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|