{ "best_metric": 0.3956221342086792, "best_model_checkpoint": "limb_classification_person_crop_seq/t2_4heads_1layers_5e-5lr/checkpoint-1800", "epoch": 10.0, "eval_steps": 500, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1388888888888889, "grad_norm": 254960.921875, "learning_rate": 5e-06, "loss": 1.3893, "step": 25 }, { "epoch": 0.2777777777777778, "grad_norm": 486476.5625, "learning_rate": 1e-05, "loss": 1.267, "step": 50 }, { "epoch": 0.4166666666666667, "grad_norm": 159468.171875, "learning_rate": 1.5e-05, "loss": 1.1099, "step": 75 }, { "epoch": 0.5555555555555556, "grad_norm": 305320.75, "learning_rate": 2e-05, "loss": 0.8908, "step": 100 }, { "epoch": 0.6944444444444444, "grad_norm": 265693.59375, "learning_rate": 2.5e-05, "loss": 0.8314, "step": 125 }, { "epoch": 0.8333333333333334, "grad_norm": 186615.65625, "learning_rate": 3e-05, "loss": 0.7751, "step": 150 }, { "epoch": 0.9722222222222222, "grad_norm": 192767.796875, "learning_rate": 3.5e-05, "loss": 0.7468, "step": 175 }, { "epoch": 1.0, "eval_accuracy": 0.8221343873517787, "eval_loss": 0.6694391965866089, "eval_runtime": 37.5561, "eval_samples_per_second": 26.946, "eval_steps_per_second": 0.852, "step": 180 }, { "epoch": 1.1111111111111112, "grad_norm": 349009.5625, "learning_rate": 4e-05, "loss": 0.6529, "step": 200 }, { "epoch": 1.25, "grad_norm": 276312.5, "learning_rate": 4.5e-05, "loss": 0.5645, "step": 225 }, { "epoch": 1.3888888888888888, "grad_norm": 185650.203125, "learning_rate": 5e-05, "loss": 0.5763, "step": 250 }, { "epoch": 1.5277777777777777, "grad_norm": 135137.328125, "learning_rate": 4.9193548387096775e-05, "loss": 0.4923, "step": 275 }, { "epoch": 1.6666666666666665, "grad_norm": 239085.9375, "learning_rate": 4.8387096774193554e-05, "loss": 0.48, "step": 300 }, { "epoch": 1.8055555555555556, "grad_norm": 225795.71875, "learning_rate": 4.7580645161290326e-05, "loss": 0.4791, "step": 325 }, { "epoch": 1.9444444444444444, "grad_norm": 177114.5625, "learning_rate": 4.67741935483871e-05, "loss": 0.4348, "step": 350 }, { "epoch": 2.0, "eval_accuracy": 0.9051383399209486, "eval_loss": 0.45405644178390503, "eval_runtime": 38.6709, "eval_samples_per_second": 26.17, "eval_steps_per_second": 0.827, "step": 360 }, { "epoch": 2.0833333333333335, "grad_norm": 266843.53125, "learning_rate": 4.596774193548387e-05, "loss": 0.4523, "step": 375 }, { "epoch": 2.2222222222222223, "grad_norm": 286068.0625, "learning_rate": 4.516129032258064e-05, "loss": 0.4211, "step": 400 }, { "epoch": 2.361111111111111, "grad_norm": 241806.078125, "learning_rate": 4.435483870967742e-05, "loss": 0.4222, "step": 425 }, { "epoch": 2.5, "grad_norm": 191997.0625, "learning_rate": 4.3548387096774194e-05, "loss": 0.4652, "step": 450 }, { "epoch": 2.638888888888889, "grad_norm": 260036.46875, "learning_rate": 4.2741935483870973e-05, "loss": 0.4236, "step": 475 }, { "epoch": 2.7777777777777777, "grad_norm": 113210.3828125, "learning_rate": 4.1935483870967746e-05, "loss": 0.4085, "step": 500 }, { "epoch": 2.9166666666666665, "grad_norm": 56733.63671875, "learning_rate": 4.112903225806452e-05, "loss": 0.4143, "step": 525 }, { "epoch": 3.0, "eval_accuracy": 0.9130434782608695, "eval_loss": 0.40373140573501587, "eval_runtime": 39.1148, "eval_samples_per_second": 25.873, "eval_steps_per_second": 0.818, "step": 540 }, { "epoch": 3.0555555555555554, "grad_norm": 155552.890625, "learning_rate": 4.032258064516129e-05, "loss": 0.4194, "step": 550 }, { "epoch": 3.1944444444444446, "grad_norm": 137688.90625, "learning_rate": 3.951612903225806e-05, "loss": 0.4101, "step": 575 }, { "epoch": 3.3333333333333335, "grad_norm": 116833.6484375, "learning_rate": 3.870967741935484e-05, "loss": 0.403, "step": 600 }, { "epoch": 3.4722222222222223, "grad_norm": 120877.296875, "learning_rate": 3.7903225806451614e-05, "loss": 0.3997, "step": 625 }, { "epoch": 3.611111111111111, "grad_norm": 227494.65625, "learning_rate": 3.7096774193548386e-05, "loss": 0.413, "step": 650 }, { "epoch": 3.75, "grad_norm": 233935.421875, "learning_rate": 3.6290322580645165e-05, "loss": 0.4388, "step": 675 }, { "epoch": 3.888888888888889, "grad_norm": 232182.8125, "learning_rate": 3.548387096774194e-05, "loss": 0.4151, "step": 700 }, { "epoch": 4.0, "eval_accuracy": 0.9061264822134387, "eval_loss": 0.4072447121143341, "eval_runtime": 39.3667, "eval_samples_per_second": 25.707, "eval_steps_per_second": 0.813, "step": 720 }, { "epoch": 4.027777777777778, "grad_norm": 145487.5625, "learning_rate": 3.467741935483872e-05, "loss": 0.3899, "step": 725 }, { "epoch": 4.166666666666667, "grad_norm": 134005.078125, "learning_rate": 3.387096774193548e-05, "loss": 0.4111, "step": 750 }, { "epoch": 4.305555555555555, "grad_norm": 210573.5625, "learning_rate": 3.306451612903226e-05, "loss": 0.409, "step": 775 }, { "epoch": 4.444444444444445, "grad_norm": 155788.828125, "learning_rate": 3.2258064516129034e-05, "loss": 0.4012, "step": 800 }, { "epoch": 4.583333333333333, "grad_norm": 222779.453125, "learning_rate": 3.1451612903225806e-05, "loss": 0.3789, "step": 825 }, { "epoch": 4.722222222222222, "grad_norm": 159385.59375, "learning_rate": 3.0645161290322585e-05, "loss": 0.4404, "step": 850 }, { "epoch": 4.861111111111111, "grad_norm": 124916.8359375, "learning_rate": 2.9838709677419357e-05, "loss": 0.3993, "step": 875 }, { "epoch": 5.0, "grad_norm": 63225.9609375, "learning_rate": 2.9032258064516133e-05, "loss": 0.3883, "step": 900 }, { "epoch": 5.0, "eval_accuracy": 0.9209486166007905, "eval_loss": 0.4023725092411041, "eval_runtime": 39.0689, "eval_samples_per_second": 25.903, "eval_steps_per_second": 0.819, "step": 900 }, { "epoch": 5.138888888888889, "grad_norm": 188551.28125, "learning_rate": 2.822580645161291e-05, "loss": 0.4119, "step": 925 }, { "epoch": 5.277777777777778, "grad_norm": 92207.1796875, "learning_rate": 2.7419354838709678e-05, "loss": 0.3819, "step": 950 }, { "epoch": 5.416666666666667, "grad_norm": 124627.640625, "learning_rate": 2.661290322580645e-05, "loss": 0.3771, "step": 975 }, { "epoch": 5.555555555555555, "grad_norm": 97066.984375, "learning_rate": 2.5806451612903226e-05, "loss": 0.3962, "step": 1000 }, { "epoch": 5.694444444444445, "grad_norm": 146162.0, "learning_rate": 2.5e-05, "loss": 0.3979, "step": 1025 }, { "epoch": 5.833333333333333, "grad_norm": 165914.25, "learning_rate": 2.4193548387096777e-05, "loss": 0.4063, "step": 1050 }, { "epoch": 5.972222222222222, "grad_norm": 130905.46875, "learning_rate": 2.338709677419355e-05, "loss": 0.4177, "step": 1075 }, { "epoch": 6.0, "eval_accuracy": 0.9120553359683794, "eval_loss": 0.40066784620285034, "eval_runtime": 38.799, "eval_samples_per_second": 26.083, "eval_steps_per_second": 0.825, "step": 1080 }, { "epoch": 6.111111111111111, "grad_norm": 134286.234375, "learning_rate": 2.258064516129032e-05, "loss": 0.4245, "step": 1100 }, { "epoch": 6.25, "grad_norm": 112775.40625, "learning_rate": 2.1774193548387097e-05, "loss": 0.3922, "step": 1125 }, { "epoch": 6.388888888888889, "grad_norm": 139970.015625, "learning_rate": 2.0967741935483873e-05, "loss": 0.4186, "step": 1150 }, { "epoch": 6.527777777777778, "grad_norm": 161092.609375, "learning_rate": 2.0161290322580645e-05, "loss": 0.3729, "step": 1175 }, { "epoch": 6.666666666666667, "grad_norm": 141433.046875, "learning_rate": 1.935483870967742e-05, "loss": 0.3479, "step": 1200 }, { "epoch": 6.805555555555555, "grad_norm": 166351.671875, "learning_rate": 1.8548387096774193e-05, "loss": 0.3921, "step": 1225 }, { "epoch": 6.944444444444445, "grad_norm": 107018.28125, "learning_rate": 1.774193548387097e-05, "loss": 0.3858, "step": 1250 }, { "epoch": 7.0, "eval_accuracy": 0.9140316205533597, "eval_loss": 0.39838895201683044, "eval_runtime": 39.0979, "eval_samples_per_second": 25.884, "eval_steps_per_second": 0.818, "step": 1260 }, { "epoch": 7.083333333333333, "grad_norm": 59167.79296875, "learning_rate": 1.693548387096774e-05, "loss": 0.3798, "step": 1275 }, { "epoch": 7.222222222222222, "grad_norm": 152942.640625, "learning_rate": 1.6129032258064517e-05, "loss": 0.3892, "step": 1300 }, { "epoch": 7.361111111111111, "grad_norm": 177200.453125, "learning_rate": 1.5322580645161292e-05, "loss": 0.3992, "step": 1325 }, { "epoch": 7.5, "grad_norm": 69149.4140625, "learning_rate": 1.4516129032258066e-05, "loss": 0.3791, "step": 1350 }, { "epoch": 7.638888888888889, "grad_norm": 131647.15625, "learning_rate": 1.3709677419354839e-05, "loss": 0.388, "step": 1375 }, { "epoch": 7.777777777777778, "grad_norm": 123180.046875, "learning_rate": 1.2903225806451613e-05, "loss": 0.3916, "step": 1400 }, { "epoch": 7.916666666666667, "grad_norm": 205890.03125, "learning_rate": 1.2096774193548388e-05, "loss": 0.4052, "step": 1425 }, { "epoch": 8.0, "eval_accuracy": 0.9150197628458498, "eval_loss": 0.4063805043697357, "eval_runtime": 39.4618, "eval_samples_per_second": 25.645, "eval_steps_per_second": 0.811, "step": 1440 }, { "epoch": 8.055555555555555, "grad_norm": 87160.3359375, "learning_rate": 1.129032258064516e-05, "loss": 0.3703, "step": 1450 }, { "epoch": 8.194444444444445, "grad_norm": 184019.65625, "learning_rate": 1.0483870967741936e-05, "loss": 0.3886, "step": 1475 }, { "epoch": 8.333333333333334, "grad_norm": 162551.328125, "learning_rate": 9.67741935483871e-06, "loss": 0.4073, "step": 1500 }, { "epoch": 8.472222222222221, "grad_norm": 120735.328125, "learning_rate": 8.870967741935484e-06, "loss": 0.3904, "step": 1525 }, { "epoch": 8.61111111111111, "grad_norm": 75586.015625, "learning_rate": 8.064516129032258e-06, "loss": 0.3725, "step": 1550 }, { "epoch": 8.75, "grad_norm": 128439.8984375, "learning_rate": 7.258064516129033e-06, "loss": 0.357, "step": 1575 }, { "epoch": 8.88888888888889, "grad_norm": 161737.953125, "learning_rate": 6.451612903225806e-06, "loss": 0.3904, "step": 1600 }, { "epoch": 9.0, "eval_accuracy": 0.91600790513834, "eval_loss": 0.39628955721855164, "eval_runtime": 38.6568, "eval_samples_per_second": 26.179, "eval_steps_per_second": 0.828, "step": 1620 }, { "epoch": 9.027777777777779, "grad_norm": 224493.296875, "learning_rate": 5.64516129032258e-06, "loss": 0.372, "step": 1625 }, { "epoch": 9.166666666666666, "grad_norm": 133257.734375, "learning_rate": 4.838709677419355e-06, "loss": 0.3916, "step": 1650 }, { "epoch": 9.305555555555555, "grad_norm": 147201.34375, "learning_rate": 4.032258064516129e-06, "loss": 0.3709, "step": 1675 }, { "epoch": 9.444444444444445, "grad_norm": 134916.09375, "learning_rate": 3.225806451612903e-06, "loss": 0.379, "step": 1700 }, { "epoch": 9.583333333333334, "grad_norm": 103274.6875, "learning_rate": 2.4193548387096776e-06, "loss": 0.3698, "step": 1725 }, { "epoch": 9.722222222222221, "grad_norm": 96143.7421875, "learning_rate": 1.6129032258064516e-06, "loss": 0.3724, "step": 1750 }, { "epoch": 9.86111111111111, "grad_norm": 55614.1875, "learning_rate": 8.064516129032258e-07, "loss": 0.3707, "step": 1775 }, { "epoch": 10.0, "grad_norm": 160736.5625, "learning_rate": 0.0, "loss": 0.3725, "step": 1800 }, { "epoch": 10.0, "eval_accuracy": 0.9120553359683794, "eval_loss": 0.3956221342086792, "eval_runtime": 38.2951, "eval_samples_per_second": 26.426, "eval_steps_per_second": 0.836, "step": 1800 }, { "epoch": 10.0, "step": 1800, "total_flos": 0.0, "train_loss": 0.46868183824751114, "train_runtime": 3707.7308, "train_samples_per_second": 15.454, "train_steps_per_second": 0.485 } ], "logging_steps": 25, "max_steps": 1800, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }