{ "best_metric": 0.0022578395437449217, "best_model_checkpoint": "project-name/checkpoint-1806", "epoch": 3.0, "eval_steps": 500, "global_step": 1806, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04152823920265781, "grad_norm": 5.379489898681641, "learning_rate": 6.906077348066299e-06, "loss": 3.8859, "step": 25 }, { "epoch": 0.08305647840531562, "grad_norm": 8.003674507141113, "learning_rate": 1.3812154696132598e-05, "loss": 3.4497, "step": 50 }, { "epoch": 0.12458471760797342, "grad_norm": 4.87650203704834, "learning_rate": 2.0718232044198896e-05, "loss": 2.6429, "step": 75 }, { "epoch": 0.16611295681063123, "grad_norm": 3.833146095275879, "learning_rate": 2.7624309392265197e-05, "loss": 1.5907, "step": 100 }, { "epoch": 0.20764119601328904, "grad_norm": 2.214146614074707, "learning_rate": 3.4530386740331494e-05, "loss": 0.7225, "step": 125 }, { "epoch": 0.24916943521594684, "grad_norm": 1.3430291414260864, "learning_rate": 4.143646408839779e-05, "loss": 0.3209, "step": 150 }, { "epoch": 0.29069767441860467, "grad_norm": 0.761883556842804, "learning_rate": 4.834254143646409e-05, "loss": 0.2108, "step": 175 }, { "epoch": 0.33222591362126247, "grad_norm": 1.163543939590454, "learning_rate": 4.941538461538462e-05, "loss": 0.1191, "step": 200 }, { "epoch": 0.37375415282392027, "grad_norm": 0.919347882270813, "learning_rate": 4.864615384615385e-05, "loss": 0.1199, "step": 225 }, { "epoch": 0.4152823920265781, "grad_norm": 1.222544550895691, "learning_rate": 4.787692307692308e-05, "loss": 0.0968, "step": 250 }, { "epoch": 0.4568106312292359, "grad_norm": 1.4635224342346191, "learning_rate": 4.710769230769231e-05, "loss": 0.097, "step": 275 }, { "epoch": 0.4983388704318937, "grad_norm": 1.3402526378631592, "learning_rate": 4.633846153846154e-05, "loss": 0.0689, "step": 300 }, { "epoch": 0.5398671096345515, "grad_norm": 0.8671730160713196, "learning_rate": 4.556923076923077e-05, "loss": 0.0985, "step": 325 }, { "epoch": 0.5813953488372093, "grad_norm": 1.5626745223999023, "learning_rate": 4.4800000000000005e-05, "loss": 0.1026, "step": 350 }, { "epoch": 0.6229235880398671, "grad_norm": 0.6234097480773926, "learning_rate": 4.403076923076923e-05, "loss": 0.1204, "step": 375 }, { "epoch": 0.6644518272425249, "grad_norm": 1.1657061576843262, "learning_rate": 4.3261538461538464e-05, "loss": 0.0758, "step": 400 }, { "epoch": 0.7059800664451827, "grad_norm": 0.9871473908424377, "learning_rate": 4.2492307692307694e-05, "loss": 0.1, "step": 425 }, { "epoch": 0.7475083056478405, "grad_norm": 0.6370132565498352, "learning_rate": 4.1723076923076924e-05, "loss": 0.078, "step": 450 }, { "epoch": 0.7890365448504983, "grad_norm": 1.5644354820251465, "learning_rate": 4.0953846153846154e-05, "loss": 0.0815, "step": 475 }, { "epoch": 0.8305647840531561, "grad_norm": 2.4982550144195557, "learning_rate": 4.018461538461539e-05, "loss": 0.0825, "step": 500 }, { "epoch": 0.872093023255814, "grad_norm": 1.3120901584625244, "learning_rate": 3.941538461538461e-05, "loss": 0.0659, "step": 525 }, { "epoch": 0.9136212624584718, "grad_norm": 1.236675500869751, "learning_rate": 3.864615384615385e-05, "loss": 0.0628, "step": 550 }, { "epoch": 0.9551495016611296, "grad_norm": 1.1188125610351562, "learning_rate": 3.787692307692308e-05, "loss": 0.0762, "step": 575 }, { "epoch": 0.9966777408637874, "grad_norm": 1.2520239353179932, "learning_rate": 3.710769230769231e-05, "loss": 0.0513, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.9968161683277962, "eval_f1_macro": 0.9965727646072912, "eval_f1_micro": 0.9968161683277962, "eval_f1_weighted": 0.9968213765950431, "eval_loss": 0.008436347357928753, "eval_precision_macro": 0.9963284135987243, "eval_precision_micro": 0.9968161683277962, "eval_precision_weighted": 0.9969410521911278, "eval_recall_macro": 0.9969503142190713, "eval_recall_micro": 0.9968161683277962, "eval_recall_weighted": 0.9968161683277962, "eval_runtime": 66.3126, "eval_samples_per_second": 108.939, "eval_steps_per_second": 1.146, "step": 602 }, { "epoch": 1.0382059800664452, "grad_norm": 0.6366732716560364, "learning_rate": 3.633846153846154e-05, "loss": 0.0526, "step": 625 }, { "epoch": 1.079734219269103, "grad_norm": 1.8220373392105103, "learning_rate": 3.556923076923077e-05, "loss": 0.0818, "step": 650 }, { "epoch": 1.1212624584717608, "grad_norm": 1.0081020593643188, "learning_rate": 3.48e-05, "loss": 0.0778, "step": 675 }, { "epoch": 1.1627906976744187, "grad_norm": 0.17098402976989746, "learning_rate": 3.4030769230769235e-05, "loss": 0.0875, "step": 700 }, { "epoch": 1.2043189368770764, "grad_norm": 0.3888116776943207, "learning_rate": 3.326153846153846e-05, "loss": 0.0604, "step": 725 }, { "epoch": 1.2458471760797343, "grad_norm": 0.4702301621437073, "learning_rate": 3.2492307692307695e-05, "loss": 0.0613, "step": 750 }, { "epoch": 1.287375415282392, "grad_norm": 0.26242753863334656, "learning_rate": 3.1723076923076925e-05, "loss": 0.0462, "step": 775 }, { "epoch": 1.3289036544850499, "grad_norm": 1.047985553741455, "learning_rate": 3.0953846153846155e-05, "loss": 0.0419, "step": 800 }, { "epoch": 1.3704318936877076, "grad_norm": 0.8426637649536133, "learning_rate": 3.0184615384615384e-05, "loss": 0.058, "step": 825 }, { "epoch": 1.4119601328903655, "grad_norm": 0.32604917883872986, "learning_rate": 2.9415384615384618e-05, "loss": 0.0622, "step": 850 }, { "epoch": 1.4534883720930232, "grad_norm": 0.9149531126022339, "learning_rate": 2.8646153846153844e-05, "loss": 0.077, "step": 875 }, { "epoch": 1.495016611295681, "grad_norm": 1.723166584968567, "learning_rate": 2.7876923076923077e-05, "loss": 0.0556, "step": 900 }, { "epoch": 1.536544850498339, "grad_norm": 0.5741901993751526, "learning_rate": 2.710769230769231e-05, "loss": 0.0515, "step": 925 }, { "epoch": 1.5780730897009967, "grad_norm": 1.2806332111358643, "learning_rate": 2.633846153846154e-05, "loss": 0.0624, "step": 950 }, { "epoch": 1.6196013289036544, "grad_norm": 0.016820058226585388, "learning_rate": 2.5569230769230773e-05, "loss": 0.0567, "step": 975 }, { "epoch": 1.6611295681063123, "grad_norm": 0.07608214765787125, "learning_rate": 2.48e-05, "loss": 0.0479, "step": 1000 }, { "epoch": 1.7026578073089702, "grad_norm": 0.5414699912071228, "learning_rate": 2.4030769230769233e-05, "loss": 0.0544, "step": 1025 }, { "epoch": 1.744186046511628, "grad_norm": 1.1329694986343384, "learning_rate": 2.3261538461538463e-05, "loss": 0.0637, "step": 1050 }, { "epoch": 1.7857142857142856, "grad_norm": 0.3416385352611542, "learning_rate": 2.2492307692307692e-05, "loss": 0.0426, "step": 1075 }, { "epoch": 1.8272425249169435, "grad_norm": 0.03737162798643112, "learning_rate": 2.1723076923076922e-05, "loss": 0.0555, "step": 1100 }, { "epoch": 1.8687707641196014, "grad_norm": 1.7364981174468994, "learning_rate": 2.0953846153846155e-05, "loss": 0.0638, "step": 1125 }, { "epoch": 1.910299003322259, "grad_norm": 2.366119384765625, "learning_rate": 2.0184615384615385e-05, "loss": 0.0412, "step": 1150 }, { "epoch": 1.9518272425249168, "grad_norm": 0.04764602333307266, "learning_rate": 1.9415384615384615e-05, "loss": 0.047, "step": 1175 }, { "epoch": 1.9933554817275747, "grad_norm": 1.8723899126052856, "learning_rate": 1.8646153846153845e-05, "loss": 0.0681, "step": 1200 }, { "epoch": 2.0, "eval_accuracy": 0.9994462901439646, "eval_f1_macro": 0.9993863106544856, "eval_f1_micro": 0.9994462901439646, "eval_f1_weighted": 0.999446389268536, "eval_loss": 0.0026618363335728645, "eval_precision_macro": 0.9993719331532139, "eval_precision_micro": 0.9994462901439646, "eval_precision_weighted": 0.9994504223070693, "eval_recall_macro": 0.9994055488584712, "eval_recall_micro": 0.9994462901439646, "eval_recall_weighted": 0.9994462901439646, "eval_runtime": 66.1379, "eval_samples_per_second": 109.226, "eval_steps_per_second": 1.149, "step": 1204 }, { "epoch": 2.0348837209302326, "grad_norm": 1.302878975868225, "learning_rate": 1.7876923076923078e-05, "loss": 0.0419, "step": 1225 }, { "epoch": 2.0764119601328903, "grad_norm": 0.5144720673561096, "learning_rate": 1.7107692307692308e-05, "loss": 0.062, "step": 1250 }, { "epoch": 2.117940199335548, "grad_norm": 0.30704957246780396, "learning_rate": 1.6338461538461538e-05, "loss": 0.051, "step": 1275 }, { "epoch": 2.159468438538206, "grad_norm": 0.014852220192551613, "learning_rate": 1.556923076923077e-05, "loss": 0.0598, "step": 1300 }, { "epoch": 2.200996677740864, "grad_norm": 0.3403995633125305, "learning_rate": 1.48e-05, "loss": 0.046, "step": 1325 }, { "epoch": 2.2425249169435215, "grad_norm": 0.502398669719696, "learning_rate": 1.403076923076923e-05, "loss": 0.0461, "step": 1350 }, { "epoch": 2.284053156146179, "grad_norm": 0.31954801082611084, "learning_rate": 1.3261538461538462e-05, "loss": 0.0648, "step": 1375 }, { "epoch": 2.3255813953488373, "grad_norm": 0.3058352470397949, "learning_rate": 1.2492307692307693e-05, "loss": 0.0501, "step": 1400 }, { "epoch": 2.367109634551495, "grad_norm": 1.0894181728363037, "learning_rate": 1.1723076923076925e-05, "loss": 0.0373, "step": 1425 }, { "epoch": 2.4086378737541527, "grad_norm": 0.060482293367385864, "learning_rate": 1.0953846153846155e-05, "loss": 0.0511, "step": 1450 }, { "epoch": 2.4501661129568104, "grad_norm": 0.31768345832824707, "learning_rate": 1.0184615384615386e-05, "loss": 0.0478, "step": 1475 }, { "epoch": 2.4916943521594686, "grad_norm": 0.1634824424982071, "learning_rate": 9.415384615384616e-06, "loss": 0.0504, "step": 1500 }, { "epoch": 2.5332225913621262, "grad_norm": 0.30424895882606506, "learning_rate": 8.646153846153847e-06, "loss": 0.0533, "step": 1525 }, { "epoch": 2.574750830564784, "grad_norm": 1.7913155555725098, "learning_rate": 7.876923076923077e-06, "loss": 0.055, "step": 1550 }, { "epoch": 2.616279069767442, "grad_norm": 0.3424425423145294, "learning_rate": 7.107692307692308e-06, "loss": 0.0533, "step": 1575 }, { "epoch": 2.6578073089700998, "grad_norm": 0.02855181321501732, "learning_rate": 6.338461538461539e-06, "loss": 0.066, "step": 1600 }, { "epoch": 2.6993355481727574, "grad_norm": 0.3200845718383789, "learning_rate": 5.56923076923077e-06, "loss": 0.0542, "step": 1625 }, { "epoch": 2.740863787375415, "grad_norm": 0.6209678649902344, "learning_rate": 4.800000000000001e-06, "loss": 0.0513, "step": 1650 }, { "epoch": 2.782392026578073, "grad_norm": 0.009850960224866867, "learning_rate": 4.030769230769231e-06, "loss": 0.0357, "step": 1675 }, { "epoch": 2.823920265780731, "grad_norm": 0.5470796823501587, "learning_rate": 3.261538461538462e-06, "loss": 0.0672, "step": 1700 }, { "epoch": 2.8654485049833887, "grad_norm": 0.3075508177280426, "learning_rate": 2.4923076923076926e-06, "loss": 0.0487, "step": 1725 }, { "epoch": 2.9069767441860463, "grad_norm": 1.3108922243118286, "learning_rate": 1.7230769230769232e-06, "loss": 0.0415, "step": 1750 }, { "epoch": 2.9485049833887045, "grad_norm": 0.3696553409099579, "learning_rate": 9.53846153846154e-07, "loss": 0.0432, "step": 1775 }, { "epoch": 2.990033222591362, "grad_norm": 0.40178290009498596, "learning_rate": 1.846153846153846e-07, "loss": 0.0603, "step": 1800 }, { "epoch": 3.0, "eval_accuracy": 0.9994462901439646, "eval_f1_macro": 0.9993728898422963, "eval_f1_micro": 0.9994462901439646, "eval_f1_weighted": 0.9994461479999952, "eval_loss": 0.0022578395437449217, "eval_precision_macro": 0.999304490903036, "eval_precision_micro": 0.9994462901439646, "eval_precision_weighted": 0.9994521433108384, "eval_recall_macro": 0.999448205969945, "eval_recall_micro": 0.9994462901439646, "eval_recall_weighted": 0.9994462901439646, "eval_runtime": 64.626, "eval_samples_per_second": 111.782, "eval_steps_per_second": 1.176, "step": 1806 } ], "logging_steps": 25, "max_steps": 1806, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.032655999996527e+19, "train_batch_size": 48, "trial_name": null, "trial_params": null }