{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 50, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "grad_norm": 2.202085256576538, "learning_rate": 2.75e-05, "loss": 0.7001, "step": 50 }, { "epoch": 0.5, "eval_electronics_AUC": 0.8274661508704062, "eval_electronics_accuracy": 0.5875, "eval_electronics_f1": 0.0, "eval_electronics_loss": 0.7029502987861633, "eval_electronics_macro_f1": 0.3700787401574803, "eval_electronics_micro_f1": 0.5875, "eval_electronics_precision": 0.0, "eval_electronics_recall": 0.0, "eval_electronics_runtime": 1.1511, "eval_electronics_samples_per_second": 347.485, "eval_electronics_specificity": 1.0, "eval_electronics_steps_per_second": 0.869, "step": 50 }, { "epoch": 1.0, "grad_norm": 5.9589738845825195, "learning_rate": 2.5e-05, "loss": 0.6259, "step": 100 }, { "epoch": 1.0, "eval_electronics_AUC": 0.8812894906511928, "eval_electronics_accuracy": 0.805, "eval_electronics_f1": 0.7914438502673797, "eval_electronics_loss": 0.44977375864982605, "eval_electronics_macro_f1": 0.804172629359042, "eval_electronics_micro_f1": 0.805, "eval_electronics_precision": 0.7081339712918661, "eval_electronics_recall": 0.896969696969697, "eval_electronics_runtime": 1.1287, "eval_electronics_samples_per_second": 354.379, "eval_electronics_specificity": 0.7404255319148936, "eval_electronics_steps_per_second": 0.886, "step": 100 }, { "epoch": 1.5, "grad_norm": 15.954824447631836, "learning_rate": 2.25e-05, "loss": 0.5285, "step": 150 }, { "epoch": 1.5, "eval_electronics_AUC": 0.8546228239845262, "eval_electronics_accuracy": 0.75, "eval_electronics_f1": 0.647887323943662, "eval_electronics_loss": 0.5688791275024414, "eval_electronics_macro_f1": 0.7270444371656295, "eval_electronics_micro_f1": 0.75, "eval_electronics_precision": 0.773109243697479, "eval_electronics_recall": 0.5575757575757576, "eval_electronics_runtime": 1.1417, "eval_electronics_samples_per_second": 350.348, "eval_electronics_specificity": 0.8851063829787233, "eval_electronics_steps_per_second": 0.876, "step": 150 }, { "epoch": 2.0, "grad_norm": 2.80739426612854, "learning_rate": 1.9999999999999998e-05, "loss": 0.492, "step": 200 }, { "epoch": 2.0, "eval_electronics_AUC": 0.8878401031592521, "eval_electronics_accuracy": 0.79, "eval_electronics_f1": 0.7741935483870968, "eval_electronics_loss": 0.5164185762405396, "eval_electronics_macro_f1": 0.7889659330720531, "eval_electronics_micro_f1": 0.79, "eval_electronics_precision": 0.6956521739130435, "eval_electronics_recall": 0.8727272727272727, "eval_electronics_runtime": 1.1374, "eval_electronics_samples_per_second": 351.668, "eval_electronics_specificity": 0.7319148936170212, "eval_electronics_steps_per_second": 0.879, "step": 200 }, { "epoch": 2.5, "grad_norm": 48.75403594970703, "learning_rate": 1.7500000000000002e-05, "loss": 0.4042, "step": 250 }, { "epoch": 2.5, "eval_electronics_AUC": 0.8810831721470018, "eval_electronics_accuracy": 0.7525, "eval_electronics_f1": 0.7579462102689487, "eval_electronics_loss": 0.6417726874351501, "eval_electronics_macro_f1": 0.7523746396613286, "eval_electronics_micro_f1": 0.7525, "eval_electronics_precision": 0.6352459016393442, "eval_electronics_recall": 0.9393939393939394, "eval_electronics_runtime": 1.176, "eval_electronics_samples_per_second": 340.145, "eval_electronics_specificity": 0.6212765957446809, "eval_electronics_steps_per_second": 0.85, "step": 250 }, { "epoch": 3.0, "grad_norm": 17.9029541015625, "learning_rate": 1.5e-05, "loss": 0.3676, "step": 300 }, { "epoch": 3.0, "eval_electronics_AUC": 0.8895422308188267, "eval_electronics_accuracy": 0.81, "eval_electronics_f1": 0.7432432432432432, "eval_electronics_loss": 0.5099908709526062, "eval_electronics_macro_f1": 0.7962247962247961, "eval_electronics_micro_f1": 0.81, "eval_electronics_precision": 0.8396946564885496, "eval_electronics_recall": 0.6666666666666666, "eval_electronics_runtime": 1.1391, "eval_electronics_samples_per_second": 351.16, "eval_electronics_specificity": 0.9106382978723404, "eval_electronics_steps_per_second": 0.878, "step": 300 }, { "epoch": 3.5, "grad_norm": 0.9845223426818848, "learning_rate": 1.25e-05, "loss": 0.2304, "step": 350 }, { "epoch": 3.5, "eval_electronics_AUC": 0.885880077369439, "eval_electronics_accuracy": 0.8275, "eval_electronics_f1": 0.7964601769911505, "eval_electronics_loss": 0.5810662508010864, "eval_electronics_macro_f1": 0.8233927783003474, "eval_electronics_micro_f1": 0.8275, "eval_electronics_precision": 0.7758620689655172, "eval_electronics_recall": 0.8181818181818182, "eval_electronics_runtime": 1.1575, "eval_electronics_samples_per_second": 345.569, "eval_electronics_specificity": 0.8340425531914893, "eval_electronics_steps_per_second": 0.864, "step": 350 }, { "epoch": 4.0, "grad_norm": 0.663503110408783, "learning_rate": 9.999999999999999e-06, "loss": 0.2429, "step": 400 }, { "epoch": 4.0, "eval_electronics_AUC": 0.8947001934235976, "eval_electronics_accuracy": 0.825, "eval_electronics_f1": 0.7988505747126436, "eval_electronics_loss": 0.4967746436595917, "eval_electronics_macro_f1": 0.8219916590377377, "eval_electronics_micro_f1": 0.825, "eval_electronics_precision": 0.7595628415300546, "eval_electronics_recall": 0.8424242424242424, "eval_electronics_runtime": 1.1523, "eval_electronics_samples_per_second": 347.119, "eval_electronics_specificity": 0.8127659574468085, "eval_electronics_steps_per_second": 0.868, "step": 400 }, { "epoch": 4.5, "grad_norm": 1.9665794372558594, "learning_rate": 7.5e-06, "loss": 0.1609, "step": 450 }, { "epoch": 4.5, "eval_electronics_AUC": 0.9006576402321084, "eval_electronics_accuracy": 0.815, "eval_electronics_f1": 0.7823529411764706, "eval_electronics_loss": 0.7328701019287109, "eval_electronics_macro_f1": 0.8107416879795397, "eval_electronics_micro_f1": 0.815, "eval_electronics_precision": 0.76, "eval_electronics_recall": 0.806060606060606, "eval_electronics_runtime": 1.1294, "eval_electronics_samples_per_second": 354.163, "eval_electronics_specificity": 0.8212765957446808, "eval_electronics_steps_per_second": 0.885, "step": 450 }, { "epoch": 5.0, "grad_norm": 3.498450994491577, "learning_rate": 4.9999999999999996e-06, "loss": 0.1561, "step": 500 }, { "epoch": 5.0, "eval_electronics_AUC": 0.8925338491295938, "eval_electronics_accuracy": 0.81, "eval_electronics_f1": 0.7639751552795031, "eval_electronics_loss": 0.6941885948181152, "eval_electronics_macro_f1": 0.8024896696899608, "eval_electronics_micro_f1": 0.81, "eval_electronics_precision": 0.7834394904458599, "eval_electronics_recall": 0.7454545454545455, "eval_electronics_runtime": 1.1466, "eval_electronics_samples_per_second": 348.853, "eval_electronics_specificity": 0.8553191489361702, "eval_electronics_steps_per_second": 0.872, "step": 500 }, { "epoch": 5.5, "grad_norm": 15.850218772888184, "learning_rate": 2.4999999999999998e-06, "loss": 0.1236, "step": 550 }, { "epoch": 5.5, "eval_electronics_AUC": 0.8960928433268858, "eval_electronics_accuracy": 0.815, "eval_electronics_f1": 0.7797619047619048, "eval_electronics_loss": 0.7811417579650879, "eval_electronics_macro_f1": 0.8101395730706076, "eval_electronics_micro_f1": 0.815, "eval_electronics_precision": 0.7660818713450293, "eval_electronics_recall": 0.793939393939394, "eval_electronics_runtime": 1.1464, "eval_electronics_samples_per_second": 348.932, "eval_electronics_specificity": 0.8297872340425532, "eval_electronics_steps_per_second": 0.872, "step": 550 }, { "epoch": 6.0, "grad_norm": 0.2115035206079483, "learning_rate": 0.0, "loss": 0.1345, "step": 600 }, { "epoch": 6.0, "eval_electronics_AUC": 0.8953449387491941, "eval_electronics_accuracy": 0.8125, "eval_electronics_f1": 0.7720364741641338, "eval_electronics_loss": 0.7612780928611755, "eval_electronics_macro_f1": 0.8064004026871625, "eval_electronics_micro_f1": 0.8125, "eval_electronics_precision": 0.774390243902439, "eval_electronics_recall": 0.7696969696969697, "eval_electronics_runtime": 1.151, "eval_electronics_samples_per_second": 347.525, "eval_electronics_specificity": 0.8425531914893617, "eval_electronics_steps_per_second": 0.869, "step": 600 } ], "logging_steps": 50, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 814912639488000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }