{ "best_metric": 2.666287899017334, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.000382616458884673, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.2753881962822434e-05, "eval_loss": 3.4868648052215576, "eval_runtime": 4278.1726, "eval_samples_per_second": 7.717, "eval_steps_per_second": 3.858, "step": 1 }, { "epoch": 3.82616458884673e-05, "grad_norm": 1.6738007068634033, "learning_rate": 6e-05, "loss": 3.5199, "step": 3 }, { "epoch": 6.376940981411218e-05, "eval_loss": 3.4390904903411865, "eval_runtime": 4295.5009, "eval_samples_per_second": 7.686, "eval_steps_per_second": 3.843, "step": 5 }, { "epoch": 7.65232917769346e-05, "grad_norm": 1.7387325763702393, "learning_rate": 0.00012, "loss": 3.3451, "step": 6 }, { "epoch": 0.00011478493766540191, "grad_norm": 1.915514588356018, "learning_rate": 0.00018, "loss": 3.2395, "step": 9 }, { "epoch": 0.00012753881962822435, "eval_loss": 3.032045841217041, "eval_runtime": 4295.6165, "eval_samples_per_second": 7.686, "eval_steps_per_second": 3.843, "step": 10 }, { "epoch": 0.0001530465835538692, "grad_norm": 2.1459999084472656, "learning_rate": 0.00019510565162951537, "loss": 2.9681, "step": 12 }, { "epoch": 0.0001913082294423365, "grad_norm": 1.5466257333755493, "learning_rate": 0.00017071067811865476, "loss": 2.8409, "step": 15 }, { "epoch": 0.0001913082294423365, "eval_loss": 2.7555739879608154, "eval_runtime": 4291.7816, "eval_samples_per_second": 7.692, "eval_steps_per_second": 3.846, "step": 15 }, { "epoch": 0.00022956987533080382, "grad_norm": 1.3524450063705444, "learning_rate": 0.00013090169943749476, "loss": 2.7526, "step": 18 }, { "epoch": 0.0002550776392564487, "eval_loss": 2.691340923309326, "eval_runtime": 4290.55, "eval_samples_per_second": 7.695, "eval_steps_per_second": 3.847, "step": 20 }, { "epoch": 0.0002678315212192711, "grad_norm": 1.37107253074646, "learning_rate": 8.435655349597689e-05, "loss": 2.7938, "step": 21 }, { "epoch": 0.0003060931671077384, "grad_norm": 1.5554951429367065, "learning_rate": 4.12214747707527e-05, "loss": 2.7326, "step": 24 }, { "epoch": 0.00031884704907056085, "eval_loss": 2.6710398197174072, "eval_runtime": 4289.0441, "eval_samples_per_second": 7.697, "eval_steps_per_second": 3.849, "step": 25 }, { "epoch": 0.00034435481299620573, "grad_norm": 1.6495437622070312, "learning_rate": 1.0899347581163221e-05, "loss": 2.7699, "step": 27 }, { "epoch": 0.000382616458884673, "grad_norm": 1.6575912237167358, "learning_rate": 0.0, "loss": 2.7252, "step": 30 }, { "epoch": 0.000382616458884673, "eval_loss": 2.666287899017334, "eval_runtime": 4290.9939, "eval_samples_per_second": 7.694, "eval_steps_per_second": 3.847, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.112850252693504e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }