{
  "best_metric": 1.826446294784546,
  "best_model_checkpoint": "miner_id_24/checkpoint-1350",
  "epoch": 1.1958688168146403,
  "eval_steps": 150,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007247689798876608,
      "eval_loss": 3.070380210876465,
      "eval_runtime": 85.9153,
      "eval_samples_per_second": 13.525,
      "eval_steps_per_second": 3.387,
      "step": 1
    },
    {
      "epoch": 0.03623844899438304,
      "grad_norm": 3.273714542388916,
      "learning_rate": 0.0002,
      "loss": 2.1084,
      "step": 50
    },
    {
      "epoch": 0.07247689798876608,
      "grad_norm": 1.639181137084961,
      "learning_rate": 0.0001998582695676762,
      "loss": 1.989,
      "step": 100
    },
    {
      "epoch": 0.10871534698314912,
      "grad_norm": 3.0539445877075195,
      "learning_rate": 0.00019943348002101371,
      "loss": 1.9929,
      "step": 150
    },
    {
      "epoch": 0.10871534698314912,
      "eval_loss": 2.0453155040740967,
      "eval_runtime": 86.7107,
      "eval_samples_per_second": 13.401,
      "eval_steps_per_second": 3.356,
      "step": 150
    },
    {
      "epoch": 0.14495379597753216,
      "grad_norm": 1.7175414562225342,
      "learning_rate": 0.00019872683547213446,
      "loss": 1.9457,
      "step": 200
    },
    {
      "epoch": 0.1811922449719152,
      "grad_norm": 3.7000324726104736,
      "learning_rate": 0.00019774033898178667,
      "loss": 1.9367,
      "step": 250
    },
    {
      "epoch": 0.21743069396629824,
      "grad_norm": 2.244403123855591,
      "learning_rate": 0.0001964767868814516,
      "loss": 1.9372,
      "step": 300
    },
    {
      "epoch": 0.21743069396629824,
      "eval_loss": 1.9559648036956787,
      "eval_runtime": 86.7219,
      "eval_samples_per_second": 13.399,
      "eval_steps_per_second": 3.356,
      "step": 300
    },
    {
      "epoch": 0.25366914296068127,
      "grad_norm": 2.310765027999878,
      "learning_rate": 0.00019493976084683813,
      "loss": 1.9331,
      "step": 350
    },
    {
      "epoch": 0.2899075919550643,
      "grad_norm": 2.4135353565216064,
      "learning_rate": 0.00019313361774523385,
      "loss": 1.9337,
      "step": 400
    },
    {
      "epoch": 0.3261460409494474,
      "grad_norm": 3.474848747253418,
      "learning_rate": 0.00019106347728549135,
      "loss": 1.9164,
      "step": 450
    },
    {
      "epoch": 0.3261460409494474,
      "eval_loss": 1.952536702156067,
      "eval_runtime": 86.9956,
      "eval_samples_per_second": 13.357,
      "eval_steps_per_second": 3.345,
      "step": 450
    },
    {
      "epoch": 0.3623844899438304,
      "grad_norm": 2.140085458755493,
      "learning_rate": 0.00018873520750565718,
      "loss": 1.877,
      "step": 500
    },
    {
      "epoch": 0.39862293893821343,
      "grad_norm": 2.0083487033843994,
      "learning_rate": 0.0001861554081393806,
      "loss": 1.8591,
      "step": 550
    },
    {
      "epoch": 0.4348613879325965,
      "grad_norm": 3.061316967010498,
      "learning_rate": 0.0001833313919082515,
      "loss": 1.9117,
      "step": 600
    },
    {
      "epoch": 0.4348613879325965,
      "eval_loss": 1.942734956741333,
      "eval_runtime": 86.6654,
      "eval_samples_per_second": 13.408,
      "eval_steps_per_second": 3.358,
      "step": 600
    },
    {
      "epoch": 0.47109983692697954,
      "grad_norm": 2.36161470413208,
      "learning_rate": 0.00018027116379309638,
      "loss": 1.9137,
      "step": 650
    },
    {
      "epoch": 0.5073382859213625,
      "grad_norm": 2.791883707046509,
      "learning_rate": 0.00017698339834299061,
      "loss": 1.9284,
      "step": 700
    },
    {
      "epoch": 0.5435767349157457,
      "grad_norm": 2.8679757118225098,
      "learning_rate": 0.00017347741508630672,
      "loss": 1.8691,
      "step": 750
    },
    {
      "epoch": 0.5435767349157457,
      "eval_loss": 1.9142440557479858,
      "eval_runtime": 86.7231,
      "eval_samples_per_second": 13.399,
      "eval_steps_per_second": 3.356,
      "step": 750
    },
    {
      "epoch": 0.5798151839101287,
      "grad_norm": 2.4429304599761963,
      "learning_rate": 0.0001697631521134985,
      "loss": 1.9101,
      "step": 800
    },
    {
      "epoch": 0.6160536329045116,
      "grad_norm": 2.8223161697387695,
      "learning_rate": 0.00016585113790650388,
      "loss": 1.8409,
      "step": 850
    },
    {
      "epoch": 0.6522920818988948,
      "grad_norm": 1.8749058246612549,
      "learning_rate": 0.0001617524614946192,
      "loss": 1.8731,
      "step": 900
    },
    {
      "epoch": 0.6522920818988948,
      "eval_loss": 1.8797276020050049,
      "eval_runtime": 86.9372,
      "eval_samples_per_second": 13.366,
      "eval_steps_per_second": 3.347,
      "step": 900
    },
    {
      "epoch": 0.6885305308932778,
      "grad_norm": 3.5651133060455322,
      "learning_rate": 0.0001574787410214407,
      "loss": 1.8334,
      "step": 950
    },
    {
      "epoch": 0.7247689798876608,
      "grad_norm": 2.771768569946289,
      "learning_rate": 0.00015304209081197425,
      "loss": 1.8534,
      "step": 1000
    },
    {
      "epoch": 0.7610074288820439,
      "grad_norm": 2.3072197437286377,
      "learning_rate": 0.00014845508703326504,
      "loss": 1.8311,
      "step": 1050
    },
    {
      "epoch": 0.7610074288820439,
      "eval_loss": 1.865134835243225,
      "eval_runtime": 86.9546,
      "eval_samples_per_second": 13.363,
      "eval_steps_per_second": 3.347,
      "step": 1050
    },
    {
      "epoch": 0.7972458778764269,
      "grad_norm": 2.7600791454315186,
      "learning_rate": 0.00014373073204588556,
      "loss": 1.8482,
      "step": 1100
    },
    {
      "epoch": 0.83348432687081,
      "grad_norm": 3.6101410388946533,
      "learning_rate": 0.00013888241754733208,
      "loss": 1.8151,
      "step": 1150
    },
    {
      "epoch": 0.869722775865193,
      "grad_norm": 2.4437174797058105,
      "learning_rate": 0.00013392388661180303,
      "loss": 1.836,
      "step": 1200
    },
    {
      "epoch": 0.869722775865193,
      "eval_loss": 1.8474547863006592,
      "eval_runtime": 86.4314,
      "eval_samples_per_second": 13.444,
      "eval_steps_per_second": 3.367,
      "step": 1200
    },
    {
      "epoch": 0.905961224859576,
      "grad_norm": 2.752454996109009,
      "learning_rate": 0.0001288691947339621,
      "loss": 1.8837,
      "step": 1250
    },
    {
      "epoch": 0.9421996738539591,
      "grad_norm": 2.2887587547302246,
      "learning_rate": 0.0001237326699871115,
      "loss": 1.8392,
      "step": 1300
    },
    {
      "epoch": 0.9784381228483421,
      "grad_norm": 4.001684665679932,
      "learning_rate": 0.00011852887240871145,
      "loss": 1.8195,
      "step": 1350
    },
    {
      "epoch": 0.9784381228483421,
      "eval_loss": 1.826446294784546,
      "eval_runtime": 86.7246,
      "eval_samples_per_second": 13.399,
      "eval_steps_per_second": 3.355,
      "step": 1350
    },
    {
      "epoch": 1.014676571842725,
      "grad_norm": 0.9796865582466125,
      "learning_rate": 0.00011327255272837221,
      "loss": 1.7798,
      "step": 1400
    },
    {
      "epoch": 1.050915020837108,
      "grad_norm": 0.8189029693603516,
      "learning_rate": 0.00010797861055530831,
      "loss": 1.4192,
      "step": 1450
    },
    {
      "epoch": 1.0871534698314913,
      "grad_norm": 0.9342411756515503,
      "learning_rate": 0.00010266205214377748,
      "loss": 1.4029,
      "step": 1500
    },
    {
      "epoch": 1.0871534698314913,
      "eval_loss": 1.8509882688522339,
      "eval_runtime": 86.7099,
      "eval_samples_per_second": 13.401,
      "eval_steps_per_second": 3.356,
      "step": 1500
    },
    {
      "epoch": 1.1233919188258743,
      "grad_norm": 0.8776970505714417,
      "learning_rate": 9.733794785622253e-05,
      "loss": 1.3963,
      "step": 1550
    },
    {
      "epoch": 1.1596303678202573,
      "grad_norm": 0.9756889939308167,
      "learning_rate": 9.202138944469168e-05,
      "loss": 1.393,
      "step": 1600
    },
    {
      "epoch": 1.1958688168146403,
      "grad_norm": 0.8379771113395691,
      "learning_rate": 8.672744727162781e-05,
      "loss": 1.4002,
      "step": 1650
    },
    {
      "epoch": 1.1958688168146403,
      "eval_loss": 1.837064504623413,
      "eval_runtime": 86.6691,
      "eval_samples_per_second": 13.407,
      "eval_steps_per_second": 3.358,
      "step": 1650
    }
  ],
  "logging_steps": 50,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 150,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1904824490055434e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}