{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999761032612221, "eval_steps": 500, "global_step": 36615, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013655279301669017, "grad_norm": 1.5929315090179443, "learning_rate": 4.932268196094497e-05, "loss": 4.1479, "step": 500 }, { "epoch": 0.027310558603338034, "grad_norm": 1.3556170463562012, "learning_rate": 4.8639901679639495e-05, "loss": 4.0077, "step": 1000 }, { "epoch": 0.040965837905007046, "grad_norm": 1.5639110803604126, "learning_rate": 4.795712139833402e-05, "loss": 3.9642, "step": 1500 }, { "epoch": 0.05462111720667607, "grad_norm": 1.4091506004333496, "learning_rate": 4.727434111702854e-05, "loss": 3.9318, "step": 2000 }, { "epoch": 0.06827639650834508, "grad_norm": 2.065875768661499, "learning_rate": 4.659292639628568e-05, "loss": 3.9178, "step": 2500 }, { "epoch": 0.08193167581001409, "grad_norm": 1.561885118484497, "learning_rate": 4.59101461149802e-05, "loss": 3.9145, "step": 3000 }, { "epoch": 0.09558695511168312, "grad_norm": 1.4199737310409546, "learning_rate": 4.522736583367472e-05, "loss": 3.8947, "step": 3500 }, { "epoch": 0.10924223441335214, "grad_norm": 1.28683340549469, "learning_rate": 4.454458555236925e-05, "loss": 3.8839, "step": 4000 }, { "epoch": 0.12289751371502115, "grad_norm": 1.4174977540969849, "learning_rate": 4.386180527106377e-05, "loss": 3.8799, "step": 4500 }, { "epoch": 0.13655279301669015, "grad_norm": 1.7308415174484253, "learning_rate": 4.31790249897583e-05, "loss": 3.8645, "step": 5000 }, { "epoch": 0.15020807231835917, "grad_norm": 1.5396959781646729, "learning_rate": 4.249624470845282e-05, "loss": 3.8627, "step": 5500 }, { "epoch": 0.16386335162002819, "grad_norm": 1.4415068626403809, "learning_rate": 4.181482998770996e-05, "loss": 3.8429, "step": 6000 }, { "epoch": 0.1775186309216972, "grad_norm": 2.0282580852508545, "learning_rate": 4.113204970640448e-05, "loss": 3.8493, "step": 6500 }, { "epoch": 0.19117391022336624, "grad_norm": 1.4611490964889526, "learning_rate": 4.044926942509901e-05, "loss": 3.8486, "step": 7000 }, { "epoch": 0.20482918952503526, "grad_norm": 1.350560188293457, "learning_rate": 3.976648914379353e-05, "loss": 3.8321, "step": 7500 }, { "epoch": 0.21848446882670428, "grad_norm": 2.157869815826416, "learning_rate": 3.9085074423050665e-05, "loss": 3.8313, "step": 8000 }, { "epoch": 0.2321397481283733, "grad_norm": 1.4486709833145142, "learning_rate": 3.840229414174519e-05, "loss": 3.8381, "step": 8500 }, { "epoch": 0.2457950274300423, "grad_norm": 1.6016576290130615, "learning_rate": 3.7719513860439716e-05, "loss": 3.8231, "step": 9000 }, { "epoch": 0.2594503067317113, "grad_norm": 1.3553367853164673, "learning_rate": 3.7036733579134235e-05, "loss": 3.8324, "step": 9500 }, { "epoch": 0.2731055860333803, "grad_norm": 1.6493974924087524, "learning_rate": 3.635395329782876e-05, "loss": 3.8101, "step": 10000 }, { "epoch": 0.28676086533504935, "grad_norm": 1.4142000675201416, "learning_rate": 3.5671173016523286e-05, "loss": 3.8158, "step": 10500 }, { "epoch": 0.30041614463671834, "grad_norm": 1.528771162033081, "learning_rate": 3.4989758295780415e-05, "loss": 3.81, "step": 11000 }, { "epoch": 0.3140714239383874, "grad_norm": 1.4616787433624268, "learning_rate": 3.430697801447494e-05, "loss": 3.8223, "step": 11500 }, { "epoch": 0.32772670324005637, "grad_norm": 1.4154361486434937, "learning_rate": 3.362419773316947e-05, "loss": 3.8105, "step": 12000 }, { "epoch": 0.3413819825417254, "grad_norm": 1.5867959260940552, "learning_rate": 3.294141745186399e-05, "loss": 3.7967, "step": 12500 }, { "epoch": 0.3550372618433944, "grad_norm": 1.3519176244735718, "learning_rate": 3.225863717055851e-05, "loss": 3.8049, "step": 13000 }, { "epoch": 0.36869254114506345, "grad_norm": 1.7033674716949463, "learning_rate": 3.1575856889253044e-05, "loss": 3.7998, "step": 13500 }, { "epoch": 0.3823478204467325, "grad_norm": 1.695982813835144, "learning_rate": 3.089444216851017e-05, "loss": 3.803, "step": 14000 }, { "epoch": 0.3960030997484015, "grad_norm": 1.7947542667388916, "learning_rate": 3.0211661887204702e-05, "loss": 3.7871, "step": 14500 }, { "epoch": 0.4096583790500705, "grad_norm": 1.637911319732666, "learning_rate": 2.952888160589922e-05, "loss": 3.8051, "step": 15000 }, { "epoch": 0.4233136583517395, "grad_norm": 1.2652530670166016, "learning_rate": 2.884610132459375e-05, "loss": 3.8007, "step": 15500 }, { "epoch": 0.43696893765340855, "grad_norm": 1.6936414241790771, "learning_rate": 2.8163321043288272e-05, "loss": 3.7773, "step": 16000 }, { "epoch": 0.45062421695507754, "grad_norm": 1.5409188270568848, "learning_rate": 2.7480540761982794e-05, "loss": 3.7779, "step": 16500 }, { "epoch": 0.4642794962567466, "grad_norm": 2.597111940383911, "learning_rate": 2.679776048067732e-05, "loss": 3.7858, "step": 17000 }, { "epoch": 0.47793477555841557, "grad_norm": 1.1228764057159424, "learning_rate": 2.6114980199371842e-05, "loss": 3.7722, "step": 17500 }, { "epoch": 0.4915900548600846, "grad_norm": 1.2788695096969604, "learning_rate": 2.5433565478628978e-05, "loss": 3.7811, "step": 18000 }, { "epoch": 0.5052453341617537, "grad_norm": 1.389441967010498, "learning_rate": 2.4750785197323504e-05, "loss": 3.7822, "step": 18500 }, { "epoch": 0.5189006134634226, "grad_norm": 3.613802671432495, "learning_rate": 2.4068004916018026e-05, "loss": 3.7919, "step": 19000 }, { "epoch": 0.5325558927650916, "grad_norm": 2.1549434661865234, "learning_rate": 2.338522463471255e-05, "loss": 3.7681, "step": 19500 }, { "epoch": 0.5462111720667606, "grad_norm": 1.6741634607315063, "learning_rate": 2.2703809913969688e-05, "loss": 3.7637, "step": 20000 }, { "epoch": 0.5598664513684297, "grad_norm": 1.480649709701538, "learning_rate": 2.202102963266421e-05, "loss": 3.7834, "step": 20500 }, { "epoch": 0.5735217306700987, "grad_norm": 1.560128927230835, "learning_rate": 2.1339614911921342e-05, "loss": 3.7737, "step": 21000 }, { "epoch": 0.5871770099717677, "grad_norm": 1.679948329925537, "learning_rate": 2.0656834630615868e-05, "loss": 3.778, "step": 21500 }, { "epoch": 0.6008322892734367, "grad_norm": 1.6976228952407837, "learning_rate": 1.9974054349310394e-05, "loss": 3.7753, "step": 22000 }, { "epoch": 0.6144875685751058, "grad_norm": 1.3865044116973877, "learning_rate": 1.9291274068004916e-05, "loss": 3.7695, "step": 22500 }, { "epoch": 0.6281428478767748, "grad_norm": 1.9324711561203003, "learning_rate": 1.860849378669944e-05, "loss": 3.7676, "step": 23000 }, { "epoch": 0.6417981271784438, "grad_norm": 1.169581651687622, "learning_rate": 1.7925713505393964e-05, "loss": 3.7722, "step": 23500 }, { "epoch": 0.6554534064801127, "grad_norm": 1.5867211818695068, "learning_rate": 1.72442987846511e-05, "loss": 3.7656, "step": 24000 }, { "epoch": 0.6691086857817818, "grad_norm": 1.4529768228530884, "learning_rate": 1.6561518503345625e-05, "loss": 3.7839, "step": 24500 }, { "epoch": 0.6827639650834508, "grad_norm": 1.6566076278686523, "learning_rate": 1.5878738222040148e-05, "loss": 3.7721, "step": 25000 }, { "epoch": 0.6964192443851198, "grad_norm": 2.1190998554229736, "learning_rate": 1.5195957940734673e-05, "loss": 3.7658, "step": 25500 }, { "epoch": 0.7100745236867888, "grad_norm": 1.4100236892700195, "learning_rate": 1.4513177659429197e-05, "loss": 3.764, "step": 26000 }, { "epoch": 0.7237298029884579, "grad_norm": 1.9166449308395386, "learning_rate": 1.3830397378123721e-05, "loss": 3.7658, "step": 26500 }, { "epoch": 0.7373850822901269, "grad_norm": 1.3906147480010986, "learning_rate": 1.3147617096818243e-05, "loss": 3.7643, "step": 27000 }, { "epoch": 0.7510403615917959, "grad_norm": 2.0951671600341797, "learning_rate": 1.2464836815512769e-05, "loss": 3.7537, "step": 27500 }, { "epoch": 0.764695640893465, "grad_norm": 1.4202444553375244, "learning_rate": 1.1783422094769903e-05, "loss": 3.7591, "step": 28000 }, { "epoch": 0.778350920195134, "grad_norm": 1.5016367435455322, "learning_rate": 1.1100641813464429e-05, "loss": 3.7615, "step": 28500 }, { "epoch": 0.792006199496803, "grad_norm": 2.102144241333008, "learning_rate": 1.0417861532158951e-05, "loss": 3.7697, "step": 29000 }, { "epoch": 0.8056614787984719, "grad_norm": 1.3535877466201782, "learning_rate": 9.735081250853475e-06, "loss": 3.7596, "step": 29500 }, { "epoch": 0.819316758100141, "grad_norm": 1.6755424737930298, "learning_rate": 9.053666530110611e-06, "loss": 3.7452, "step": 30000 }, { "epoch": 0.83297203740181, "grad_norm": 1.6872743368148804, "learning_rate": 8.370886248805135e-06, "loss": 3.7643, "step": 30500 }, { "epoch": 0.846627316703479, "grad_norm": 1.7479641437530518, "learning_rate": 7.688105967499659e-06, "loss": 3.7435, "step": 31000 }, { "epoch": 0.860282596005148, "grad_norm": 1.997037649154663, "learning_rate": 7.005325686194184e-06, "loss": 3.7504, "step": 31500 }, { "epoch": 0.8739378753068171, "grad_norm": 1.7758591175079346, "learning_rate": 6.323910965451318e-06, "loss": 3.7606, "step": 32000 }, { "epoch": 0.8875931546084861, "grad_norm": 1.2326438426971436, "learning_rate": 5.641130684145842e-06, "loss": 3.7513, "step": 32500 }, { "epoch": 0.9012484339101551, "grad_norm": 1.5938501358032227, "learning_rate": 4.958350402840366e-06, "loss": 3.7518, "step": 33000 }, { "epoch": 0.9149037132118241, "grad_norm": 1.4747166633605957, "learning_rate": 4.27557012153489e-06, "loss": 3.7528, "step": 33500 }, { "epoch": 0.9285589925134932, "grad_norm": 1.4525431394577026, "learning_rate": 3.5955209613546364e-06, "loss": 3.7514, "step": 34000 }, { "epoch": 0.9422142718151622, "grad_norm": 1.4325453042984009, "learning_rate": 2.9127406800491604e-06, "loss": 3.758, "step": 34500 }, { "epoch": 0.9558695511168311, "grad_norm": 1.4729219675064087, "learning_rate": 2.2299603987436843e-06, "loss": 3.7591, "step": 35000 }, { "epoch": 0.9695248304185001, "grad_norm": 1.5900017023086548, "learning_rate": 1.5471801174382084e-06, "loss": 3.7568, "step": 35500 }, { "epoch": 0.9831801097201692, "grad_norm": 1.5197690725326538, "learning_rate": 8.643998361327326e-07, "loss": 3.754, "step": 36000 }, { "epoch": 0.9968353890218382, "grad_norm": 1.3200616836547852, "learning_rate": 1.8161955482725659e-07, "loss": 3.7643, "step": 36500 }, { "epoch": 0.9999761032612221, "step": 36615, "total_flos": 4955540060897280.0, "train_loss": 3.805272859070416, "train_runtime": 26930.0964, "train_samples_per_second": 10.877, "train_steps_per_second": 1.36 } ], "logging_steps": 500, "max_steps": 36615, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4955540060897280.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }