{ "best_metric": 1.3446539640426636, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 1.0021413276231264, "eval_steps": 100, "global_step": 117, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008565310492505354, "grad_norm": 0.8522309064865112, "learning_rate": 5e-06, "loss": 1.5848, "step": 1 }, { "epoch": 0.008565310492505354, "eval_loss": 1.8308545351028442, "eval_runtime": 14.8698, "eval_samples_per_second": 13.248, "eval_steps_per_second": 3.363, "step": 1 }, { "epoch": 0.017130620985010708, "grad_norm": 1.066685676574707, "learning_rate": 1e-05, "loss": 1.7089, "step": 2 }, { "epoch": 0.02569593147751606, "grad_norm": 1.2914361953735352, "learning_rate": 1.5e-05, "loss": 1.6998, "step": 3 }, { "epoch": 0.034261241970021415, "grad_norm": 1.2189010381698608, "learning_rate": 2e-05, "loss": 1.6264, "step": 4 }, { "epoch": 0.042826552462526764, "grad_norm": 0.8527142405509949, "learning_rate": 2.5e-05, "loss": 1.6905, "step": 5 }, { "epoch": 0.05139186295503212, "grad_norm": 0.7591332197189331, "learning_rate": 3e-05, "loss": 1.6173, "step": 6 }, { "epoch": 0.059957173447537475, "grad_norm": 0.6710351705551147, "learning_rate": 3.5e-05, "loss": 1.6482, "step": 7 }, { "epoch": 0.06852248394004283, "grad_norm": 0.7942212224006653, "learning_rate": 4e-05, "loss": 1.5267, "step": 8 }, { "epoch": 0.07708779443254818, "grad_norm": 0.8262631893157959, "learning_rate": 4.5e-05, "loss": 1.6262, "step": 9 }, { "epoch": 0.08565310492505353, "grad_norm": 0.7496406435966492, "learning_rate": 5e-05, "loss": 1.5711, "step": 10 }, { "epoch": 0.09421841541755889, "grad_norm": 0.5732918977737427, "learning_rate": 5.500000000000001e-05, "loss": 1.3894, "step": 11 }, { "epoch": 0.10278372591006424, "grad_norm": 0.5378091931343079, "learning_rate": 6e-05, "loss": 1.4074, "step": 12 }, { "epoch": 0.11134903640256959, "grad_norm": 0.642239511013031, "learning_rate": 6.500000000000001e-05, "loss": 1.5217, "step": 13 }, { "epoch": 0.11991434689507495, "grad_norm": 0.7010293006896973, "learning_rate": 7e-05, "loss": 1.6286, "step": 14 }, { "epoch": 0.1284796573875803, "grad_norm": 0.5897760987281799, "learning_rate": 7.500000000000001e-05, "loss": 1.4918, "step": 15 }, { "epoch": 0.13704496788008566, "grad_norm": 0.628135621547699, "learning_rate": 8e-05, "loss": 1.4959, "step": 16 }, { "epoch": 0.145610278372591, "grad_norm": 0.6534596681594849, "learning_rate": 8.5e-05, "loss": 1.45, "step": 17 }, { "epoch": 0.15417558886509636, "grad_norm": 0.5244858860969543, "learning_rate": 9e-05, "loss": 1.524, "step": 18 }, { "epoch": 0.16274089935760172, "grad_norm": 0.4729230999946594, "learning_rate": 9.5e-05, "loss": 1.5277, "step": 19 }, { "epoch": 0.17130620985010706, "grad_norm": 0.5435839295387268, "learning_rate": 0.0001, "loss": 1.4584, "step": 20 }, { "epoch": 0.17987152034261242, "grad_norm": 0.565858006477356, "learning_rate": 9.997377845227576e-05, "loss": 1.472, "step": 21 }, { "epoch": 0.18843683083511778, "grad_norm": 0.593269944190979, "learning_rate": 9.989514131188559e-05, "loss": 1.5523, "step": 22 }, { "epoch": 0.19700214132762311, "grad_norm": 0.6731594800949097, "learning_rate": 9.97641710583307e-05, "loss": 1.5527, "step": 23 }, { "epoch": 0.20556745182012848, "grad_norm": 0.7709925174713135, "learning_rate": 9.958100506132127e-05, "loss": 1.6792, "step": 24 }, { "epoch": 0.21413276231263384, "grad_norm": 0.7717245221138, "learning_rate": 9.934583543669453e-05, "loss": 1.7106, "step": 25 }, { "epoch": 0.22269807280513917, "grad_norm": 0.865131139755249, "learning_rate": 9.905890884491195e-05, "loss": 1.726, "step": 26 }, { "epoch": 0.23126338329764454, "grad_norm": 0.7511882781982422, "learning_rate": 9.872052623234632e-05, "loss": 1.685, "step": 27 }, { "epoch": 0.2398286937901499, "grad_norm": 0.9146329760551453, "learning_rate": 9.833104251563056e-05, "loss": 1.5582, "step": 28 }, { "epoch": 0.24839400428265523, "grad_norm": 1.044276237487793, "learning_rate": 9.789086620939936e-05, "loss": 1.5964, "step": 29 }, { "epoch": 0.2569593147751606, "grad_norm": 0.4534660875797272, "learning_rate": 9.740045899781352e-05, "loss": 1.3756, "step": 30 }, { "epoch": 0.26552462526766596, "grad_norm": 0.4709625244140625, "learning_rate": 9.686033525031719e-05, "loss": 1.3499, "step": 31 }, { "epoch": 0.2740899357601713, "grad_norm": 0.4127805531024933, "learning_rate": 9.627106148213522e-05, "loss": 1.392, "step": 32 }, { "epoch": 0.2826552462526767, "grad_norm": 0.3448053002357483, "learning_rate": 9.563325576007701e-05, "loss": 1.3852, "step": 33 }, { "epoch": 0.291220556745182, "grad_norm": 0.3351711332798004, "learning_rate": 9.494758705426978e-05, "loss": 1.3387, "step": 34 }, { "epoch": 0.29978586723768735, "grad_norm": 0.3455820083618164, "learning_rate": 9.421477453650118e-05, "loss": 1.3788, "step": 35 }, { "epoch": 0.3083511777301927, "grad_norm": 0.3240755498409271, "learning_rate": 9.343558682590756e-05, "loss": 1.3604, "step": 36 }, { "epoch": 0.3169164882226981, "grad_norm": 0.352341890335083, "learning_rate": 9.261084118279847e-05, "loss": 1.3357, "step": 37 }, { "epoch": 0.32548179871520344, "grad_norm": 0.3270690143108368, "learning_rate": 9.174140265146356e-05, "loss": 1.3904, "step": 38 }, { "epoch": 0.3340471092077088, "grad_norm": 0.32343563437461853, "learning_rate": 9.082818315286055e-05, "loss": 1.3286, "step": 39 }, { "epoch": 0.3426124197002141, "grad_norm": 0.33070895075798035, "learning_rate": 8.987214052813604e-05, "loss": 1.2994, "step": 40 }, { "epoch": 0.3511777301927195, "grad_norm": 0.3631204664707184, "learning_rate": 8.887427753398248e-05, "loss": 1.3693, "step": 41 }, { "epoch": 0.35974304068522484, "grad_norm": 0.3394409120082855, "learning_rate": 8.783564079088477e-05, "loss": 1.332, "step": 42 }, { "epoch": 0.3683083511777302, "grad_norm": 0.34834015369415283, "learning_rate": 8.675731968536002e-05, "loss": 1.3462, "step": 43 }, { "epoch": 0.37687366167023556, "grad_norm": 0.37827223539352417, "learning_rate": 8.564044522734147e-05, "loss": 1.4134, "step": 44 }, { "epoch": 0.3854389721627409, "grad_norm": 0.36486443877220154, "learning_rate": 8.448618886390522e-05, "loss": 1.37, "step": 45 }, { "epoch": 0.39400428265524623, "grad_norm": 0.3832879662513733, "learning_rate": 8.329576125058406e-05, "loss": 1.3769, "step": 46 }, { "epoch": 0.4025695931477516, "grad_norm": 0.39366307854652405, "learning_rate": 8.2070410981557e-05, "loss": 1.277, "step": 47 }, { "epoch": 0.41113490364025695, "grad_norm": 0.4066513776779175, "learning_rate": 8.081142328004637e-05, "loss": 1.4122, "step": 48 }, { "epoch": 0.4197002141327623, "grad_norm": 0.4425739645957947, "learning_rate": 7.952011865029614e-05, "loss": 1.3783, "step": 49 }, { "epoch": 0.4282655246252677, "grad_norm": 0.5015028119087219, "learning_rate": 7.819785149254532e-05, "loss": 1.4114, "step": 50 }, { "epoch": 0.43683083511777304, "grad_norm": 0.5495409965515137, "learning_rate": 7.68460086824492e-05, "loss": 1.5046, "step": 51 }, { "epoch": 0.44539614561027835, "grad_norm": 0.5245218276977539, "learning_rate": 7.546600811643816e-05, "loss": 1.5037, "step": 52 }, { "epoch": 0.4539614561027837, "grad_norm": 0.583767831325531, "learning_rate": 7.405929722454026e-05, "loss": 1.3891, "step": 53 }, { "epoch": 0.4625267665952891, "grad_norm": 0.7021575570106506, "learning_rate": 7.262735145222696e-05, "loss": 1.7006, "step": 54 }, { "epoch": 0.47109207708779444, "grad_norm": 0.7322210073471069, "learning_rate": 7.117167271287453e-05, "loss": 1.5996, "step": 55 }, { "epoch": 0.4796573875802998, "grad_norm": 0.7390181422233582, "learning_rate": 6.969378781246436e-05, "loss": 1.5636, "step": 56 }, { "epoch": 0.48822269807280516, "grad_norm": 0.8508429527282715, "learning_rate": 6.819524684817438e-05, "loss": 1.5188, "step": 57 }, { "epoch": 0.49678800856531047, "grad_norm": 1.0224729776382446, "learning_rate": 6.667762158254104e-05, "loss": 1.4331, "step": 58 }, { "epoch": 0.5053533190578159, "grad_norm": 0.3958371579647064, "learning_rate": 6.514250379489753e-05, "loss": 1.2372, "step": 59 }, { "epoch": 0.5139186295503212, "grad_norm": 0.4225609302520752, "learning_rate": 6.359150361181715e-05, "loss": 1.2968, "step": 60 }, { "epoch": 0.5224839400428265, "grad_norm": 0.3793676793575287, "learning_rate": 6.202624781831268e-05, "loss": 1.345, "step": 61 }, { "epoch": 0.5310492505353319, "grad_norm": 0.3745555281639099, "learning_rate": 6.044837815156377e-05, "loss": 1.3348, "step": 62 }, { "epoch": 0.5396145610278372, "grad_norm": 0.3653203547000885, "learning_rate": 5.885954957896115e-05, "loss": 1.2985, "step": 63 }, { "epoch": 0.5481798715203426, "grad_norm": 0.33918502926826477, "learning_rate": 5.726142856227452e-05, "loss": 1.3243, "step": 64 }, { "epoch": 0.556745182012848, "grad_norm": 0.3646984100341797, "learning_rate": 5.565569130976422e-05, "loss": 1.3372, "step": 65 }, { "epoch": 0.5653104925053534, "grad_norm": 0.37293246388435364, "learning_rate": 5.4044022018070214e-05, "loss": 1.3093, "step": 66 }, { "epoch": 0.5738758029978587, "grad_norm": 0.3586772680282593, "learning_rate": 5.242811110572242e-05, "loss": 1.2544, "step": 67 }, { "epoch": 0.582441113490364, "grad_norm": 0.3929898142814636, "learning_rate": 5.080965344012508e-05, "loss": 1.3226, "step": 68 }, { "epoch": 0.5910064239828694, "grad_norm": 0.379663348197937, "learning_rate": 4.919034655987493e-05, "loss": 1.3065, "step": 69 }, { "epoch": 0.5995717344753747, "grad_norm": 0.36220523715019226, "learning_rate": 4.7571888894277604e-05, "loss": 1.2447, "step": 70 }, { "epoch": 0.6081370449678801, "grad_norm": 0.37926536798477173, "learning_rate": 4.59559779819298e-05, "loss": 1.3579, "step": 71 }, { "epoch": 0.6167023554603854, "grad_norm": 0.3654515743255615, "learning_rate": 4.434430869023579e-05, "loss": 1.3629, "step": 72 }, { "epoch": 0.6252676659528907, "grad_norm": 0.3808283507823944, "learning_rate": 4.27385714377255e-05, "loss": 1.432, "step": 73 }, { "epoch": 0.6338329764453962, "grad_norm": 0.38817018270492554, "learning_rate": 4.114045042103887e-05, "loss": 1.2995, "step": 74 }, { "epoch": 0.6423982869379015, "grad_norm": 0.3956097364425659, "learning_rate": 3.955162184843625e-05, "loss": 1.326, "step": 75 }, { "epoch": 0.6509635974304069, "grad_norm": 0.4163751006126404, "learning_rate": 3.7973752181687335e-05, "loss": 1.2521, "step": 76 }, { "epoch": 0.6595289079229122, "grad_norm": 0.47268781065940857, "learning_rate": 3.640849638818286e-05, "loss": 1.4913, "step": 77 }, { "epoch": 0.6680942184154176, "grad_norm": 0.4544057548046112, "learning_rate": 3.4857496205102474e-05, "loss": 1.3786, "step": 78 }, { "epoch": 0.6766595289079229, "grad_norm": 0.4968644380569458, "learning_rate": 3.332237841745898e-05, "loss": 1.341, "step": 79 }, { "epoch": 0.6852248394004282, "grad_norm": 0.5321404933929443, "learning_rate": 3.180475315182563e-05, "loss": 1.5097, "step": 80 }, { "epoch": 0.6937901498929336, "grad_norm": 0.5931894779205322, "learning_rate": 3.0306212187535653e-05, "loss": 1.4581, "step": 81 }, { "epoch": 0.702355460385439, "grad_norm": 0.658595621585846, "learning_rate": 2.882832728712551e-05, "loss": 1.4683, "step": 82 }, { "epoch": 0.7109207708779444, "grad_norm": 0.7256901860237122, "learning_rate": 2.737264854777306e-05, "loss": 1.5388, "step": 83 }, { "epoch": 0.7194860813704497, "grad_norm": 0.7952492237091064, "learning_rate": 2.5940702775459747e-05, "loss": 1.4592, "step": 84 }, { "epoch": 0.728051391862955, "grad_norm": 0.830305814743042, "learning_rate": 2.4533991883561868e-05, "loss": 1.3849, "step": 85 }, { "epoch": 0.7366167023554604, "grad_norm": 1.0240117311477661, "learning_rate": 2.315399131755081e-05, "loss": 1.6763, "step": 86 }, { "epoch": 0.7451820128479657, "grad_norm": 1.2047959566116333, "learning_rate": 2.180214850745467e-05, "loss": 1.5311, "step": 87 }, { "epoch": 0.7537473233404711, "grad_norm": 0.25514495372772217, "learning_rate": 2.0479881349703883e-05, "loss": 1.1414, "step": 88 }, { "epoch": 0.7623126338329764, "grad_norm": 0.2713034749031067, "learning_rate": 1.9188576719953633e-05, "loss": 1.2483, "step": 89 }, { "epoch": 0.7708779443254818, "grad_norm": 0.28236207365989685, "learning_rate": 1.7929589018443016e-05, "loss": 1.3151, "step": 90 }, { "epoch": 0.7794432548179872, "grad_norm": 0.2980254590511322, "learning_rate": 1.6704238749415957e-05, "loss": 1.2465, "step": 91 }, { "epoch": 0.7880085653104925, "grad_norm": 0.30458417534828186, "learning_rate": 1.5513811136094787e-05, "loss": 1.3183, "step": 92 }, { "epoch": 0.7965738758029979, "grad_norm": 0.31403467059135437, "learning_rate": 1.4359554772658552e-05, "loss": 1.3259, "step": 93 }, { "epoch": 0.8051391862955032, "grad_norm": 0.32990753650665283, "learning_rate": 1.3242680314639993e-05, "loss": 1.3187, "step": 94 }, { "epoch": 0.8137044967880086, "grad_norm": 0.3176243007183075, "learning_rate": 1.2164359209115234e-05, "loss": 1.3707, "step": 95 }, { "epoch": 0.8222698072805139, "grad_norm": 0.3447314500808716, "learning_rate": 1.1125722466017547e-05, "loss": 1.3448, "step": 96 }, { "epoch": 0.8308351177730193, "grad_norm": 0.3323560953140259, "learning_rate": 1.012785947186397e-05, "loss": 1.3207, "step": 97 }, { "epoch": 0.8394004282655246, "grad_norm": 0.34290653467178345, "learning_rate": 9.171816847139448e-06, "loss": 1.2039, "step": 98 }, { "epoch": 0.8479657387580299, "grad_norm": 0.3590240478515625, "learning_rate": 8.25859734853645e-06, "loss": 1.2172, "step": 99 }, { "epoch": 0.8565310492505354, "grad_norm": 0.369677871465683, "learning_rate": 7.389158817201542e-06, "loss": 1.2644, "step": 100 }, { "epoch": 0.8565310492505354, "eval_loss": 1.3446539640426636, "eval_runtime": 15.2285, "eval_samples_per_second": 12.936, "eval_steps_per_second": 3.283, "step": 100 }, { "epoch": 0.8650963597430407, "grad_norm": 0.39375630021095276, "learning_rate": 6.564413174092443e-06, "loss": 1.3082, "step": 101 }, { "epoch": 0.8736616702355461, "grad_norm": 0.37335363030433655, "learning_rate": 5.785225463498828e-06, "loss": 1.3074, "step": 102 }, { "epoch": 0.8822269807280514, "grad_norm": 0.39959633350372314, "learning_rate": 5.05241294573024e-06, "loss": 1.2991, "step": 103 }, { "epoch": 0.8907922912205567, "grad_norm": 0.4094507694244385, "learning_rate": 4.366744239922998e-06, "loss": 1.2843, "step": 104 }, { "epoch": 0.8993576017130621, "grad_norm": 0.4303068518638611, "learning_rate": 3.728938517864794e-06, "loss": 1.3151, "step": 105 }, { "epoch": 0.9079229122055674, "grad_norm": 0.42622363567352295, "learning_rate": 3.1396647496828247e-06, "loss": 1.3469, "step": 106 }, { "epoch": 0.9164882226980728, "grad_norm": 0.4673132002353668, "learning_rate": 2.5995410021864787e-06, "loss": 1.4081, "step": 107 }, { "epoch": 0.9250535331905781, "grad_norm": 0.5070969462394714, "learning_rate": 2.1091337906006482e-06, "loss": 1.4305, "step": 108 }, { "epoch": 0.9336188436830836, "grad_norm": 0.526165246963501, "learning_rate": 1.6689574843694433e-06, "loss": 1.4545, "step": 109 }, { "epoch": 0.9421841541755889, "grad_norm": 0.5437924265861511, "learning_rate": 1.2794737676536994e-06, "loss": 1.4216, "step": 110 }, { "epoch": 0.9507494646680942, "grad_norm": 0.6112513542175293, "learning_rate": 9.410911550880475e-07, "loss": 1.5846, "step": 111 }, { "epoch": 0.9593147751605996, "grad_norm": 0.6393526792526245, "learning_rate": 6.54164563305465e-07, "loss": 1.2972, "step": 112 }, { "epoch": 0.9678800856531049, "grad_norm": 0.6852487921714783, "learning_rate": 4.189949386787462e-07, "loss": 1.3823, "step": 113 }, { "epoch": 0.9764453961456103, "grad_norm": 0.7834839224815369, "learning_rate": 2.3582894166930268e-07, "loss": 1.4955, "step": 114 }, { "epoch": 0.9850107066381156, "grad_norm": 0.9401648044586182, "learning_rate": 1.0485868811441757e-07, "loss": 1.515, "step": 115 }, { "epoch": 0.9935760171306209, "grad_norm": 1.1517846584320068, "learning_rate": 2.6221547724253337e-08, "loss": 1.4529, "step": 116 }, { "epoch": 1.0021413276231264, "grad_norm": 0.49016863107681274, "learning_rate": 0.0, "loss": 1.788, "step": 117 } ], "logging_steps": 1, "max_steps": 117, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5869640165870797e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }