{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991532599491956, "eval_steps": 500, "global_step": 295, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01693480101608806, "grad_norm": 11.07681941986084, "learning_rate": 3.3333333333333333e-06, "loss": 1.8357, "step": 5 }, { "epoch": 0.03386960203217612, "grad_norm": 4.368485450744629, "learning_rate": 6.666666666666667e-06, "loss": 1.6208, "step": 10 }, { "epoch": 0.05080440304826418, "grad_norm": 2.597416877746582, "learning_rate": 1e-05, "loss": 1.2437, "step": 15 }, { "epoch": 0.06773920406435224, "grad_norm": 2.159755229949951, "learning_rate": 1.3333333333333333e-05, "loss": 0.9804, "step": 20 }, { "epoch": 0.0846740050804403, "grad_norm": 1.7200169563293457, "learning_rate": 1.6666666666666667e-05, "loss": 0.8607, "step": 25 }, { "epoch": 0.10160880609652836, "grad_norm": 1.2486546039581299, "learning_rate": 2e-05, "loss": 0.7869, "step": 30 }, { "epoch": 0.11854360711261643, "grad_norm": 1.2447178363800049, "learning_rate": 1.9982437317643218e-05, "loss": 0.7324, "step": 35 }, { "epoch": 0.1354784081287045, "grad_norm": 1.129758358001709, "learning_rate": 1.992981096013517e-05, "loss": 0.7058, "step": 40 }, { "epoch": 0.15241320914479256, "grad_norm": 0.9256328344345093, "learning_rate": 1.984230577947597e-05, "loss": 0.6845, "step": 45 }, { "epoch": 0.1693480101608806, "grad_norm": 0.9811558723449707, "learning_rate": 1.972022914080411e-05, "loss": 0.6698, "step": 50 }, { "epoch": 0.18628281117696868, "grad_norm": 1.0069135427474976, "learning_rate": 1.9564009842765225e-05, "loss": 0.6393, "step": 55 }, { "epoch": 0.20321761219305673, "grad_norm": 0.9143001437187195, "learning_rate": 1.9374196611341212e-05, "loss": 0.6321, "step": 60 }, { "epoch": 0.2201524132091448, "grad_norm": 0.9945001006126404, "learning_rate": 1.9151456172430186e-05, "loss": 0.6286, "step": 65 }, { "epoch": 0.23708721422523285, "grad_norm": 0.9755650758743286, "learning_rate": 1.8896570909947477e-05, "loss": 0.6178, "step": 70 }, { "epoch": 0.2540220152413209, "grad_norm": 0.9144276976585388, "learning_rate": 1.8610436117673557e-05, "loss": 0.6204, "step": 75 }, { "epoch": 0.270956816257409, "grad_norm": 0.9252317547798157, "learning_rate": 1.829405685450202e-05, "loss": 0.6124, "step": 80 }, { "epoch": 0.28789161727349705, "grad_norm": 0.7833272218704224, "learning_rate": 1.7948544414133534e-05, "loss": 0.5958, "step": 85 }, { "epoch": 0.3048264182895851, "grad_norm": 0.8833147883415222, "learning_rate": 1.7575112421616203e-05, "loss": 0.6049, "step": 90 }, { "epoch": 0.32176121930567314, "grad_norm": 0.8253676295280457, "learning_rate": 1.717507257044331e-05, "loss": 0.5905, "step": 95 }, { "epoch": 0.3386960203217612, "grad_norm": 0.800396740436554, "learning_rate": 1.6749830015182106e-05, "loss": 0.5698, "step": 100 }, { "epoch": 0.3556308213378493, "grad_norm": 0.9177567362785339, "learning_rate": 1.6300878435817115e-05, "loss": 0.5787, "step": 105 }, { "epoch": 0.37256562235393736, "grad_norm": 0.8931817412376404, "learning_rate": 1.5829794791144723e-05, "loss": 0.5748, "step": 110 }, { "epoch": 0.3895004233700254, "grad_norm": 1.0776097774505615, "learning_rate": 1.533823377964791e-05, "loss": 0.5693, "step": 115 }, { "epoch": 0.40643522438611346, "grad_norm": 0.9571148157119751, "learning_rate": 1.482792202730745e-05, "loss": 0.5795, "step": 120 }, { "epoch": 0.42337002540220153, "grad_norm": 0.9276224970817566, "learning_rate": 1.4300652022765207e-05, "loss": 0.5744, "step": 125 }, { "epoch": 0.4403048264182896, "grad_norm": 0.8562670350074768, "learning_rate": 1.3758275821142382e-05, "loss": 0.574, "step": 130 }, { "epoch": 0.4572396274343776, "grad_norm": 0.987328827381134, "learning_rate": 1.3202698538628376e-05, "loss": 0.5612, "step": 135 }, { "epoch": 0.4741744284504657, "grad_norm": 0.7609542012214661, "learning_rate": 1.2635871660690677e-05, "loss": 0.5608, "step": 140 }, { "epoch": 0.4911092294665538, "grad_norm": 0.752984881401062, "learning_rate": 1.2059786187410984e-05, "loss": 0.5462, "step": 145 }, { "epoch": 0.5080440304826418, "grad_norm": 0.7577420473098755, "learning_rate": 1.1476465640024814e-05, "loss": 0.5565, "step": 150 }, { "epoch": 0.5249788314987299, "grad_norm": 0.7515721321105957, "learning_rate": 1.0887958953229349e-05, "loss": 0.5475, "step": 155 }, { "epoch": 0.541913632514818, "grad_norm": 0.7427639961242676, "learning_rate": 1.0296333278225599e-05, "loss": 0.5561, "step": 160 }, { "epoch": 0.558848433530906, "grad_norm": 0.8810573816299438, "learning_rate": 9.703666721774403e-06, "loss": 0.5391, "step": 165 }, { "epoch": 0.5757832345469941, "grad_norm": 0.7623007297515869, "learning_rate": 9.112041046770653e-06, "loss": 0.5504, "step": 170 }, { "epoch": 0.5927180355630821, "grad_norm": 0.7749871015548706, "learning_rate": 8.52353435997519e-06, "loss": 0.5442, "step": 175 }, { "epoch": 0.6096528365791702, "grad_norm": 0.731037974357605, "learning_rate": 7.940213812589018e-06, "loss": 0.5504, "step": 180 }, { "epoch": 0.6265876375952583, "grad_norm": 0.6718469262123108, "learning_rate": 7.364128339309326e-06, "loss": 0.5461, "step": 185 }, { "epoch": 0.6435224386113463, "grad_norm": 0.7169979214668274, "learning_rate": 6.797301461371626e-06, "loss": 0.5387, "step": 190 }, { "epoch": 0.6604572396274344, "grad_norm": 0.6952066421508789, "learning_rate": 6.241724178857621e-06, "loss": 0.5391, "step": 195 }, { "epoch": 0.6773920406435224, "grad_norm": 0.7324413657188416, "learning_rate": 5.699347977234799e-06, "loss": 0.5351, "step": 200 }, { "epoch": 0.6943268416596104, "grad_norm": 0.7053231596946716, "learning_rate": 5.172077972692553e-06, "loss": 0.5393, "step": 205 }, { "epoch": 0.7112616426756986, "grad_norm": 0.6486250758171082, "learning_rate": 4.661766220352098e-06, "loss": 0.5311, "step": 210 }, { "epoch": 0.7281964436917866, "grad_norm": 0.6176902055740356, "learning_rate": 4.170205208855281e-06, "loss": 0.5342, "step": 215 }, { "epoch": 0.7451312447078747, "grad_norm": 0.6902855634689331, "learning_rate": 3.6991215641828903e-06, "loss": 0.526, "step": 220 }, { "epoch": 0.7620660457239627, "grad_norm": 0.6924082636833191, "learning_rate": 3.250169984817897e-06, "loss": 0.5393, "step": 225 }, { "epoch": 0.7790008467400508, "grad_norm": 0.6927950978279114, "learning_rate": 2.8249274295566863e-06, "loss": 0.5372, "step": 230 }, { "epoch": 0.7959356477561389, "grad_norm": 0.6544324159622192, "learning_rate": 2.424887578383799e-06, "loss": 0.53, "step": 235 }, { "epoch": 0.8128704487722269, "grad_norm": 0.6218433976173401, "learning_rate": 2.0514555858664663e-06, "loss": 0.5383, "step": 240 }, { "epoch": 0.8298052497883149, "grad_norm": 0.6293333172798157, "learning_rate": 1.7059431454979825e-06, "loss": 0.5242, "step": 245 }, { "epoch": 0.8467400508044031, "grad_norm": 0.6629937291145325, "learning_rate": 1.3895638823264447e-06, "loss": 0.5283, "step": 250 }, { "epoch": 0.8636748518204911, "grad_norm": 0.6638931632041931, "learning_rate": 1.1034290900525279e-06, "loss": 0.5346, "step": 255 }, { "epoch": 0.8806096528365792, "grad_norm": 0.6154753565788269, "learning_rate": 8.485438275698154e-07, "loss": 0.5286, "step": 260 }, { "epoch": 0.8975444538526672, "grad_norm": 0.5576867461204529, "learning_rate": 6.258033886587911e-07, "loss": 0.5197, "step": 265 }, { "epoch": 0.9144792548687553, "grad_norm": 0.5925081372261047, "learning_rate": 4.359901572347758e-07, "loss": 0.5272, "step": 270 }, { "epoch": 0.9314140558848434, "grad_norm": 0.8894816040992737, "learning_rate": 2.7977085919589253e-07, "loss": 0.5229, "step": 275 }, { "epoch": 0.9483488569009314, "grad_norm": 0.613675057888031, "learning_rate": 1.5769422052403172e-07, "loss": 0.5336, "step": 280 }, { "epoch": 0.9652836579170194, "grad_norm": 0.5702030658721924, "learning_rate": 7.018903986483083e-08, "loss": 0.5295, "step": 285 }, { "epoch": 0.9822184589331076, "grad_norm": 0.6007622480392456, "learning_rate": 1.7562682356786488e-08, "loss": 0.5357, "step": 290 }, { "epoch": 0.9991532599491956, "grad_norm": 0.5643032193183899, "learning_rate": 0.0, "loss": 0.5246, "step": 295 }, { "epoch": 0.9991532599491956, "step": 295, "total_flos": 8.380405739893555e+17, "train_loss": 0.6345516301817813, "train_runtime": 3216.2267, "train_samples_per_second": 11.745, "train_steps_per_second": 0.092 } ], "logging_steps": 5, "max_steps": 295, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.380405739893555e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }