{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0183299389002036,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020366598778004074,
      "grad_norm": 0.12626299262046814,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.7006,
      "mean_token_accuracy": 0.6053715996444226,
      "step": 10
    },
    {
      "epoch": 0.04073319755600815,
      "grad_norm": 0.2351449579000473,
      "learning_rate": 0.00019994755690455152,
      "loss": 1.5214,
      "mean_token_accuracy": 0.6368309155106544,
      "step": 20
    },
    {
      "epoch": 0.06109979633401222,
      "grad_norm": 0.32846614718437195,
      "learning_rate": 0.0001995283421166614,
      "loss": 1.5662,
      "mean_token_accuracy": 0.6361105382442475,
      "step": 30
    },
    {
      "epoch": 0.0814663951120163,
      "grad_norm": 0.24607931077480316,
      "learning_rate": 0.00019869167087338907,
      "loss": 1.4452,
      "mean_token_accuracy": 0.6645107805728913,
      "step": 40
    },
    {
      "epoch": 0.10183299389002037,
      "grad_norm": 0.21988731622695923,
      "learning_rate": 0.00019744105246469263,
      "loss": 1.3939,
      "mean_token_accuracy": 0.6644507594406605,
      "step": 50
    },
    {
      "epoch": 0.12219959266802444,
      "grad_norm": 0.18629378080368042,
      "learning_rate": 0.00019578173241879872,
      "loss": 1.3307,
      "mean_token_accuracy": 0.671517875790596,
      "step": 60
    },
    {
      "epoch": 0.1425661914460285,
      "grad_norm": 0.19982416927814484,
      "learning_rate": 0.00019372067050063438,
      "loss": 1.3089,
      "mean_token_accuracy": 0.6763716876506806,
      "step": 70
    },
    {
      "epoch": 0.1629327902240326,
      "grad_norm": 0.1744842529296875,
      "learning_rate": 0.00019126651152015403,
      "loss": 1.3482,
      "mean_token_accuracy": 0.6714572340250016,
      "step": 80
    },
    {
      "epoch": 0.18329938900203666,
      "grad_norm": 0.22721126675605774,
      "learning_rate": 0.00018842954907300236,
      "loss": 1.4167,
      "mean_token_accuracy": 0.6588860973715782,
      "step": 90
    },
    {
      "epoch": 0.20366598778004075,
      "grad_norm": 0.17914065718650818,
      "learning_rate": 0.00018522168236559695,
      "loss": 1.3608,
      "mean_token_accuracy": 0.6660649880766869,
      "step": 100
    },
    {
      "epoch": 0.2240325865580448,
      "grad_norm": 0.1730283796787262,
      "learning_rate": 0.0001816563663057211,
      "loss": 1.4223,
      "mean_token_accuracy": 0.6599490791559219,
      "step": 110
    },
    {
      "epoch": 0.24439918533604887,
      "grad_norm": 0.17649827897548676,
      "learning_rate": 0.00017774855506796496,
      "loss": 1.3737,
      "mean_token_accuracy": 0.6688225455582142,
      "step": 120
    },
    {
      "epoch": 0.26476578411405294,
      "grad_norm": 0.17069195210933685,
      "learning_rate": 0.00017351463937072004,
      "loss": 1.3767,
      "mean_token_accuracy": 0.6619200393557548,
      "step": 130
    },
    {
      "epoch": 0.285132382892057,
      "grad_norm": 0.1913885921239853,
      "learning_rate": 0.00016897237772781044,
      "loss": 1.4375,
      "mean_token_accuracy": 0.6498532116413116,
      "step": 140
    },
    {
      "epoch": 0.3054989816700611,
      "grad_norm": 0.17547893524169922,
      "learning_rate": 0.000164140821963114,
      "loss": 1.3268,
      "mean_token_accuracy": 0.6731642320752144,
      "step": 150
    },
    {
      "epoch": 0.3258655804480652,
      "grad_norm": 0.16026997566223145,
      "learning_rate": 0.00015904023730059228,
      "loss": 1.4003,
      "mean_token_accuracy": 0.663200007379055,
      "step": 160
    },
    {
      "epoch": 0.34623217922606925,
      "grad_norm": 0.15250708162784576,
      "learning_rate": 0.0001536920173648984,
      "loss": 1.2856,
      "mean_token_accuracy": 0.6764356568455696,
      "step": 170
    },
    {
      "epoch": 0.3665987780040733,
      "grad_norm": 0.13922956585884094,
      "learning_rate": 0.00014811859444908052,
      "loss": 1.3469,
      "mean_token_accuracy": 0.6688723161816597,
      "step": 180
    },
    {
      "epoch": 0.3869653767820774,
      "grad_norm": 0.16509701311588287,
      "learning_rate": 0.00014234334542574906,
      "loss": 1.3352,
      "mean_token_accuracy": 0.6676128759980202,
      "step": 190
    },
    {
      "epoch": 0.4073319755600815,
      "grad_norm": 0.14618448913097382,
      "learning_rate": 0.00013639049369634876,
      "loss": 1.3593,
      "mean_token_accuracy": 0.6654890060424805,
      "step": 200
    },
    {
      "epoch": 0.42769857433808556,
      "grad_norm": 0.16653190553188324,
      "learning_rate": 0.00013028500758979506,
      "loss": 1.328,
      "mean_token_accuracy": 0.6699673473834992,
      "step": 210
    },
    {
      "epoch": 0.4480651731160896,
      "grad_norm": 0.146384596824646,
      "learning_rate": 0.00012405249563662537,
      "loss": 1.2914,
      "mean_token_accuracy": 0.6747352227568626,
      "step": 220
    },
    {
      "epoch": 0.4684317718940937,
      "grad_norm": 0.16215957701206207,
      "learning_rate": 0.0001177190991579223,
      "loss": 1.3342,
      "mean_token_accuracy": 0.6690372809767723,
      "step": 230
    },
    {
      "epoch": 0.48879837067209775,
      "grad_norm": 0.17549686133861542,
      "learning_rate": 0.00011131138261952845,
      "loss": 1.2852,
      "mean_token_accuracy": 0.6783039927482605,
      "step": 240
    },
    {
      "epoch": 0.5091649694501018,
      "grad_norm": 0.14437216520309448,
      "learning_rate": 0.00010485622221144484,
      "loss": 1.3264,
      "mean_token_accuracy": 0.6815439119935036,
      "step": 250
    },
    {
      "epoch": 0.5295315682281059,
      "grad_norm": 0.139692023396492,
      "learning_rate": 9.838069311974986e-05,
      "loss": 1.3195,
      "mean_token_accuracy": 0.6713656410574913,
      "step": 260
    },
    {
      "epoch": 0.5498981670061099,
      "grad_norm": 0.16701075434684753,
      "learning_rate": 9.19119559638596e-05,
      "loss": 1.3618,
      "mean_token_accuracy": 0.6655304417014122,
      "step": 270
    },
    {
      "epoch": 0.570264765784114,
      "grad_norm": 0.15681137144565582,
      "learning_rate": 8.5477142875451e-05,
      "loss": 1.3549,
      "mean_token_accuracy": 0.6620298072695732,
      "step": 280
    },
    {
      "epoch": 0.5906313645621182,
      "grad_norm": 0.1511278599500656,
      "learning_rate": 7.91032436968725e-05,
      "loss": 1.3159,
      "mean_token_accuracy": 0.6778326541185379,
      "step": 290
    },
    {
      "epoch": 0.6109979633401222,
      "grad_norm": 0.16071146726608276,
      "learning_rate": 7.281699277636572e-05,
      "loss": 1.3977,
      "mean_token_accuracy": 0.6576820626854897,
      "step": 300
    },
    {
      "epoch": 0.6313645621181263,
      "grad_norm": 0.1634049415588379,
      "learning_rate": 6.664475683491796e-05,
      "loss": 1.3118,
      "mean_token_accuracy": 0.6826698362827301,
      "step": 310
    },
    {
      "epoch": 0.6517311608961304,
      "grad_norm": 0.15096673369407654,
      "learning_rate": 6.061242437507131e-05,
      "loss": 1.3492,
      "mean_token_accuracy": 0.6674635127186775,
      "step": 320
    },
    {
      "epoch": 0.6720977596741344,
      "grad_norm": 0.16971102356910706,
      "learning_rate": 5.474529709554612e-05,
      "loss": 1.4399,
      "mean_token_accuracy": 0.6552789464592934,
      "step": 330
    },
    {
      "epoch": 0.6924643584521385,
      "grad_norm": 0.14999577403068542,
      "learning_rate": 4.9067983767123736e-05,
      "loss": 1.4486,
      "mean_token_accuracy": 0.6491607405245304,
      "step": 340
    },
    {
      "epoch": 0.7128309572301426,
      "grad_norm": 0.16720150411128998,
      "learning_rate": 4.360429701490934e-05,
      "loss": 1.3931,
      "mean_token_accuracy": 0.6607658788561821,
      "step": 350
    },
    {
      "epoch": 0.7331975560081466,
      "grad_norm": 0.138802170753479,
      "learning_rate": 3.8377153439907266e-05,
      "loss": 1.2131,
      "mean_token_accuracy": 0.6939047828316689,
      "step": 360
    },
    {
      "epoch": 0.7535641547861507,
      "grad_norm": 0.18726502358913422,
      "learning_rate": 3.340847749883191e-05,
      "loss": 1.2142,
      "mean_token_accuracy": 0.6881774321198464,
      "step": 370
    },
    {
      "epoch": 0.7739307535641547,
      "grad_norm": 0.190961092710495,
      "learning_rate": 2.8719109545317103e-05,
      "loss": 1.3967,
      "mean_token_accuracy": 0.6626075744628906,
      "step": 380
    },
    {
      "epoch": 0.7942973523421588,
      "grad_norm": 0.14274843037128448,
      "learning_rate": 2.432871841823047e-05,
      "loss": 1.3339,
      "mean_token_accuracy": 0.671070359647274,
      "step": 390
    },
    {
      "epoch": 0.814663951120163,
      "grad_norm": 0.1557278335094452,
      "learning_rate": 2.025571894372794e-05,
      "loss": 1.296,
      "mean_token_accuracy": 0.6788829267024994,
      "step": 400
    },
    {
      "epoch": 0.835030549898167,
      "grad_norm": 0.17218472063541412,
      "learning_rate": 1.65171946970729e-05,
      "loss": 1.2488,
      "mean_token_accuracy": 0.6864925757050514,
      "step": 410
    },
    {
      "epoch": 0.8553971486761711,
      "grad_norm": 0.14736278355121613,
      "learning_rate": 1.3128826348184887e-05,
      "loss": 1.3984,
      "mean_token_accuracy": 0.6586415357887745,
      "step": 420
    },
    {
      "epoch": 0.8757637474541752,
      "grad_norm": 0.16238394379615784,
      "learning_rate": 1.010482589146048e-05,
      "loss": 1.2756,
      "mean_token_accuracy": 0.6823231220245362,
      "step": 430
    },
    {
      "epoch": 0.8961303462321792,
      "grad_norm": 0.1799972802400589,
      "learning_rate": 7.457877035729588e-06,
      "loss": 1.3597,
      "mean_token_accuracy": 0.6700396433472633,
      "step": 440
    },
    {
      "epoch": 0.9164969450101833,
      "grad_norm": 0.18641585111618042,
      "learning_rate": 5.199082004372957e-06,
      "loss": 1.2676,
      "mean_token_accuracy": 0.6873666003346444,
      "step": 450
    },
    {
      "epoch": 0.9368635437881874,
      "grad_norm": 0.15163969993591309,
      "learning_rate": 3.3379149687388867e-06,
      "loss": 1.3899,
      "mean_token_accuracy": 0.6645716562867164,
      "step": 460
    },
    {
      "epoch": 0.9572301425661914,
      "grad_norm": 0.15963105857372284,
      "learning_rate": 1.882182310176095e-06,
      "loss": 1.342,
      "mean_token_accuracy": 0.6667100310325622,
      "step": 470
    },
    {
      "epoch": 0.9775967413441955,
      "grad_norm": 0.14211098849773407,
      "learning_rate": 8.379898773574924e-07,
      "loss": 1.3966,
      "mean_token_accuracy": 0.6606692716479301,
      "step": 480
    },
    {
      "epoch": 0.9979633401221996,
      "grad_norm": 0.16811959445476532,
      "learning_rate": 2.0971737622883515e-07,
      "loss": 1.4444,
      "mean_token_accuracy": 0.655492453277111,
      "step": 490
    },
    {
      "epoch": 1.0183299389002036,
      "grad_norm": 0.17285843193531036,
      "learning_rate": 0.0,
      "loss": 1.3099,
      "mean_token_accuracy": 0.6747938022017479,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.528555810816e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}