{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 200,
"global_step": 268,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007462686567164179,
"grad_norm": 0.02919401967076395,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.1054,
"step": 1
},
{
"epoch": 0.014925373134328358,
"grad_norm": 0.020838068877362836,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.0753,
"step": 2
},
{
"epoch": 0.022388059701492536,
"grad_norm": 0.01937615418470836,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.0831,
"step": 3
},
{
"epoch": 0.029850746268656716,
"grad_norm": 0.028469822431763606,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.0989,
"step": 4
},
{
"epoch": 0.03731343283582089,
"grad_norm": 0.03395939106878981,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.103,
"step": 5
},
{
"epoch": 0.04477611940298507,
"grad_norm": 0.041383149287986205,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.1343,
"step": 6
},
{
"epoch": 0.05223880597014925,
"grad_norm": 0.0413395483383645,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.1273,
"step": 7
},
{
"epoch": 0.05970149253731343,
"grad_norm": 0.024480164341057986,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0813,
"step": 8
},
{
"epoch": 0.06716417910447761,
"grad_norm": 0.033946084199447085,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0887,
"step": 9
},
{
"epoch": 0.07462686567164178,
"grad_norm": 0.03513746660341717,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.082,
"step": 10
},
{
"epoch": 0.08208955223880597,
"grad_norm": 0.025282860695458678,
"learning_rate": 4.074074074074074e-05,
"loss": 0.0686,
"step": 11
},
{
"epoch": 0.08955223880597014,
"grad_norm": 0.03350566986793766,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.072,
"step": 12
},
{
"epoch": 0.09701492537313433,
"grad_norm": 0.05550069293455211,
"learning_rate": 4.814814814814815e-05,
"loss": 0.1,
"step": 13
},
{
"epoch": 0.1044776119402985,
"grad_norm": 0.03956999409254062,
"learning_rate": 5.185185185185185e-05,
"loss": 0.0842,
"step": 14
},
{
"epoch": 0.11194029850746269,
"grad_norm": 0.05135762708917783,
"learning_rate": 5.555555555555556e-05,
"loss": 0.0866,
"step": 15
},
{
"epoch": 0.11940298507462686,
"grad_norm": 0.05114207718083794,
"learning_rate": 5.925925925925926e-05,
"loss": 0.0755,
"step": 16
},
{
"epoch": 0.12686567164179105,
"grad_norm": 0.033864855495790755,
"learning_rate": 6.296296296296296e-05,
"loss": 0.0642,
"step": 17
},
{
"epoch": 0.13432835820895522,
"grad_norm": 0.031422023139512284,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0547,
"step": 18
},
{
"epoch": 0.1417910447761194,
"grad_norm": 0.03313447893394791,
"learning_rate": 7.037037037037038e-05,
"loss": 0.0695,
"step": 19
},
{
"epoch": 0.14925373134328357,
"grad_norm": 0.02448036621370803,
"learning_rate": 7.407407407407407e-05,
"loss": 0.0582,
"step": 20
},
{
"epoch": 0.15671641791044777,
"grad_norm": 0.025763892833299634,
"learning_rate": 7.777777777777778e-05,
"loss": 0.0543,
"step": 21
},
{
"epoch": 0.16417910447761194,
"grad_norm": 0.027745981321721364,
"learning_rate": 8.148148148148148e-05,
"loss": 0.0711,
"step": 22
},
{
"epoch": 0.17164179104477612,
"grad_norm": 0.042796285583299835,
"learning_rate": 8.518518518518518e-05,
"loss": 0.069,
"step": 23
},
{
"epoch": 0.1791044776119403,
"grad_norm": 0.03557712335612332,
"learning_rate": 8.888888888888889e-05,
"loss": 0.0579,
"step": 24
},
{
"epoch": 0.1865671641791045,
"grad_norm": 0.044133668448392234,
"learning_rate": 9.25925925925926e-05,
"loss": 0.0573,
"step": 25
},
{
"epoch": 0.19402985074626866,
"grad_norm": 0.04917021976502218,
"learning_rate": 9.62962962962963e-05,
"loss": 0.0606,
"step": 26
},
{
"epoch": 0.20149253731343283,
"grad_norm": 0.039476881840879816,
"learning_rate": 0.0001,
"loss": 0.0591,
"step": 27
},
{
"epoch": 0.208955223880597,
"grad_norm": 0.046858172620967635,
"learning_rate": 9.999575185316994e-05,
"loss": 0.059,
"step": 28
},
{
"epoch": 0.21641791044776118,
"grad_norm": 0.05376514163226526,
"learning_rate": 9.998300813454982e-05,
"loss": 0.0676,
"step": 29
},
{
"epoch": 0.22388059701492538,
"grad_norm": 0.047620783901977486,
"learning_rate": 9.996177100962714e-05,
"loss": 0.0739,
"step": 30
},
{
"epoch": 0.23134328358208955,
"grad_norm": 0.06404757715770105,
"learning_rate": 9.99320440871389e-05,
"loss": 0.081,
"step": 31
},
{
"epoch": 0.23880597014925373,
"grad_norm": 0.032370232949459114,
"learning_rate": 9.989383241845838e-05,
"loss": 0.0631,
"step": 32
},
{
"epoch": 0.2462686567164179,
"grad_norm": 0.02198221870558641,
"learning_rate": 9.984714249673675e-05,
"loss": 0.0468,
"step": 33
},
{
"epoch": 0.2537313432835821,
"grad_norm": 0.029148788282928297,
"learning_rate": 9.979198225579968e-05,
"loss": 0.0634,
"step": 34
},
{
"epoch": 0.26119402985074625,
"grad_norm": 0.047984517615355146,
"learning_rate": 9.972836106879935e-05,
"loss": 0.0822,
"step": 35
},
{
"epoch": 0.26865671641791045,
"grad_norm": 0.025256795499786325,
"learning_rate": 9.965628974662144e-05,
"loss": 0.0497,
"step": 36
},
{
"epoch": 0.27611940298507465,
"grad_norm": 0.041039829048038795,
"learning_rate": 9.957578053604837e-05,
"loss": 0.0537,
"step": 37
},
{
"epoch": 0.2835820895522388,
"grad_norm": 0.022712174218454447,
"learning_rate": 9.9486847117678e-05,
"loss": 0.0529,
"step": 38
},
{
"epoch": 0.291044776119403,
"grad_norm": 0.027820439299072,
"learning_rate": 9.938950460359913e-05,
"loss": 0.0512,
"step": 39
},
{
"epoch": 0.29850746268656714,
"grad_norm": 0.029625704751798147,
"learning_rate": 9.928376953482343e-05,
"loss": 0.0526,
"step": 40
},
{
"epoch": 0.30597014925373134,
"grad_norm": 0.035649943358832585,
"learning_rate": 9.916965987847485e-05,
"loss": 0.0612,
"step": 41
},
{
"epoch": 0.31343283582089554,
"grad_norm": 0.03718438599924447,
"learning_rate": 9.904719502473634e-05,
"loss": 0.0699,
"step": 42
},
{
"epoch": 0.3208955223880597,
"grad_norm": 0.030176691889828987,
"learning_rate": 9.891639578355511e-05,
"loss": 0.0641,
"step": 43
},
{
"epoch": 0.3283582089552239,
"grad_norm": 0.031071278949314794,
"learning_rate": 9.877728438110645e-05,
"loss": 0.0562,
"step": 44
},
{
"epoch": 0.3358208955223881,
"grad_norm": 0.04495281452755722,
"learning_rate": 9.862988445601688e-05,
"loss": 0.0625,
"step": 45
},
{
"epoch": 0.34328358208955223,
"grad_norm": 0.03063389414608373,
"learning_rate": 9.847422105534739e-05,
"loss": 0.0725,
"step": 46
},
{
"epoch": 0.35074626865671643,
"grad_norm": 0.036502084694794205,
"learning_rate": 9.831032063033726e-05,
"loss": 0.0682,
"step": 47
},
{
"epoch": 0.3582089552238806,
"grad_norm": 0.035498747609547306,
"learning_rate": 9.813821103190932e-05,
"loss": 0.0528,
"step": 48
},
{
"epoch": 0.3656716417910448,
"grad_norm": 0.04409053680205051,
"learning_rate": 9.795792150593739e-05,
"loss": 0.0624,
"step": 49
},
{
"epoch": 0.373134328358209,
"grad_norm": 0.030682616113562263,
"learning_rate": 9.776948268827659e-05,
"loss": 0.0575,
"step": 50
},
{
"epoch": 0.3805970149253731,
"grad_norm": 0.030470506614129134,
"learning_rate": 9.757292659955755e-05,
"loss": 0.0591,
"step": 51
},
{
"epoch": 0.3880597014925373,
"grad_norm": 0.02671947240839377,
"learning_rate": 9.736828663974527e-05,
"loss": 0.0583,
"step": 52
},
{
"epoch": 0.39552238805970147,
"grad_norm": 0.031037639144256054,
"learning_rate": 9.715559758246363e-05,
"loss": 0.0518,
"step": 53
},
{
"epoch": 0.40298507462686567,
"grad_norm": 0.02841501008727202,
"learning_rate": 9.693489556908641e-05,
"loss": 0.0584,
"step": 54
},
{
"epoch": 0.41044776119402987,
"grad_norm": 0.04837015226261885,
"learning_rate": 9.670621810259595e-05,
"loss": 0.0727,
"step": 55
},
{
"epoch": 0.417910447761194,
"grad_norm": 0.03250895479571609,
"learning_rate": 9.646960404121042e-05,
"loss": 0.0518,
"step": 56
},
{
"epoch": 0.4253731343283582,
"grad_norm": 0.031411258255519776,
"learning_rate": 9.62250935917808e-05,
"loss": 0.0614,
"step": 57
},
{
"epoch": 0.43283582089552236,
"grad_norm": 0.059967490499311084,
"learning_rate": 9.597272830295876e-05,
"loss": 0.0726,
"step": 58
},
{
"epoch": 0.44029850746268656,
"grad_norm": 0.04086918510114368,
"learning_rate": 9.571255105813632e-05,
"loss": 0.0608,
"step": 59
},
{
"epoch": 0.44776119402985076,
"grad_norm": 0.039609700799485066,
"learning_rate": 9.5444606068159e-05,
"loss": 0.07,
"step": 60
},
{
"epoch": 0.4552238805970149,
"grad_norm": 0.03186573999267819,
"learning_rate": 9.516893886381323e-05,
"loss": 0.072,
"step": 61
},
{
"epoch": 0.4626865671641791,
"grad_norm": 0.03326197860069246,
"learning_rate": 9.488559628808939e-05,
"loss": 0.0647,
"step": 62
},
{
"epoch": 0.4701492537313433,
"grad_norm": 0.03167948302339881,
"learning_rate": 9.459462648822208e-05,
"loss": 0.0616,
"step": 63
},
{
"epoch": 0.47761194029850745,
"grad_norm": 0.02603734725183324,
"learning_rate": 9.429607890750863e-05,
"loss": 0.0541,
"step": 64
},
{
"epoch": 0.48507462686567165,
"grad_norm": 0.047753395728305725,
"learning_rate": 9.399000427690735e-05,
"loss": 0.056,
"step": 65
},
{
"epoch": 0.4925373134328358,
"grad_norm": 0.0319902663818862,
"learning_rate": 9.367645460641716e-05,
"loss": 0.0562,
"step": 66
},
{
"epoch": 0.5,
"grad_norm": 0.10592144833550245,
"learning_rate": 9.335548317623957e-05,
"loss": 0.0703,
"step": 67
},
{
"epoch": 0.5074626865671642,
"grad_norm": 0.04415289013333453,
"learning_rate": 9.302714452772516e-05,
"loss": 0.0578,
"step": 68
},
{
"epoch": 0.5149253731343284,
"grad_norm": 0.05922882437474166,
"learning_rate": 9.269149445410545e-05,
"loss": 0.0602,
"step": 69
},
{
"epoch": 0.5223880597014925,
"grad_norm": 0.04087608521226697,
"learning_rate": 9.234858999101231e-05,
"loss": 0.0493,
"step": 70
},
{
"epoch": 0.5298507462686567,
"grad_norm": 0.06407150308129361,
"learning_rate": 9.199848940678606e-05,
"loss": 0.0602,
"step": 71
},
{
"epoch": 0.5373134328358209,
"grad_norm": 0.03286157656507991,
"learning_rate": 9.164125219257418e-05,
"loss": 0.0528,
"step": 72
},
{
"epoch": 0.5447761194029851,
"grad_norm": 0.045437734403213616,
"learning_rate": 9.127693905222224e-05,
"loss": 0.0608,
"step": 73
},
{
"epoch": 0.5522388059701493,
"grad_norm": 0.03573058005192053,
"learning_rate": 9.09056118919587e-05,
"loss": 0.061,
"step": 74
},
{
"epoch": 0.5597014925373134,
"grad_norm": 0.04555803646336276,
"learning_rate": 9.052733380987554e-05,
"loss": 0.0616,
"step": 75
},
{
"epoch": 0.5671641791044776,
"grad_norm": 0.04474047330665906,
"learning_rate": 9.014216908520618e-05,
"loss": 0.0627,
"step": 76
},
{
"epoch": 0.5746268656716418,
"grad_norm": 0.040211174281131454,
"learning_rate": 8.975018316740278e-05,
"loss": 0.0459,
"step": 77
},
{
"epoch": 0.582089552238806,
"grad_norm": 0.03261428866046916,
"learning_rate": 8.935144266501469e-05,
"loss": 0.0624,
"step": 78
},
{
"epoch": 0.5895522388059702,
"grad_norm": 0.029388880489185733,
"learning_rate": 8.894601533436999e-05,
"loss": 0.0572,
"step": 79
},
{
"epoch": 0.5970149253731343,
"grad_norm": 0.028886106980896584,
"learning_rate": 8.853397006806182e-05,
"loss": 0.0651,
"step": 80
},
{
"epoch": 0.6044776119402985,
"grad_norm": 0.04454335383255878,
"learning_rate": 8.811537688324188e-05,
"loss": 0.0592,
"step": 81
},
{
"epoch": 0.6119402985074627,
"grad_norm": 0.0471135611588739,
"learning_rate": 8.769030690972262e-05,
"loss": 0.0697,
"step": 82
},
{
"epoch": 0.6194029850746269,
"grad_norm": 0.0296623365867784,
"learning_rate": 8.725883237789045e-05,
"loss": 0.0469,
"step": 83
},
{
"epoch": 0.6268656716417911,
"grad_norm": 0.03167282878695001,
"learning_rate": 8.682102660643197e-05,
"loss": 0.0547,
"step": 84
},
{
"epoch": 0.6343283582089553,
"grad_norm": 0.04865018192575572,
"learning_rate": 8.637696398987516e-05,
"loss": 0.0716,
"step": 85
},
{
"epoch": 0.6417910447761194,
"grad_norm": 0.028735120219778615,
"learning_rate": 8.592671998594794e-05,
"loss": 0.0572,
"step": 86
},
{
"epoch": 0.6492537313432836,
"grad_norm": 0.026050366630294017,
"learning_rate": 8.547037110275579e-05,
"loss": 0.0481,
"step": 87
},
{
"epoch": 0.6567164179104478,
"grad_norm": 0.04073494985026619,
"learning_rate": 8.50079948857812e-05,
"loss": 0.0661,
"step": 88
},
{
"epoch": 0.664179104477612,
"grad_norm": 0.033495112935702676,
"learning_rate": 8.453966990470656e-05,
"loss": 0.0623,
"step": 89
},
{
"epoch": 0.6716417910447762,
"grad_norm": 0.030497361040509492,
"learning_rate": 8.406547574006325e-05,
"loss": 0.0612,
"step": 90
},
{
"epoch": 0.6791044776119403,
"grad_norm": 0.04298436163572994,
"learning_rate": 8.358549296970876e-05,
"loss": 0.0569,
"step": 91
},
{
"epoch": 0.6865671641791045,
"grad_norm": 0.028361321641782122,
"learning_rate": 8.309980315513444e-05,
"loss": 0.0585,
"step": 92
},
{
"epoch": 0.6940298507462687,
"grad_norm": 0.03803075573155342,
"learning_rate": 8.260848882760615e-05,
"loss": 0.0571,
"step": 93
},
{
"epoch": 0.7014925373134329,
"grad_norm": 0.05072647730034115,
"learning_rate": 8.211163347414003e-05,
"loss": 0.0635,
"step": 94
},
{
"epoch": 0.7089552238805971,
"grad_norm": 0.028524701829953716,
"learning_rate": 8.160932152331586e-05,
"loss": 0.0507,
"step": 95
},
{
"epoch": 0.7164179104477612,
"grad_norm": 0.03163367256659563,
"learning_rate": 8.11016383309305e-05,
"loss": 0.0641,
"step": 96
},
{
"epoch": 0.7238805970149254,
"grad_norm": 0.03317124309154403,
"learning_rate": 8.058867016549372e-05,
"loss": 0.0488,
"step": 97
},
{
"epoch": 0.7313432835820896,
"grad_norm": 0.03740803806211238,
"learning_rate": 8.007050419356899e-05,
"loss": 0.0611,
"step": 98
},
{
"epoch": 0.7388059701492538,
"grad_norm": 0.02706261492437245,
"learning_rate": 7.95472284649615e-05,
"loss": 0.0443,
"step": 99
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.027321548744590845,
"learning_rate": 7.90189318977564e-05,
"loss": 0.0499,
"step": 100
},
{
"epoch": 0.753731343283582,
"grad_norm": 0.03331256964229156,
"learning_rate": 7.848570426320917e-05,
"loss": 0.0574,
"step": 101
},
{
"epoch": 0.7611940298507462,
"grad_norm": 0.04488781019829851,
"learning_rate": 7.794763617049124e-05,
"loss": 0.0675,
"step": 102
},
{
"epoch": 0.7686567164179104,
"grad_norm": 0.03589965410994236,
"learning_rate": 7.740481905129306e-05,
"loss": 0.0704,
"step": 103
},
{
"epoch": 0.7761194029850746,
"grad_norm": 0.038503509262491756,
"learning_rate": 7.685734514428766e-05,
"loss": 0.0648,
"step": 104
},
{
"epoch": 0.7835820895522388,
"grad_norm": 0.02427146713864325,
"learning_rate": 7.630530747945673e-05,
"loss": 0.0451,
"step": 105
},
{
"epoch": 0.7910447761194029,
"grad_norm": 0.03683952347349413,
"learning_rate": 7.574879986228245e-05,
"loss": 0.0595,
"step": 106
},
{
"epoch": 0.7985074626865671,
"grad_norm": 0.04856315142914648,
"learning_rate": 7.518791685780768e-05,
"loss": 0.0554,
"step": 107
},
{
"epoch": 0.8059701492537313,
"grad_norm": 0.0366517612862182,
"learning_rate": 7.46227537745667e-05,
"loss": 0.0649,
"step": 108
},
{
"epoch": 0.8134328358208955,
"grad_norm": 0.06587252219576946,
"learning_rate": 7.405340664838993e-05,
"loss": 0.0724,
"step": 109
},
{
"epoch": 0.8208955223880597,
"grad_norm": 0.031328118637405186,
"learning_rate": 7.347997222608492e-05,
"loss": 0.0532,
"step": 110
},
{
"epoch": 0.8283582089552238,
"grad_norm": 0.027882967444547536,
"learning_rate": 7.290254794899664e-05,
"loss": 0.0478,
"step": 111
},
{
"epoch": 0.835820895522388,
"grad_norm": 0.039566219868688,
"learning_rate": 7.232123193644957e-05,
"loss": 0.0584,
"step": 112
},
{
"epoch": 0.8432835820895522,
"grad_norm": 0.0315149053030866,
"learning_rate": 7.173612296907472e-05,
"loss": 0.0506,
"step": 113
},
{
"epoch": 0.8507462686567164,
"grad_norm": 0.03184981504463341,
"learning_rate": 7.114732047202433e-05,
"loss": 0.0432,
"step": 114
},
{
"epoch": 0.8582089552238806,
"grad_norm": 0.03272549516697388,
"learning_rate": 7.055492449807684e-05,
"loss": 0.0552,
"step": 115
},
{
"epoch": 0.8656716417910447,
"grad_norm": 0.03189649842931783,
"learning_rate": 6.99590357106354e-05,
"loss": 0.0518,
"step": 116
},
{
"epoch": 0.8731343283582089,
"grad_norm": 0.030147363978434292,
"learning_rate": 6.935975536662253e-05,
"loss": 0.0525,
"step": 117
},
{
"epoch": 0.8805970149253731,
"grad_norm": 0.040517602492255646,
"learning_rate": 6.875718529927405e-05,
"loss": 0.0669,
"step": 118
},
{
"epoch": 0.8880597014925373,
"grad_norm": 0.05316845870182898,
"learning_rate": 6.815142790083472e-05,
"loss": 0.0628,
"step": 119
},
{
"epoch": 0.8955223880597015,
"grad_norm": 0.034819746793725306,
"learning_rate": 6.75425861051595e-05,
"loss": 0.0555,
"step": 120
},
{
"epoch": 0.9029850746268657,
"grad_norm": 0.030100164091976505,
"learning_rate": 6.693076337022211e-05,
"loss": 0.05,
"step": 121
},
{
"epoch": 0.9104477611940298,
"grad_norm": 0.0314797643475597,
"learning_rate": 6.631606366053506e-05,
"loss": 0.0578,
"step": 122
},
{
"epoch": 0.917910447761194,
"grad_norm": 0.029223545243950332,
"learning_rate": 6.569859142948328e-05,
"loss": 0.0476,
"step": 123
},
{
"epoch": 0.9253731343283582,
"grad_norm": 0.0331348198460615,
"learning_rate": 6.507845160157477e-05,
"loss": 0.0632,
"step": 124
},
{
"epoch": 0.9328358208955224,
"grad_norm": 0.031079975615423625,
"learning_rate": 6.445574955461134e-05,
"loss": 0.0498,
"step": 125
},
{
"epoch": 0.9402985074626866,
"grad_norm": 0.035932362771267295,
"learning_rate": 6.383059110178204e-05,
"loss": 0.0649,
"step": 126
},
{
"epoch": 0.9477611940298507,
"grad_norm": 0.026660107576390996,
"learning_rate": 6.320308247368286e-05,
"loss": 0.0429,
"step": 127
},
{
"epoch": 0.9552238805970149,
"grad_norm": 0.042220621034671266,
"learning_rate": 6.257333030026538e-05,
"loss": 0.0756,
"step": 128
},
{
"epoch": 0.9626865671641791,
"grad_norm": 0.031042783082721563,
"learning_rate": 6.194144159271756e-05,
"loss": 0.0644,
"step": 129
},
{
"epoch": 0.9701492537313433,
"grad_norm": 0.028923034995146566,
"learning_rate": 6.130752372527982e-05,
"loss": 0.0535,
"step": 130
},
{
"epoch": 0.9776119402985075,
"grad_norm": 0.027560166142966646,
"learning_rate": 6.0671684416999273e-05,
"loss": 0.0515,
"step": 131
},
{
"epoch": 0.9850746268656716,
"grad_norm": 0.026132856653996697,
"learning_rate": 6.003403171342563e-05,
"loss": 0.054,
"step": 132
},
{
"epoch": 0.9925373134328358,
"grad_norm": 0.06392037144081057,
"learning_rate": 5.939467396825137e-05,
"loss": 0.06,
"step": 133
},
{
"epoch": 1.0,
"grad_norm": 0.042679994559313705,
"learning_rate": 5.875371982489959e-05,
"loss": 0.0619,
"step": 134
},
{
"epoch": 1.007462686567164,
"grad_norm": 0.026673005745909233,
"learning_rate": 5.811127819806277e-05,
"loss": 0.0459,
"step": 135
},
{
"epoch": 1.0149253731343284,
"grad_norm": 0.029023621087789386,
"learning_rate": 5.7467458255195384e-05,
"loss": 0.0446,
"step": 136
},
{
"epoch": 1.0223880597014925,
"grad_norm": 0.029071701785167842,
"learning_rate": 5.682236939796337e-05,
"loss": 0.0462,
"step": 137
},
{
"epoch": 1.0298507462686568,
"grad_norm": 0.031121046523713853,
"learning_rate": 5.61761212436541e-05,
"loss": 0.0537,
"step": 138
},
{
"epoch": 1.037313432835821,
"grad_norm": 0.03221700721378129,
"learning_rate": 5.55288236065495e-05,
"loss": 0.0548,
"step": 139
},
{
"epoch": 1.044776119402985,
"grad_norm": 0.03151438885910822,
"learning_rate": 5.488058647926577e-05,
"loss": 0.0534,
"step": 140
},
{
"epoch": 1.0522388059701493,
"grad_norm": 0.029920771304069553,
"learning_rate": 5.423152001406282e-05,
"loss": 0.0465,
"step": 141
},
{
"epoch": 1.0597014925373134,
"grad_norm": 0.02988182757789321,
"learning_rate": 5.3581734504126494e-05,
"loss": 0.0427,
"step": 142
},
{
"epoch": 1.0671641791044777,
"grad_norm": 0.04146879785852393,
"learning_rate": 5.293134036482698e-05,
"loss": 0.0663,
"step": 143
},
{
"epoch": 1.0746268656716418,
"grad_norm": 0.039152659337203376,
"learning_rate": 5.2280448114956316e-05,
"loss": 0.0426,
"step": 144
},
{
"epoch": 1.0820895522388059,
"grad_norm": 0.036826130066773974,
"learning_rate": 5.1629168357948435e-05,
"loss": 0.0554,
"step": 145
},
{
"epoch": 1.0895522388059702,
"grad_norm": 0.03349602680235328,
"learning_rate": 5.097761176308471e-05,
"loss": 0.0463,
"step": 146
},
{
"epoch": 1.0970149253731343,
"grad_norm": 0.03863625091563246,
"learning_rate": 5.032588904668851e-05,
"loss": 0.05,
"step": 147
},
{
"epoch": 1.1044776119402986,
"grad_norm": 0.03414480378687678,
"learning_rate": 4.967411095331149e-05,
"loss": 0.0532,
"step": 148
},
{
"epoch": 1.1119402985074627,
"grad_norm": 0.03458114819717078,
"learning_rate": 4.90223882369153e-05,
"loss": 0.0509,
"step": 149
},
{
"epoch": 1.1194029850746268,
"grad_norm": 0.03408662760824781,
"learning_rate": 4.837083164205159e-05,
"loss": 0.0483,
"step": 150
},
{
"epoch": 1.126865671641791,
"grad_norm": 0.0420831349151817,
"learning_rate": 4.771955188504371e-05,
"loss": 0.06,
"step": 151
},
{
"epoch": 1.1343283582089552,
"grad_norm": 0.03479713775750584,
"learning_rate": 4.7068659635173026e-05,
"loss": 0.0544,
"step": 152
},
{
"epoch": 1.1417910447761195,
"grad_norm": 0.04008915214172921,
"learning_rate": 4.641826549587352e-05,
"loss": 0.0515,
"step": 153
},
{
"epoch": 1.1492537313432836,
"grad_norm": 0.02486863707158603,
"learning_rate": 4.57684799859372e-05,
"loss": 0.0357,
"step": 154
},
{
"epoch": 1.1567164179104479,
"grad_norm": 0.047042070292214286,
"learning_rate": 4.511941352073424e-05,
"loss": 0.0572,
"step": 155
},
{
"epoch": 1.164179104477612,
"grad_norm": 0.03758648119450133,
"learning_rate": 4.447117639345052e-05,
"loss": 0.051,
"step": 156
},
{
"epoch": 1.171641791044776,
"grad_norm": 0.03672648821870239,
"learning_rate": 4.382387875634591e-05,
"loss": 0.056,
"step": 157
},
{
"epoch": 1.1791044776119404,
"grad_norm": 0.031421123448233466,
"learning_rate": 4.317763060203664e-05,
"loss": 0.0406,
"step": 158
},
{
"epoch": 1.1865671641791045,
"grad_norm": 0.03920340761152944,
"learning_rate": 4.253254174480462e-05,
"loss": 0.0457,
"step": 159
},
{
"epoch": 1.1940298507462686,
"grad_norm": 0.03183854980282451,
"learning_rate": 4.188872180193723e-05,
"loss": 0.0416,
"step": 160
},
{
"epoch": 1.2014925373134329,
"grad_norm": 0.04378327235602219,
"learning_rate": 4.124628017510043e-05,
"loss": 0.0571,
"step": 161
},
{
"epoch": 1.208955223880597,
"grad_norm": 0.039625445906029455,
"learning_rate": 4.0605326031748645e-05,
"loss": 0.0441,
"step": 162
},
{
"epoch": 1.2164179104477613,
"grad_norm": 0.03812779985968061,
"learning_rate": 3.9965968286574376e-05,
"loss": 0.0456,
"step": 163
},
{
"epoch": 1.2238805970149254,
"grad_norm": 0.03587300189954543,
"learning_rate": 3.932831558300074e-05,
"loss": 0.0573,
"step": 164
},
{
"epoch": 1.2313432835820897,
"grad_norm": 0.032411868193150284,
"learning_rate": 3.869247627472021e-05,
"loss": 0.0378,
"step": 165
},
{
"epoch": 1.2388059701492538,
"grad_norm": 0.03454002672639094,
"learning_rate": 3.8058558407282464e-05,
"loss": 0.0472,
"step": 166
},
{
"epoch": 1.2462686567164178,
"grad_norm": 0.04911967144007234,
"learning_rate": 3.742666969973463e-05,
"loss": 0.0487,
"step": 167
},
{
"epoch": 1.2537313432835822,
"grad_norm": 0.044265339009080897,
"learning_rate": 3.6796917526317156e-05,
"loss": 0.0588,
"step": 168
},
{
"epoch": 1.2611940298507462,
"grad_norm": 0.03747737389183713,
"learning_rate": 3.616940889821797e-05,
"loss": 0.0574,
"step": 169
},
{
"epoch": 1.2686567164179103,
"grad_norm": 0.03511814609607568,
"learning_rate": 3.5544250445388675e-05,
"loss": 0.0393,
"step": 170
},
{
"epoch": 1.2761194029850746,
"grad_norm": 0.04698205222474845,
"learning_rate": 3.492154839842525e-05,
"loss": 0.0442,
"step": 171
},
{
"epoch": 1.2835820895522387,
"grad_norm": 0.03454787270419316,
"learning_rate": 3.430140857051675e-05,
"loss": 0.0454,
"step": 172
},
{
"epoch": 1.291044776119403,
"grad_norm": 0.05089940274324606,
"learning_rate": 3.368393633946496e-05,
"loss": 0.0538,
"step": 173
},
{
"epoch": 1.2985074626865671,
"grad_norm": 0.037051061355777494,
"learning_rate": 3.306923662977789e-05,
"loss": 0.0486,
"step": 174
},
{
"epoch": 1.3059701492537314,
"grad_norm": 0.04009215418213975,
"learning_rate": 3.245741389484052e-05,
"loss": 0.0491,
"step": 175
},
{
"epoch": 1.3134328358208955,
"grad_norm": 0.03345150363311112,
"learning_rate": 3.184857209916528e-05,
"loss": 0.0389,
"step": 176
},
{
"epoch": 1.3208955223880596,
"grad_norm": 0.048558141019666455,
"learning_rate": 3.124281470072597e-05,
"loss": 0.0618,
"step": 177
},
{
"epoch": 1.328358208955224,
"grad_norm": 0.04663730712313748,
"learning_rate": 3.064024463337747e-05,
"loss": 0.0489,
"step": 178
},
{
"epoch": 1.335820895522388,
"grad_norm": 0.03522439433466753,
"learning_rate": 3.0040964289364616e-05,
"loss": 0.0382,
"step": 179
},
{
"epoch": 1.3432835820895521,
"grad_norm": 0.04483417143212535,
"learning_rate": 2.944507550192318e-05,
"loss": 0.0661,
"step": 180
},
{
"epoch": 1.3507462686567164,
"grad_norm": 0.037625578707262686,
"learning_rate": 2.885267952797569e-05,
"loss": 0.0392,
"step": 181
},
{
"epoch": 1.3582089552238805,
"grad_norm": 0.04068557239382607,
"learning_rate": 2.8263877030925277e-05,
"loss": 0.0542,
"step": 182
},
{
"epoch": 1.3656716417910448,
"grad_norm": 0.0389079661184576,
"learning_rate": 2.7678768063550452e-05,
"loss": 0.0562,
"step": 183
},
{
"epoch": 1.373134328358209,
"grad_norm": 0.04888440207831169,
"learning_rate": 2.7097452051003375e-05,
"loss": 0.0546,
"step": 184
},
{
"epoch": 1.3805970149253732,
"grad_norm": 0.05062117973516137,
"learning_rate": 2.6520027773915075e-05,
"loss": 0.0346,
"step": 185
},
{
"epoch": 1.3880597014925373,
"grad_norm": 0.03839649084491081,
"learning_rate": 2.5946593351610082e-05,
"loss": 0.043,
"step": 186
},
{
"epoch": 1.3955223880597014,
"grad_norm": 0.03894797485622364,
"learning_rate": 2.5377246225433303e-05,
"loss": 0.0499,
"step": 187
},
{
"epoch": 1.4029850746268657,
"grad_norm": 0.03574246701240581,
"learning_rate": 2.4812083142192328e-05,
"loss": 0.0427,
"step": 188
},
{
"epoch": 1.4104477611940298,
"grad_norm": 0.035928493587213936,
"learning_rate": 2.4251200137717544e-05,
"loss": 0.0495,
"step": 189
},
{
"epoch": 1.417910447761194,
"grad_norm": 0.0561432642757027,
"learning_rate": 2.3694692520543295e-05,
"loss": 0.0437,
"step": 190
},
{
"epoch": 1.4253731343283582,
"grad_norm": 0.029967493597351468,
"learning_rate": 2.3142654855712354e-05,
"loss": 0.0359,
"step": 191
},
{
"epoch": 1.4328358208955223,
"grad_norm": 0.030336483710158894,
"learning_rate": 2.259518094870693e-05,
"loss": 0.0333,
"step": 192
},
{
"epoch": 1.4402985074626866,
"grad_norm": 0.04356827813942889,
"learning_rate": 2.2052363829508775e-05,
"loss": 0.0508,
"step": 193
},
{
"epoch": 1.4477611940298507,
"grad_norm": 0.03897503968612337,
"learning_rate": 2.151429573679084e-05,
"loss": 0.0484,
"step": 194
},
{
"epoch": 1.455223880597015,
"grad_norm": 0.04667243646797709,
"learning_rate": 2.0981068102243616e-05,
"loss": 0.0471,
"step": 195
},
{
"epoch": 1.462686567164179,
"grad_norm": 0.039721557688575104,
"learning_rate": 2.0452771535038518e-05,
"loss": 0.0546,
"step": 196
},
{
"epoch": 1.4701492537313432,
"grad_norm": 0.03703957811531601,
"learning_rate": 1.9929495806431025e-05,
"loss": 0.0417,
"step": 197
},
{
"epoch": 1.4776119402985075,
"grad_norm": 0.05652478416127825,
"learning_rate": 1.9411329834506286e-05,
"loss": 0.0494,
"step": 198
},
{
"epoch": 1.4850746268656716,
"grad_norm": 0.03617159966848812,
"learning_rate": 1.8898361669069497e-05,
"loss": 0.046,
"step": 199
},
{
"epoch": 1.4925373134328357,
"grad_norm": 0.037324198259541815,
"learning_rate": 1.8390678476684142e-05,
"loss": 0.0436,
"step": 200
},
{
"epoch": 1.4925373134328357,
"eval_loss": 0.0473957434296608,
"eval_runtime": 6.4787,
"eval_samples_per_second": 0.926,
"eval_steps_per_second": 0.309,
"step": 200
},
{
"epoch": 1.5,
"grad_norm": 0.042848477030181235,
"learning_rate": 1.7888366525859968e-05,
"loss": 0.0513,
"step": 201
},
{
"epoch": 1.5074626865671643,
"grad_norm": 0.04133178844515671,
"learning_rate": 1.739151117239385e-05,
"loss": 0.0434,
"step": 202
},
{
"epoch": 1.5149253731343284,
"grad_norm": 0.0470387983073566,
"learning_rate": 1.6900196844865573e-05,
"loss": 0.049,
"step": 203
},
{
"epoch": 1.5223880597014925,
"grad_norm": 0.04143101813950498,
"learning_rate": 1.641450703029125e-05,
"loss": 0.0505,
"step": 204
},
{
"epoch": 1.5298507462686568,
"grad_norm": 0.03907054079716859,
"learning_rate": 1.5934524259936756e-05,
"loss": 0.0448,
"step": 205
},
{
"epoch": 1.537313432835821,
"grad_norm": 0.0360128545432656,
"learning_rate": 1.5460330095293447e-05,
"loss": 0.0448,
"step": 206
},
{
"epoch": 1.544776119402985,
"grad_norm": 0.04127920465748009,
"learning_rate": 1.4992005114218805e-05,
"loss": 0.0472,
"step": 207
},
{
"epoch": 1.5522388059701493,
"grad_norm": 0.037629999831237014,
"learning_rate": 1.4529628897244212e-05,
"loss": 0.045,
"step": 208
},
{
"epoch": 1.5597014925373134,
"grad_norm": 0.04441048097377383,
"learning_rate": 1.4073280014052077e-05,
"loss": 0.0479,
"step": 209
},
{
"epoch": 1.5671641791044775,
"grad_norm": 0.03871964970576081,
"learning_rate": 1.3623036010124846e-05,
"loss": 0.0397,
"step": 210
},
{
"epoch": 1.5746268656716418,
"grad_norm": 0.04375110246238586,
"learning_rate": 1.3178973393568057e-05,
"loss": 0.051,
"step": 211
},
{
"epoch": 1.582089552238806,
"grad_norm": 0.04933919962951339,
"learning_rate": 1.2741167622109556e-05,
"loss": 0.0426,
"step": 212
},
{
"epoch": 1.5895522388059702,
"grad_norm": 0.03861540416331047,
"learning_rate": 1.230969309027739e-05,
"loss": 0.0461,
"step": 213
},
{
"epoch": 1.5970149253731343,
"grad_norm": 0.04244105016684205,
"learning_rate": 1.1884623116758121e-05,
"loss": 0.0521,
"step": 214
},
{
"epoch": 1.6044776119402986,
"grad_norm": 0.046525033488129953,
"learning_rate": 1.1466029931938182e-05,
"loss": 0.0547,
"step": 215
},
{
"epoch": 1.6119402985074627,
"grad_norm": 0.05356919684133014,
"learning_rate": 1.1053984665630024e-05,
"loss": 0.0423,
"step": 216
},
{
"epoch": 1.6194029850746268,
"grad_norm": 0.04764484850078174,
"learning_rate": 1.0648557334985309e-05,
"loss": 0.0578,
"step": 217
},
{
"epoch": 1.626865671641791,
"grad_norm": 0.04353330012930423,
"learning_rate": 1.024981683259723e-05,
"loss": 0.0508,
"step": 218
},
{
"epoch": 1.6343283582089554,
"grad_norm": 0.0439439630643127,
"learning_rate": 9.857830914793826e-06,
"loss": 0.0503,
"step": 219
},
{
"epoch": 1.6417910447761193,
"grad_norm": 0.0431244279753579,
"learning_rate": 9.472666190124457e-06,
"loss": 0.0438,
"step": 220
},
{
"epoch": 1.6492537313432836,
"grad_norm": 0.06281467675130384,
"learning_rate": 9.094388108041302e-06,
"loss": 0.0525,
"step": 221
},
{
"epoch": 1.6567164179104479,
"grad_norm": 0.03911370572303081,
"learning_rate": 8.723060947777777e-06,
"loss": 0.0468,
"step": 222
},
{
"epoch": 1.664179104477612,
"grad_norm": 0.04237425485741587,
"learning_rate": 8.358747807425826e-06,
"loss": 0.0488,
"step": 223
},
{
"epoch": 1.671641791044776,
"grad_norm": 0.04205074962767055,
"learning_rate": 8.001510593213946e-06,
"loss": 0.0499,
"step": 224
},
{
"epoch": 1.6791044776119404,
"grad_norm": 0.038935088692434276,
"learning_rate": 7.651410008987697e-06,
"loss": 0.0505,
"step": 225
},
{
"epoch": 1.6865671641791045,
"grad_norm": 0.036013941908150396,
"learning_rate": 7.308505545894567e-06,
"loss": 0.0423,
"step": 226
},
{
"epoch": 1.6940298507462686,
"grad_norm": 0.04324030148308444,
"learning_rate": 6.972855472274853e-06,
"loss": 0.0501,
"step": 227
},
{
"epoch": 1.7014925373134329,
"grad_norm": 0.03960682986150278,
"learning_rate": 6.6445168237604385e-06,
"loss": 0.0548,
"step": 228
},
{
"epoch": 1.7089552238805972,
"grad_norm": 0.040566743727832796,
"learning_rate": 6.323545393582847e-06,
"loss": 0.0485,
"step": 229
},
{
"epoch": 1.716417910447761,
"grad_norm": 0.04146343378333961,
"learning_rate": 6.009995723092654e-06,
"loss": 0.0495,
"step": 230
},
{
"epoch": 1.7238805970149254,
"grad_norm": 0.05743102370183059,
"learning_rate": 5.703921092491393e-06,
"loss": 0.0482,
"step": 231
},
{
"epoch": 1.7313432835820897,
"grad_norm": 0.04399341838703738,
"learning_rate": 5.405373511777939e-06,
"loss": 0.0579,
"step": 232
},
{
"epoch": 1.7388059701492538,
"grad_norm": 0.042551807139630436,
"learning_rate": 5.114403711910632e-06,
"loss": 0.0524,
"step": 233
},
{
"epoch": 1.7462686567164178,
"grad_norm": 0.05244930515335262,
"learning_rate": 4.8310611361867875e-06,
"loss": 0.0536,
"step": 234
},
{
"epoch": 1.7537313432835822,
"grad_norm": 0.037991763265651006,
"learning_rate": 4.555393931841001e-06,
"loss": 0.0438,
"step": 235
},
{
"epoch": 1.7611940298507462,
"grad_norm": 0.03923496935939972,
"learning_rate": 4.287448941863692e-06,
"loss": 0.0441,
"step": 236
},
{
"epoch": 1.7686567164179103,
"grad_norm": 0.04213613231764651,
"learning_rate": 4.027271697041252e-06,
"loss": 0.0451,
"step": 237
},
{
"epoch": 1.7761194029850746,
"grad_norm": 0.04391085681016651,
"learning_rate": 3.7749064082191977e-06,
"loss": 0.0391,
"step": 238
},
{
"epoch": 1.783582089552239,
"grad_norm": 0.040751284452250174,
"learning_rate": 3.5303959587895898e-06,
"loss": 0.0473,
"step": 239
},
{
"epoch": 1.7910447761194028,
"grad_norm": 0.044746440629601196,
"learning_rate": 3.2937818974040635e-06,
"loss": 0.0596,
"step": 240
},
{
"epoch": 1.7985074626865671,
"grad_norm": 0.04365755886772311,
"learning_rate": 3.065104430913601e-06,
"loss": 0.0535,
"step": 241
},
{
"epoch": 1.8059701492537314,
"grad_norm": 0.04282275262011111,
"learning_rate": 2.844402417536374e-06,
"loss": 0.0504,
"step": 242
},
{
"epoch": 1.8134328358208955,
"grad_norm": 0.040175140129488814,
"learning_rate": 2.631713360254734e-06,
"loss": 0.0512,
"step": 243
},
{
"epoch": 1.8208955223880596,
"grad_norm": 0.038332926215459244,
"learning_rate": 2.4270734004424643e-06,
"loss": 0.0395,
"step": 244
},
{
"epoch": 1.828358208955224,
"grad_norm": 0.05597837964991172,
"learning_rate": 2.2305173117234236e-06,
"loss": 0.0537,
"step": 245
},
{
"epoch": 1.835820895522388,
"grad_norm": 0.04065238850547589,
"learning_rate": 2.0420784940626157e-06,
"loss": 0.0452,
"step": 246
},
{
"epoch": 1.8432835820895521,
"grad_norm": 0.0476763458104042,
"learning_rate": 1.861788968090683e-06,
"loss": 0.0451,
"step": 247
},
{
"epoch": 1.8507462686567164,
"grad_norm": 0.04498306599006193,
"learning_rate": 1.68967936966275e-06,
"loss": 0.0429,
"step": 248
},
{
"epoch": 1.8582089552238807,
"grad_norm": 0.05003148848106867,
"learning_rate": 1.5257789446526172e-06,
"loss": 0.0441,
"step": 249
},
{
"epoch": 1.8656716417910446,
"grad_norm": 0.03810179831443809,
"learning_rate": 1.3701155439831249e-06,
"loss": 0.045,
"step": 250
},
{
"epoch": 1.873134328358209,
"grad_norm": 0.046823905381387004,
"learning_rate": 1.222715618893555e-06,
"loss": 0.046,
"step": 251
},
{
"epoch": 1.8805970149253732,
"grad_norm": 0.03508296706383799,
"learning_rate": 1.0836042164448945e-06,
"loss": 0.0414,
"step": 252
},
{
"epoch": 1.8880597014925373,
"grad_norm": 0.03914973587838145,
"learning_rate": 9.528049752636714e-07,
"loss": 0.0382,
"step": 253
},
{
"epoch": 1.8955223880597014,
"grad_norm": 0.04522068545246384,
"learning_rate": 8.303401215251583e-07,
"loss": 0.0622,
"step": 254
},
{
"epoch": 1.9029850746268657,
"grad_norm": 0.0351448987447696,
"learning_rate": 7.16230465176565e-07,
"loss": 0.0365,
"step": 255
},
{
"epoch": 1.9104477611940298,
"grad_norm": 0.04164400823222792,
"learning_rate": 6.104953964008897e-07,
"loss": 0.0417,
"step": 256
},
{
"epoch": 1.917910447761194,
"grad_norm": 0.043785343997244305,
"learning_rate": 5.131528823220099e-07,
"loss": 0.0479,
"step": 257
},
{
"epoch": 1.9253731343283582,
"grad_norm": 0.04778705545009903,
"learning_rate": 4.242194639516417e-07,
"loss": 0.0515,
"step": 258
},
{
"epoch": 1.9328358208955225,
"grad_norm": 0.04402744832754031,
"learning_rate": 3.4371025337855413e-07,
"loss": 0.0459,
"step": 259
},
{
"epoch": 1.9402985074626866,
"grad_norm": 0.031589252550306986,
"learning_rate": 2.7163893120066285e-07,
"loss": 0.0345,
"step": 260
},
{
"epoch": 1.9477611940298507,
"grad_norm": 0.04157687462847643,
"learning_rate": 2.0801774420031173e-07,
"loss": 0.0491,
"step": 261
},
{
"epoch": 1.955223880597015,
"grad_norm": 0.03811048776366228,
"learning_rate": 1.5285750326325954e-07,
"loss": 0.0399,
"step": 262
},
{
"epoch": 1.962686567164179,
"grad_norm": 0.0459881175457512,
"learning_rate": 1.0616758154161632e-07,
"loss": 0.0536,
"step": 263
},
{
"epoch": 1.9701492537313432,
"grad_norm": 0.04236843642259093,
"learning_rate": 6.795591286109515e-08,
"loss": 0.0576,
"step": 264
},
{
"epoch": 1.9776119402985075,
"grad_norm": 0.04498028444612301,
"learning_rate": 3.822899037286276e-08,
"loss": 0.0437,
"step": 265
},
{
"epoch": 1.9850746268656716,
"grad_norm": 0.04723224011434202,
"learning_rate": 1.6991865450188827e-08,
"loss": 0.0511,
"step": 266
},
{
"epoch": 1.9925373134328357,
"grad_norm": 0.04408571036477793,
"learning_rate": 4.248146830060362e-09,
"loss": 0.0644,
"step": 267
},
{
"epoch": 2.0,
"grad_norm": 0.036686659173178024,
"learning_rate": 0.0,
"loss": 0.0407,
"step": 268
},
{
"epoch": 2.0,
"step": 268,
"total_flos": 678679592042496.0,
"train_loss": 0.055738416959100694,
"train_runtime": 2038.148,
"train_samples_per_second": 0.523,
"train_steps_per_second": 0.131
}
],
"logging_steps": 1,
"max_steps": 268,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 678679592042496.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}