|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9991031390134529, |
|
"eval_steps": 500, |
|
"global_step": 557, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002810468997013877, |
|
"grad_norm": 0.08777584135532379, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 1.6284, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005620937994027754, |
|
"grad_norm": 0.08739772439002991, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 1.6386, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00843140699104163, |
|
"grad_norm": 0.07890280336141586, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.6366, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011241875988055507, |
|
"grad_norm": 0.050849832594394684, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 1.6428, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.014052344985069383, |
|
"grad_norm": 0.026262454688549042, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 1.6335, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01686281398208326, |
|
"grad_norm": 0.017007650807499886, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.6298, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.019673282979097137, |
|
"grad_norm": 0.021000387147068977, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 1.6349, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.022483751976111015, |
|
"grad_norm": 0.039422884583473206, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.6161, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02529422097312489, |
|
"grad_norm": 0.03416357561945915, |
|
"learning_rate": 1e-05, |
|
"loss": 1.6133, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.028104689970138767, |
|
"grad_norm": 0.023096244782209396, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 1.6017, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030915158967152644, |
|
"grad_norm": 0.028580009937286377, |
|
"learning_rate": 1.2222222222222224e-05, |
|
"loss": 1.6117, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03372562796416652, |
|
"grad_norm": 0.02169407717883587, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.605, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.036536096961180396, |
|
"grad_norm": 0.02079104818403721, |
|
"learning_rate": 1.4444444444444446e-05, |
|
"loss": 1.6006, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.039346565958194274, |
|
"grad_norm": 0.02266255021095276, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 1.6076, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04215703495520815, |
|
"grad_norm": 0.023310977965593338, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.6051, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04496750395222203, |
|
"grad_norm": 0.02227545529603958, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 1.5976, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04777797294923591, |
|
"grad_norm": 0.02008313126862049, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 1.6048, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05058844194624978, |
|
"grad_norm": 0.020431598648428917, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6033, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.053398910943263656, |
|
"grad_norm": 0.019984332844614983, |
|
"learning_rate": 1.999956548296958e-05, |
|
"loss": 1.5915, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05620937994027753, |
|
"grad_norm": 0.020108483731746674, |
|
"learning_rate": 1.9998261969639324e-05, |
|
"loss": 1.5938, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05901984893729141, |
|
"grad_norm": 0.016152869910001755, |
|
"learning_rate": 1.9996089573288985e-05, |
|
"loss": 1.5931, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06183031793430529, |
|
"grad_norm": 0.016522705554962158, |
|
"learning_rate": 1.99930484827072e-05, |
|
"loss": 1.5953, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06464078693131917, |
|
"grad_norm": 0.017014645040035248, |
|
"learning_rate": 1.9989138962175105e-05, |
|
"loss": 1.5898, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06745125592833304, |
|
"grad_norm": 0.016037074849009514, |
|
"learning_rate": 1.9984361351443343e-05, |
|
"loss": 1.5904, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07026172492534692, |
|
"grad_norm": 0.01631341129541397, |
|
"learning_rate": 1.9978716065702566e-05, |
|
"loss": 1.5913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07307219392236079, |
|
"grad_norm": 0.014392613433301449, |
|
"learning_rate": 1.9972203595547334e-05, |
|
"loss": 1.5893, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07588266291937468, |
|
"grad_norm": 0.015151307918131351, |
|
"learning_rate": 1.996482450693348e-05, |
|
"loss": 1.5912, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07869313191638855, |
|
"grad_norm": 0.015482740476727486, |
|
"learning_rate": 1.9956579441128942e-05, |
|
"loss": 1.5847, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08150360091340242, |
|
"grad_norm": 0.01364535465836525, |
|
"learning_rate": 1.994746911465802e-05, |
|
"loss": 1.585, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0843140699104163, |
|
"grad_norm": 0.014720232225954533, |
|
"learning_rate": 1.9937494319239112e-05, |
|
"loss": 1.5773, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08712453890743017, |
|
"grad_norm": 0.012587211094796658, |
|
"learning_rate": 1.9926655921715924e-05, |
|
"loss": 1.5766, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08993500790444406, |
|
"grad_norm": 0.013409728184342384, |
|
"learning_rate": 1.9914954863982106e-05, |
|
"loss": 1.5764, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09274547690145793, |
|
"grad_norm": 0.013652725145220757, |
|
"learning_rate": 1.990239216289944e-05, |
|
"loss": 1.5818, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09555594589847181, |
|
"grad_norm": 0.015371325425803661, |
|
"learning_rate": 1.9888968910209433e-05, |
|
"loss": 1.5834, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09836641489548568, |
|
"grad_norm": 0.013734452426433563, |
|
"learning_rate": 1.9874686272438467e-05, |
|
"loss": 1.5719, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10117688389249956, |
|
"grad_norm": 0.012110204435884953, |
|
"learning_rate": 1.9859545490796414e-05, |
|
"loss": 1.5797, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10398735288951344, |
|
"grad_norm": 0.013209005817770958, |
|
"learning_rate": 1.9843547881068763e-05, |
|
"loss": 1.5789, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10679782188652731, |
|
"grad_norm": 0.01261916570365429, |
|
"learning_rate": 1.9826694833502295e-05, |
|
"loss": 1.5716, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1096082908835412, |
|
"grad_norm": 0.012995808385312557, |
|
"learning_rate": 1.9808987812684247e-05, |
|
"loss": 1.5682, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11241875988055507, |
|
"grad_norm": 0.013399253599345684, |
|
"learning_rate": 1.979042835741503e-05, |
|
"loss": 1.5711, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11522922887756895, |
|
"grad_norm": 0.012696914374828339, |
|
"learning_rate": 1.9771018080574534e-05, |
|
"loss": 1.5742, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11803969787458282, |
|
"grad_norm": 0.011595464311540127, |
|
"learning_rate": 1.9750758668981925e-05, |
|
"loss": 1.581, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12085016687159669, |
|
"grad_norm": 0.01301959715783596, |
|
"learning_rate": 1.9729651883249075e-05, |
|
"loss": 1.5821, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12366063586861058, |
|
"grad_norm": 0.012804425321519375, |
|
"learning_rate": 1.9707699557627554e-05, |
|
"loss": 1.5921, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12647110486562446, |
|
"grad_norm": 0.012021846137940884, |
|
"learning_rate": 1.968490359984923e-05, |
|
"loss": 1.5795, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12928157386263833, |
|
"grad_norm": 0.013476583175361156, |
|
"learning_rate": 1.9661265990960486e-05, |
|
"loss": 1.5755, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1320920428596522, |
|
"grad_norm": 0.011448990553617477, |
|
"learning_rate": 1.9636788785150037e-05, |
|
"loss": 1.5688, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.13490251185666607, |
|
"grad_norm": 0.014088047668337822, |
|
"learning_rate": 1.9611474109570446e-05, |
|
"loss": 1.5785, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13771298085367994, |
|
"grad_norm": 0.013553822413086891, |
|
"learning_rate": 1.9585324164153236e-05, |
|
"loss": 1.5812, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.14052344985069384, |
|
"grad_norm": 0.011343201622366905, |
|
"learning_rate": 1.9558341221417744e-05, |
|
"loss": 1.5778, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14333391884770771, |
|
"grad_norm": 0.01123133860528469, |
|
"learning_rate": 1.9530527626273592e-05, |
|
"loss": 1.5758, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14614438784472158, |
|
"grad_norm": 0.01259944774210453, |
|
"learning_rate": 1.9501885795816937e-05, |
|
"loss": 1.5731, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14895485684173546, |
|
"grad_norm": 0.011086889542639256, |
|
"learning_rate": 1.9472418219120403e-05, |
|
"loss": 1.576, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.15176532583874935, |
|
"grad_norm": 0.011971558444201946, |
|
"learning_rate": 1.9442127457016768e-05, |
|
"loss": 1.571, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15457579483576322, |
|
"grad_norm": 0.012076422572135925, |
|
"learning_rate": 1.9411016141876438e-05, |
|
"loss": 1.5743, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1573862638327771, |
|
"grad_norm": 0.01147051528096199, |
|
"learning_rate": 1.9379086977378664e-05, |
|
"loss": 1.5701, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.16019673282979097, |
|
"grad_norm": 0.013713942840695381, |
|
"learning_rate": 1.9346342738276593e-05, |
|
"loss": 1.5632, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.16300720182680484, |
|
"grad_norm": 0.012207642197608948, |
|
"learning_rate": 1.9312786270156135e-05, |
|
"loss": 1.5684, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16581767082381874, |
|
"grad_norm": 0.01164440531283617, |
|
"learning_rate": 1.927842048918867e-05, |
|
"loss": 1.572, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1686281398208326, |
|
"grad_norm": 0.014730863273143768, |
|
"learning_rate": 1.9243248381877605e-05, |
|
"loss": 1.5737, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17143860881784648, |
|
"grad_norm": 0.01201706100255251, |
|
"learning_rate": 1.9207273004798873e-05, |
|
"loss": 1.5708, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.17424907781486035, |
|
"grad_norm": 0.012422306463122368, |
|
"learning_rate": 1.9170497484335276e-05, |
|
"loss": 1.5666, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.17705954681187422, |
|
"grad_norm": 0.014150385744869709, |
|
"learning_rate": 1.9132925016404805e-05, |
|
"loss": 1.5652, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17987001580888812, |
|
"grad_norm": 0.01331349741667509, |
|
"learning_rate": 1.9094558866182892e-05, |
|
"loss": 1.5613, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.182680484805902, |
|
"grad_norm": 0.014323413372039795, |
|
"learning_rate": 1.9055402367818673e-05, |
|
"loss": 1.5643, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18549095380291586, |
|
"grad_norm": 0.010913478210568428, |
|
"learning_rate": 1.901545892414523e-05, |
|
"loss": 1.5768, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.18830142279992973, |
|
"grad_norm": 0.013672353699803352, |
|
"learning_rate": 1.897473200638386e-05, |
|
"loss": 1.5707, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.19111189179694363, |
|
"grad_norm": 0.011649075895547867, |
|
"learning_rate": 1.8933225153842446e-05, |
|
"loss": 1.5678, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1939223607939575, |
|
"grad_norm": 0.012527153827250004, |
|
"learning_rate": 1.8890941973607843e-05, |
|
"loss": 1.5637, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.19673282979097137, |
|
"grad_norm": 0.013441166840493679, |
|
"learning_rate": 1.8847886140232438e-05, |
|
"loss": 1.5543, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19954329878798524, |
|
"grad_norm": 0.014678705483675003, |
|
"learning_rate": 1.8804061395414795e-05, |
|
"loss": 1.5594, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.2023537677849991, |
|
"grad_norm": 0.015578769147396088, |
|
"learning_rate": 1.875947154767452e-05, |
|
"loss": 1.5766, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.205164236782013, |
|
"grad_norm": 0.013635441660881042, |
|
"learning_rate": 1.8714120472021252e-05, |
|
"loss": 1.5622, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.20797470577902688, |
|
"grad_norm": 0.011884918436408043, |
|
"learning_rate": 1.8668012109617933e-05, |
|
"loss": 1.5539, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.21078517477604075, |
|
"grad_norm": 0.013244202360510826, |
|
"learning_rate": 1.862115046743831e-05, |
|
"loss": 1.5641, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21359564377305462, |
|
"grad_norm": 0.011851202696561813, |
|
"learning_rate": 1.85735396179187e-05, |
|
"loss": 1.554, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2164061127700685, |
|
"grad_norm": 0.012839299626648426, |
|
"learning_rate": 1.8525183698604098e-05, |
|
"loss": 1.5663, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2192165817670824, |
|
"grad_norm": 0.013278639875352383, |
|
"learning_rate": 1.8476086911788588e-05, |
|
"loss": 1.5674, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.22202705076409626, |
|
"grad_norm": 0.011714858934283257, |
|
"learning_rate": 1.8426253524150176e-05, |
|
"loss": 1.5628, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.22483751976111013, |
|
"grad_norm": 0.01387378666549921, |
|
"learning_rate": 1.8375687866379988e-05, |
|
"loss": 1.5746, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.227647988758124, |
|
"grad_norm": 0.014316901564598083, |
|
"learning_rate": 1.8324394332805913e-05, |
|
"loss": 1.56, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2304584577551379, |
|
"grad_norm": 0.013246218673884869, |
|
"learning_rate": 1.8272377381010726e-05, |
|
"loss": 1.5604, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.23326892675215177, |
|
"grad_norm": 0.014117361046373844, |
|
"learning_rate": 1.8219641531444713e-05, |
|
"loss": 1.5535, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.23607939574916564, |
|
"grad_norm": 0.013312868773937225, |
|
"learning_rate": 1.8166191367032828e-05, |
|
"loss": 1.5571, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.23888986474617951, |
|
"grad_norm": 0.012200911529362202, |
|
"learning_rate": 1.811203153277641e-05, |
|
"loss": 1.5636, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.24170033374319339, |
|
"grad_norm": 0.012060770764946938, |
|
"learning_rate": 1.8057166735349533e-05, |
|
"loss": 1.5548, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.24451080274020728, |
|
"grad_norm": 0.01396957691758871, |
|
"learning_rate": 1.800160174268996e-05, |
|
"loss": 1.566, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.24732127173722115, |
|
"grad_norm": 0.017935393378138542, |
|
"learning_rate": 1.7945341383584818e-05, |
|
"loss": 1.5705, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.25013174073423505, |
|
"grad_norm": 0.01408262737095356, |
|
"learning_rate": 1.7888390547250944e-05, |
|
"loss": 1.5701, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2529422097312489, |
|
"grad_norm": 0.014612286351621151, |
|
"learning_rate": 1.7830754182909985e-05, |
|
"loss": 1.56, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2557526787282628, |
|
"grad_norm": 0.016053492203354836, |
|
"learning_rate": 1.7772437299358324e-05, |
|
"loss": 1.5655, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.25856314772527667, |
|
"grad_norm": 0.014997400343418121, |
|
"learning_rate": 1.771344496453177e-05, |
|
"loss": 1.5692, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.26137361672229054, |
|
"grad_norm": 0.013961107470095158, |
|
"learning_rate": 1.7653782305065158e-05, |
|
"loss": 1.56, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2641840857193044, |
|
"grad_norm": 0.015686677768826485, |
|
"learning_rate": 1.7593454505846807e-05, |
|
"loss": 1.5561, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2669945547163183, |
|
"grad_norm": 0.013397088274359703, |
|
"learning_rate": 1.753246680956795e-05, |
|
"loss": 1.5651, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.26980502371333215, |
|
"grad_norm": 0.014148705638945103, |
|
"learning_rate": 1.7470824516267125e-05, |
|
"loss": 1.5538, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.272615492710346, |
|
"grad_norm": 0.012879609130322933, |
|
"learning_rate": 1.7408532982869573e-05, |
|
"loss": 1.5592, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2754259617073599, |
|
"grad_norm": 0.012351011857390404, |
|
"learning_rate": 1.7345597622721727e-05, |
|
"loss": 1.5633, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2782364307043738, |
|
"grad_norm": 0.010927798226475716, |
|
"learning_rate": 1.7282023905120743e-05, |
|
"loss": 1.5623, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2810468997013877, |
|
"grad_norm": 0.015191658399999142, |
|
"learning_rate": 1.721781735483921e-05, |
|
"loss": 1.5585, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28385736869840156, |
|
"grad_norm": 0.01361748855561018, |
|
"learning_rate": 1.7152983551645054e-05, |
|
"loss": 1.5553, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.28666783769541543, |
|
"grad_norm": 0.01628255844116211, |
|
"learning_rate": 1.708752812981659e-05, |
|
"loss": 1.5681, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2894783066924293, |
|
"grad_norm": 0.011448734439909458, |
|
"learning_rate": 1.702145677765293e-05, |
|
"loss": 1.5619, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.29228877568944317, |
|
"grad_norm": 0.0159872155636549, |
|
"learning_rate": 1.6954775236979616e-05, |
|
"loss": 1.5528, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.29509924468645704, |
|
"grad_norm": 0.014284605160355568, |
|
"learning_rate": 1.6887489302649657e-05, |
|
"loss": 1.547, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2979097136834709, |
|
"grad_norm": 0.014883718453347683, |
|
"learning_rate": 1.6819604822039924e-05, |
|
"loss": 1.5608, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3007201826804848, |
|
"grad_norm": 0.01631166599690914, |
|
"learning_rate": 1.6751127694543012e-05, |
|
"loss": 1.561, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3035306516774987, |
|
"grad_norm": 0.014855324290692806, |
|
"learning_rate": 1.6682063871054534e-05, |
|
"loss": 1.557, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3063411206745126, |
|
"grad_norm": 0.014950740151107311, |
|
"learning_rate": 1.661241935345599e-05, |
|
"loss": 1.5686, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.30915158967152645, |
|
"grad_norm": 0.015889016911387444, |
|
"learning_rate": 1.654220019409317e-05, |
|
"loss": 1.5525, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3119620586685403, |
|
"grad_norm": 0.012233362533152103, |
|
"learning_rate": 1.6471412495250195e-05, |
|
"loss": 1.5586, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.3147725276655542, |
|
"grad_norm": 0.013401877135038376, |
|
"learning_rate": 1.640006240861921e-05, |
|
"loss": 1.5645, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.31758299666256806, |
|
"grad_norm": 0.012674611061811447, |
|
"learning_rate": 1.632815613476576e-05, |
|
"loss": 1.5564, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.32039346565958193, |
|
"grad_norm": 0.013244451023638248, |
|
"learning_rate": 1.6255699922589968e-05, |
|
"loss": 1.5554, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.3232039346565958, |
|
"grad_norm": 0.013073718175292015, |
|
"learning_rate": 1.6182700068783463e-05, |
|
"loss": 1.5561, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3260144036536097, |
|
"grad_norm": 0.012644422240555286, |
|
"learning_rate": 1.610916291728218e-05, |
|
"loss": 1.5538, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.32882487265062355, |
|
"grad_norm": 0.013174659572541714, |
|
"learning_rate": 1.6035094858715065e-05, |
|
"loss": 1.5509, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.33163534164763747, |
|
"grad_norm": 0.015327691100537777, |
|
"learning_rate": 1.5960502329848683e-05, |
|
"loss": 1.558, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.33444581064465134, |
|
"grad_norm": 0.011017784476280212, |
|
"learning_rate": 1.588539181302786e-05, |
|
"loss": 1.5614, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3372562796416652, |
|
"grad_norm": 0.015872148796916008, |
|
"learning_rate": 1.580976983561235e-05, |
|
"loss": 1.5557, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3400667486386791, |
|
"grad_norm": 0.01222875714302063, |
|
"learning_rate": 1.5733642969409553e-05, |
|
"loss": 1.5603, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.34287721763569295, |
|
"grad_norm": 0.015869170427322388, |
|
"learning_rate": 1.5657017830103448e-05, |
|
"loss": 1.5581, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3456876866327068, |
|
"grad_norm": 0.01336450781673193, |
|
"learning_rate": 1.5579901076679625e-05, |
|
"loss": 1.5663, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3484981556297207, |
|
"grad_norm": 0.015786062926054, |
|
"learning_rate": 1.5502299410846626e-05, |
|
"loss": 1.5518, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.35130862462673457, |
|
"grad_norm": 0.01679217629134655, |
|
"learning_rate": 1.5424219576453526e-05, |
|
"loss": 1.5535, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.35411909362374844, |
|
"grad_norm": 0.011585685424506664, |
|
"learning_rate": 1.5345668358903886e-05, |
|
"loss": 1.5715, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.35692956262076236, |
|
"grad_norm": 0.015714086592197418, |
|
"learning_rate": 1.5266652584566056e-05, |
|
"loss": 1.5489, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.35974003161777623, |
|
"grad_norm": 0.013761353679001331, |
|
"learning_rate": 1.5187179120179969e-05, |
|
"loss": 1.5515, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3625505006147901, |
|
"grad_norm": 0.013305050320923328, |
|
"learning_rate": 1.5107254872260366e-05, |
|
"loss": 1.5596, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.365360969611804, |
|
"grad_norm": 0.013329329900443554, |
|
"learning_rate": 1.5026886786496624e-05, |
|
"loss": 1.5488, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36817143860881785, |
|
"grad_norm": 0.014221809804439545, |
|
"learning_rate": 1.4946081847149134e-05, |
|
"loss": 1.5536, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3709819076058317, |
|
"grad_norm": 0.011903174221515656, |
|
"learning_rate": 1.4864847076442358e-05, |
|
"loss": 1.5612, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3737923766028456, |
|
"grad_norm": 0.012618706561625004, |
|
"learning_rate": 1.4783189533954555e-05, |
|
"loss": 1.5578, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.37660284559985946, |
|
"grad_norm": 0.012923737987875938, |
|
"learning_rate": 1.4701116316004307e-05, |
|
"loss": 1.5619, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.37941331459687333, |
|
"grad_norm": 0.012171992100775242, |
|
"learning_rate": 1.46186345550338e-05, |
|
"loss": 1.5596, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.38222378359388726, |
|
"grad_norm": 0.014030322432518005, |
|
"learning_rate": 1.4535751418989e-05, |
|
"loss": 1.5541, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3850342525909011, |
|
"grad_norm": 0.012220495380461216, |
|
"learning_rate": 1.4452474110696738e-05, |
|
"loss": 1.55, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.387844721587915, |
|
"grad_norm": 0.013919586315751076, |
|
"learning_rate": 1.4368809867238754e-05, |
|
"loss": 1.5607, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.39065519058492887, |
|
"grad_norm": 0.011982360854744911, |
|
"learning_rate": 1.4284765959322772e-05, |
|
"loss": 1.5464, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.39346565958194274, |
|
"grad_norm": 0.012515905313193798, |
|
"learning_rate": 1.4200349690650654e-05, |
|
"loss": 1.5645, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3962761285789566, |
|
"grad_norm": 0.011968614533543587, |
|
"learning_rate": 1.411556839728367e-05, |
|
"loss": 1.5559, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3990865975759705, |
|
"grad_norm": 0.010865562595427036, |
|
"learning_rate": 1.4030429447004992e-05, |
|
"loss": 1.5617, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.40189706657298435, |
|
"grad_norm": 0.013639074750244617, |
|
"learning_rate": 1.3944940238679384e-05, |
|
"loss": 1.5469, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.4047075355699982, |
|
"grad_norm": 0.0128669748082757, |
|
"learning_rate": 1.3859108201610236e-05, |
|
"loss": 1.5466, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4075180045670121, |
|
"grad_norm": 0.011727554723620415, |
|
"learning_rate": 1.3772940794893916e-05, |
|
"loss": 1.5523, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.410328473564026, |
|
"grad_norm": 0.015109344385564327, |
|
"learning_rate": 1.368644550677157e-05, |
|
"loss": 1.5554, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4131389425610399, |
|
"grad_norm": 0.013233068399131298, |
|
"learning_rate": 1.3599629853978342e-05, |
|
"loss": 1.5594, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.41594941155805376, |
|
"grad_norm": 0.014392644166946411, |
|
"learning_rate": 1.3512501381090158e-05, |
|
"loss": 1.5603, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.41875988055506763, |
|
"grad_norm": 0.011739206500351429, |
|
"learning_rate": 1.3425067659868084e-05, |
|
"loss": 1.5508, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.4215703495520815, |
|
"grad_norm": 0.016567695885896683, |
|
"learning_rate": 1.3337336288600297e-05, |
|
"loss": 1.5529, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4243808185490954, |
|
"grad_norm": 0.01213662326335907, |
|
"learning_rate": 1.324931489144178e-05, |
|
"loss": 1.5468, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.42719128754610924, |
|
"grad_norm": 0.01404926273971796, |
|
"learning_rate": 1.3161011117751756e-05, |
|
"loss": 1.5564, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.4300017565431231, |
|
"grad_norm": 0.013470892794430256, |
|
"learning_rate": 1.3072432641428931e-05, |
|
"loss": 1.5541, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.432812225540137, |
|
"grad_norm": 0.01260072086006403, |
|
"learning_rate": 1.2983587160244602e-05, |
|
"loss": 1.5453, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4356226945371509, |
|
"grad_norm": 0.012238179333508015, |
|
"learning_rate": 1.2894482395173695e-05, |
|
"loss": 1.5541, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4384331635341648, |
|
"grad_norm": 0.014110546559095383, |
|
"learning_rate": 1.2805126089723798e-05, |
|
"loss": 1.5606, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.44124363253117865, |
|
"grad_norm": 0.011262672953307629, |
|
"learning_rate": 1.2715526009262209e-05, |
|
"loss": 1.5603, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4440541015281925, |
|
"grad_norm": 0.014026992954313755, |
|
"learning_rate": 1.2625689940341102e-05, |
|
"loss": 1.5462, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4468645705252064, |
|
"grad_norm": 0.011127503588795662, |
|
"learning_rate": 1.2535625690020861e-05, |
|
"loss": 1.5497, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.44967503952222027, |
|
"grad_norm": 0.014035338535904884, |
|
"learning_rate": 1.24453410851916e-05, |
|
"loss": 1.5551, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.45248550851923414, |
|
"grad_norm": 0.01174125261604786, |
|
"learning_rate": 1.2354843971892998e-05, |
|
"loss": 1.5494, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.455295977516248, |
|
"grad_norm": 0.013476992025971413, |
|
"learning_rate": 1.2264142214632441e-05, |
|
"loss": 1.5622, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4581064465132619, |
|
"grad_norm": 0.012524358928203583, |
|
"learning_rate": 1.2173243695701575e-05, |
|
"loss": 1.5512, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4609169155102758, |
|
"grad_norm": 0.011893150396645069, |
|
"learning_rate": 1.2082156314491298e-05, |
|
"loss": 1.5601, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4637273845072897, |
|
"grad_norm": 0.011717031709849834, |
|
"learning_rate": 1.1990887986805295e-05, |
|
"loss": 1.5401, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.46653785350430355, |
|
"grad_norm": 0.01049530878663063, |
|
"learning_rate": 1.1899446644172106e-05, |
|
"loss": 1.5434, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4693483225013174, |
|
"grad_norm": 0.013684769161045551, |
|
"learning_rate": 1.1807840233155863e-05, |
|
"loss": 1.5473, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4721587914983313, |
|
"grad_norm": 0.010303209535777569, |
|
"learning_rate": 1.1716076714665701e-05, |
|
"loss": 1.5534, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.47496926049534516, |
|
"grad_norm": 0.013134041801095009, |
|
"learning_rate": 1.1624164063263931e-05, |
|
"loss": 1.54, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.47777972949235903, |
|
"grad_norm": 0.011170011013746262, |
|
"learning_rate": 1.1532110266473026e-05, |
|
"loss": 1.5519, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4805901984893729, |
|
"grad_norm": 0.01223431620746851, |
|
"learning_rate": 1.1439923324081465e-05, |
|
"loss": 1.5461, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.48340066748638677, |
|
"grad_norm": 0.01103509496897459, |
|
"learning_rate": 1.1347611247448544e-05, |
|
"loss": 1.5469, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.48621113648340064, |
|
"grad_norm": 0.012365833856165409, |
|
"learning_rate": 1.1255182058808143e-05, |
|
"loss": 1.5546, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.48902160548041457, |
|
"grad_norm": 0.011856972239911556, |
|
"learning_rate": 1.1162643790571574e-05, |
|
"loss": 1.5493, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.49183207447742844, |
|
"grad_norm": 0.011247235350310802, |
|
"learning_rate": 1.1070004484629543e-05, |
|
"loss": 1.5485, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4946425434744423, |
|
"grad_norm": 0.010246271267533302, |
|
"learning_rate": 1.0977272191653272e-05, |
|
"loss": 1.5385, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4974530124714562, |
|
"grad_norm": 0.011663992889225483, |
|
"learning_rate": 1.0884454970394871e-05, |
|
"loss": 1.5543, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.5002634814684701, |
|
"grad_norm": 0.011617216281592846, |
|
"learning_rate": 1.0791560886987016e-05, |
|
"loss": 1.5621, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.503073950465484, |
|
"grad_norm": 0.010127879679203033, |
|
"learning_rate": 1.069859801424196e-05, |
|
"loss": 1.547, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5058844194624978, |
|
"grad_norm": 0.01151992380619049, |
|
"learning_rate": 1.0605574430949983e-05, |
|
"loss": 1.5496, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5086948884595117, |
|
"grad_norm": 0.012575927190482616, |
|
"learning_rate": 1.0512498221177319e-05, |
|
"loss": 1.5523, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5115053574565256, |
|
"grad_norm": 0.011552478186786175, |
|
"learning_rate": 1.0419377473563621e-05, |
|
"loss": 1.5537, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5143158264535395, |
|
"grad_norm": 0.010599706321954727, |
|
"learning_rate": 1.0326220280619036e-05, |
|
"loss": 1.5448, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5171262954505533, |
|
"grad_norm": 0.010817637667059898, |
|
"learning_rate": 1.0233034738020933e-05, |
|
"loss": 1.5503, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5199367644475672, |
|
"grad_norm": 0.012649599462747574, |
|
"learning_rate": 1.0139828943910358e-05, |
|
"loss": 1.5486, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5227472334445811, |
|
"grad_norm": 0.012253242544829845, |
|
"learning_rate": 1.004661099818829e-05, |
|
"loss": 1.5514, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5255577024415949, |
|
"grad_norm": 0.011790635995566845, |
|
"learning_rate": 9.953389001811716e-06, |
|
"loss": 1.5518, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5283681714386088, |
|
"grad_norm": 0.013327023014426231, |
|
"learning_rate": 9.860171056089646e-06, |
|
"loss": 1.5424, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5311786404356227, |
|
"grad_norm": 0.0112162996083498, |
|
"learning_rate": 9.766965261979072e-06, |
|
"loss": 1.5488, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5339891094326366, |
|
"grad_norm": 0.012566052377223969, |
|
"learning_rate": 9.673779719380967e-06, |
|
"loss": 1.5452, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5367995784296504, |
|
"grad_norm": 0.012440058402717113, |
|
"learning_rate": 9.580622526436382e-06, |
|
"loss": 1.5457, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5396100474266643, |
|
"grad_norm": 0.011921185068786144, |
|
"learning_rate": 9.487501778822685e-06, |
|
"loss": 1.5413, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5424205164236782, |
|
"grad_norm": 0.011896064504981041, |
|
"learning_rate": 9.394425569050018e-06, |
|
"loss": 1.5423, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.545230985420692, |
|
"grad_norm": 0.01354946568608284, |
|
"learning_rate": 9.30140198575804e-06, |
|
"loss": 1.5523, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5480414544177059, |
|
"grad_norm": 0.011999576352536678, |
|
"learning_rate": 9.208439113012984e-06, |
|
"loss": 1.5367, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5508519234147198, |
|
"grad_norm": 0.012224970385432243, |
|
"learning_rate": 9.115545029605129e-06, |
|
"loss": 1.5564, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5536623924117338, |
|
"grad_norm": 0.012914243154227734, |
|
"learning_rate": 9.022727808346731e-06, |
|
"loss": 1.5426, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5564728614087476, |
|
"grad_norm": 0.012274730019271374, |
|
"learning_rate": 8.92999551537046e-06, |
|
"loss": 1.5454, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5592833304057615, |
|
"grad_norm": 0.010419441387057304, |
|
"learning_rate": 8.837356209428428e-06, |
|
"loss": 1.5388, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5620937994027754, |
|
"grad_norm": 0.012639000080525875, |
|
"learning_rate": 8.744817941191862e-06, |
|
"loss": 1.5451, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5649042683997892, |
|
"grad_norm": 0.01181803084909916, |
|
"learning_rate": 8.652388752551458e-06, |
|
"loss": 1.548, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5677147373968031, |
|
"grad_norm": 0.01122727058827877, |
|
"learning_rate": 8.560076675918537e-06, |
|
"loss": 1.5531, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.570525206393817, |
|
"grad_norm": 0.011986492201685905, |
|
"learning_rate": 8.467889733526977e-06, |
|
"loss": 1.5334, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5733356753908309, |
|
"grad_norm": 0.01114217285066843, |
|
"learning_rate": 8.375835936736072e-06, |
|
"loss": 1.539, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5761461443878447, |
|
"grad_norm": 0.011127611622214317, |
|
"learning_rate": 8.283923285334304e-06, |
|
"loss": 1.546, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5789566133848586, |
|
"grad_norm": 0.011980608105659485, |
|
"learning_rate": 8.19215976684414e-06, |
|
"loss": 1.5556, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5817670823818725, |
|
"grad_norm": 0.011237296275794506, |
|
"learning_rate": 8.100553355827897e-06, |
|
"loss": 1.5368, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5845775513788863, |
|
"grad_norm": 0.01083777192980051, |
|
"learning_rate": 8.009112013194707e-06, |
|
"loss": 1.5404, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5873880203759002, |
|
"grad_norm": 0.01118643768131733, |
|
"learning_rate": 7.917843685508702e-06, |
|
"loss": 1.5405, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5901984893729141, |
|
"grad_norm": 0.012426355853676796, |
|
"learning_rate": 7.826756304298428e-06, |
|
"loss": 1.5497, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.593008958369928, |
|
"grad_norm": 0.011632665991783142, |
|
"learning_rate": 7.73585778536756e-06, |
|
"loss": 1.5395, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5958194273669418, |
|
"grad_norm": 0.010038619861006737, |
|
"learning_rate": 7.645156028107005e-06, |
|
"loss": 1.5388, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5986298963639557, |
|
"grad_norm": 0.01042498741298914, |
|
"learning_rate": 7.554658914808404e-06, |
|
"loss": 1.5446, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.6014403653609696, |
|
"grad_norm": 0.009714511223137379, |
|
"learning_rate": 7.464374309979143e-06, |
|
"loss": 1.545, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.6042508343579834, |
|
"grad_norm": 0.011047974228858948, |
|
"learning_rate": 7.3743100596589e-06, |
|
"loss": 1.5496, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6070613033549974, |
|
"grad_norm": 0.009808655828237534, |
|
"learning_rate": 7.284473990737795e-06, |
|
"loss": 1.542, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6098717723520113, |
|
"grad_norm": 0.010092736221849918, |
|
"learning_rate": 7.194873910276205e-06, |
|
"loss": 1.5517, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6126822413490252, |
|
"grad_norm": 0.01010197214782238, |
|
"learning_rate": 7.1055176048263085e-06, |
|
"loss": 1.541, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.615492710346039, |
|
"grad_norm": 0.011126919649541378, |
|
"learning_rate": 7.0164128397554e-06, |
|
"loss": 1.5582, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6183031793430529, |
|
"grad_norm": 0.01078337524086237, |
|
"learning_rate": 6.92756735857107e-06, |
|
"loss": 1.5438, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6211136483400668, |
|
"grad_norm": 0.010446320287883282, |
|
"learning_rate": 6.838988882248243e-06, |
|
"loss": 1.5448, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6239241173370806, |
|
"grad_norm": 0.011228025890886784, |
|
"learning_rate": 6.750685108558221e-06, |
|
"loss": 1.5347, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6267345863340945, |
|
"grad_norm": 0.012325327843427658, |
|
"learning_rate": 6.662663711399705e-06, |
|
"loss": 1.5537, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6295450553311084, |
|
"grad_norm": 0.01042882353067398, |
|
"learning_rate": 6.574932340131917e-06, |
|
"loss": 1.55, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6323555243281223, |
|
"grad_norm": 0.012044312432408333, |
|
"learning_rate": 6.487498618909845e-06, |
|
"loss": 1.5485, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6351659933251361, |
|
"grad_norm": 0.01117675006389618, |
|
"learning_rate": 6.400370146021662e-06, |
|
"loss": 1.5488, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.63797646232215, |
|
"grad_norm": 0.010941598564386368, |
|
"learning_rate": 6.313554493228431e-06, |
|
"loss": 1.5456, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6407869313191639, |
|
"grad_norm": 0.011893962509930134, |
|
"learning_rate": 6.227059205106085e-06, |
|
"loss": 1.543, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6435974003161777, |
|
"grad_norm": 0.010805683210492134, |
|
"learning_rate": 6.14089179838977e-06, |
|
"loss": 1.5479, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6464078693131916, |
|
"grad_norm": 0.011175237596035004, |
|
"learning_rate": 6.0550597613206205e-06, |
|
"loss": 1.5399, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6492183383102055, |
|
"grad_norm": 0.011044224724173546, |
|
"learning_rate": 5.969570552995014e-06, |
|
"loss": 1.547, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6520288073072193, |
|
"grad_norm": 0.010453018359839916, |
|
"learning_rate": 5.8844316027163315e-06, |
|
"loss": 1.5468, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6548392763042332, |
|
"grad_norm": 0.010159061290323734, |
|
"learning_rate": 5.799650309349348e-06, |
|
"loss": 1.536, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6576497453012471, |
|
"grad_norm": 0.010464577935636044, |
|
"learning_rate": 5.715234040677229e-06, |
|
"loss": 1.5529, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6604602142982611, |
|
"grad_norm": 0.01091894879937172, |
|
"learning_rate": 5.631190132761247e-06, |
|
"loss": 1.5445, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6632706832952749, |
|
"grad_norm": 0.011955167166888714, |
|
"learning_rate": 5.547525889303265e-06, |
|
"loss": 1.54, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6660811522922888, |
|
"grad_norm": 0.010030088014900684, |
|
"learning_rate": 5.464248581011002e-06, |
|
"loss": 1.5453, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6688916212893027, |
|
"grad_norm": 0.009817596524953842, |
|
"learning_rate": 5.381365444966205e-06, |
|
"loss": 1.5367, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6717020902863166, |
|
"grad_norm": 0.010095087811350822, |
|
"learning_rate": 5.298883683995697e-06, |
|
"loss": 1.5559, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6745125592833304, |
|
"grad_norm": 0.010223811492323875, |
|
"learning_rate": 5.216810466045448e-06, |
|
"loss": 1.5406, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6773230282803443, |
|
"grad_norm": 0.009899747557938099, |
|
"learning_rate": 5.135152923557647e-06, |
|
"loss": 1.5395, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6801334972773582, |
|
"grad_norm": 0.009352842345833778, |
|
"learning_rate": 5.053918152850868e-06, |
|
"loss": 1.5423, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.682943966274372, |
|
"grad_norm": 0.0106782466173172, |
|
"learning_rate": 4.973113213503379e-06, |
|
"loss": 1.5397, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6857544352713859, |
|
"grad_norm": 0.010458718985319138, |
|
"learning_rate": 4.8927451277396365e-06, |
|
"loss": 1.5264, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6885649042683998, |
|
"grad_norm": 0.01009244006127119, |
|
"learning_rate": 4.812820879820034e-06, |
|
"loss": 1.5512, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6913753732654137, |
|
"grad_norm": 0.009827625937759876, |
|
"learning_rate": 4.733347415433946e-06, |
|
"loss": 1.5421, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6941858422624275, |
|
"grad_norm": 0.010725892148911953, |
|
"learning_rate": 4.654331641096118e-06, |
|
"loss": 1.5447, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6969963112594414, |
|
"grad_norm": 0.009530246257781982, |
|
"learning_rate": 4.575780423546476e-06, |
|
"loss": 1.5429, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6998067802564553, |
|
"grad_norm": 0.009370286948978901, |
|
"learning_rate": 4.497700589153379e-06, |
|
"loss": 1.54, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.7026172492534691, |
|
"grad_norm": 0.010170893743634224, |
|
"learning_rate": 4.420098923320378e-06, |
|
"loss": 1.5448, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.705427718250483, |
|
"grad_norm": 0.01000099815428257, |
|
"learning_rate": 4.342982169896555e-06, |
|
"loss": 1.5377, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7082381872474969, |
|
"grad_norm": 0.010095086880028248, |
|
"learning_rate": 4.266357030590449e-06, |
|
"loss": 1.5321, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.7110486562445109, |
|
"grad_norm": 0.009943116456270218, |
|
"learning_rate": 4.1902301643876555e-06, |
|
"loss": 1.543, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7138591252415247, |
|
"grad_norm": 0.009373177774250507, |
|
"learning_rate": 4.114608186972143e-06, |
|
"loss": 1.5432, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7166695942385386, |
|
"grad_norm": 0.009264045394957066, |
|
"learning_rate": 4.0394976701513235e-06, |
|
"loss": 1.5501, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7194800632355525, |
|
"grad_norm": 0.00987312849611044, |
|
"learning_rate": 3.96490514128494e-06, |
|
"loss": 1.5376, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7222905322325663, |
|
"grad_norm": 0.010003172792494297, |
|
"learning_rate": 3.890837082717822e-06, |
|
"loss": 1.5424, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7251010012295802, |
|
"grad_norm": 0.009270971640944481, |
|
"learning_rate": 3.817299931216537e-06, |
|
"loss": 1.5453, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7279114702265941, |
|
"grad_norm": 0.009572354145348072, |
|
"learning_rate": 3.74430007741003e-06, |
|
"loss": 1.5325, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.730721939223608, |
|
"grad_norm": 0.009282928891479969, |
|
"learning_rate": 3.671843865234238e-06, |
|
"loss": 1.5447, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7335324082206218, |
|
"grad_norm": 0.0098671093583107, |
|
"learning_rate": 3.599937591380791e-06, |
|
"loss": 1.5466, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7363428772176357, |
|
"grad_norm": 0.010173565708100796, |
|
"learning_rate": 3.5285875047498075e-06, |
|
"loss": 1.5347, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7391533462146496, |
|
"grad_norm": 0.0094713494181633, |
|
"learning_rate": 3.4577998059068354e-06, |
|
"loss": 1.542, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7419638152116634, |
|
"grad_norm": 0.009647979401051998, |
|
"learning_rate": 3.3875806465440152e-06, |
|
"loss": 1.5437, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7447742842086773, |
|
"grad_norm": 0.009664146229624748, |
|
"learning_rate": 3.3179361289454694e-06, |
|
"loss": 1.5406, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7475847532056912, |
|
"grad_norm": 0.010614863596856594, |
|
"learning_rate": 3.2488723054569905e-06, |
|
"loss": 1.5512, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.750395222202705, |
|
"grad_norm": 0.009987459518015385, |
|
"learning_rate": 3.1803951779600774e-06, |
|
"loss": 1.5341, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7532056911997189, |
|
"grad_norm": 0.009676804766058922, |
|
"learning_rate": 3.112510697350348e-06, |
|
"loss": 1.5457, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7560161601967328, |
|
"grad_norm": 0.010411749593913555, |
|
"learning_rate": 3.04522476302039e-06, |
|
"loss": 1.5456, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7588266291937467, |
|
"grad_norm": 0.010124873369932175, |
|
"learning_rate": 2.978543222347076e-06, |
|
"loss": 1.5452, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7616370981907605, |
|
"grad_norm": 0.009868764318525791, |
|
"learning_rate": 2.912471870183411e-06, |
|
"loss": 1.555, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7644475671877745, |
|
"grad_norm": 0.009340250864624977, |
|
"learning_rate": 2.847016448354948e-06, |
|
"loss": 1.5419, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7672580361847884, |
|
"grad_norm": 0.00967357400804758, |
|
"learning_rate": 2.782182645160789e-06, |
|
"loss": 1.5433, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7700685051818023, |
|
"grad_norm": 0.009733307175338268, |
|
"learning_rate": 2.71797609487926e-06, |
|
"loss": 1.5378, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7728789741788161, |
|
"grad_norm": 0.009725394658744335, |
|
"learning_rate": 2.6544023772782736e-06, |
|
"loss": 1.5373, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.77568944317583, |
|
"grad_norm": 0.009589461609721184, |
|
"learning_rate": 2.591467017130426e-06, |
|
"loss": 1.5331, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7784999121728439, |
|
"grad_norm": 0.009730999357998371, |
|
"learning_rate": 2.5291754837328787e-06, |
|
"loss": 1.5374, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7813103811698577, |
|
"grad_norm": 0.009977961890399456, |
|
"learning_rate": 2.4675331904320533e-06, |
|
"loss": 1.5386, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7841208501668716, |
|
"grad_norm": 0.009760740213096142, |
|
"learning_rate": 2.4065454941531963e-06, |
|
"loss": 1.5521, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7869313191638855, |
|
"grad_norm": 0.009903214871883392, |
|
"learning_rate": 2.346217694934847e-06, |
|
"loss": 1.5408, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7897417881608993, |
|
"grad_norm": 0.010044459253549576, |
|
"learning_rate": 2.286555035468233e-06, |
|
"loss": 1.54, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7925522571579132, |
|
"grad_norm": 0.009294719435274601, |
|
"learning_rate": 2.22756270064168e-06, |
|
"loss": 1.5417, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7953627261549271, |
|
"grad_norm": 0.009228096343576908, |
|
"learning_rate": 2.16924581709002e-06, |
|
"loss": 1.5285, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.798173195151941, |
|
"grad_norm": 0.009288915432989597, |
|
"learning_rate": 2.1116094527490594e-06, |
|
"loss": 1.5431, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.8009836641489548, |
|
"grad_norm": 0.010039190761744976, |
|
"learning_rate": 2.0546586164151827e-06, |
|
"loss": 1.5324, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8037941331459687, |
|
"grad_norm": 0.009471042081713676, |
|
"learning_rate": 1.9983982573100413e-06, |
|
"loss": 1.5375, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.8066046021429826, |
|
"grad_norm": 0.009732691571116447, |
|
"learning_rate": 1.94283326465047e-06, |
|
"loss": 1.5425, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8094150711399964, |
|
"grad_norm": 0.00961157213896513, |
|
"learning_rate": 1.887968467223591e-06, |
|
"loss": 1.5378, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8122255401370103, |
|
"grad_norm": 0.009570587426424026, |
|
"learning_rate": 1.8338086329671734e-06, |
|
"loss": 1.546, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8150360091340242, |
|
"grad_norm": 0.009907620027661324, |
|
"learning_rate": 1.7803584685552877e-06, |
|
"loss": 1.545, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8178464781310382, |
|
"grad_norm": 0.009468463249504566, |
|
"learning_rate": 1.7276226189892763e-06, |
|
"loss": 1.5387, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.820656947128052, |
|
"grad_norm": 0.008901305496692657, |
|
"learning_rate": 1.6756056671940902e-06, |
|
"loss": 1.541, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8234674161250659, |
|
"grad_norm": 0.009180068038403988, |
|
"learning_rate": 1.624312133620013e-06, |
|
"loss": 1.543, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8262778851220798, |
|
"grad_norm": 0.0097258435562253, |
|
"learning_rate": 1.5737464758498243e-06, |
|
"loss": 1.5323, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8290883541190937, |
|
"grad_norm": 0.00945055577903986, |
|
"learning_rate": 1.523913088211415e-06, |
|
"loss": 1.5366, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8318988231161075, |
|
"grad_norm": 0.009719719178974628, |
|
"learning_rate": 1.474816301395906e-06, |
|
"loss": 1.5489, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8347092921131214, |
|
"grad_norm": 0.00904281809926033, |
|
"learning_rate": 1.4264603820813006e-06, |
|
"loss": 1.5413, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8375197611101353, |
|
"grad_norm": 0.009636901319026947, |
|
"learning_rate": 1.3788495325616912e-06, |
|
"loss": 1.5493, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.8403302301071491, |
|
"grad_norm": 0.00919989962130785, |
|
"learning_rate": 1.3319878903820682e-06, |
|
"loss": 1.5387, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.843140699104163, |
|
"grad_norm": 0.009369692765176296, |
|
"learning_rate": 1.2858795279787517e-06, |
|
"loss": 1.54, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8459511681011769, |
|
"grad_norm": 0.009285050444304943, |
|
"learning_rate": 1.2405284523254823e-06, |
|
"loss": 1.5496, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8487616370981907, |
|
"grad_norm": 0.009485352784395218, |
|
"learning_rate": 1.195938604585205e-06, |
|
"loss": 1.5274, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8515721060952046, |
|
"grad_norm": 0.00928215030580759, |
|
"learning_rate": 1.152113859767565e-06, |
|
"loss": 1.5437, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8543825750922185, |
|
"grad_norm": 0.009152165614068508, |
|
"learning_rate": 1.109058026392158e-06, |
|
"loss": 1.5395, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.8571930440892324, |
|
"grad_norm": 0.009171172976493835, |
|
"learning_rate": 1.0667748461575544e-06, |
|
"loss": 1.5487, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8600035130862462, |
|
"grad_norm": 0.00959504209458828, |
|
"learning_rate": 1.0252679936161392e-06, |
|
"loss": 1.5374, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8628139820832601, |
|
"grad_norm": 0.00906702782958746, |
|
"learning_rate": 9.845410758547724e-07, |
|
"loss": 1.5412, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.865624451080274, |
|
"grad_norm": 0.008921015076339245, |
|
"learning_rate": 9.445976321813277e-07, |
|
"loss": 1.5295, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.868434920077288, |
|
"grad_norm": 0.009434668347239494, |
|
"learning_rate": 9.054411338171099e-07, |
|
"loss": 1.5474, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8712453890743018, |
|
"grad_norm": 0.009549295529723167, |
|
"learning_rate": 8.670749835951964e-07, |
|
"loss": 1.5425, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8740558580713157, |
|
"grad_norm": 0.009337035939097404, |
|
"learning_rate": 8.29502515664723e-07, |
|
"loss": 1.5412, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8768663270683296, |
|
"grad_norm": 0.008889071643352509, |
|
"learning_rate": 7.927269952011285e-07, |
|
"loss": 1.5589, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8796767960653434, |
|
"grad_norm": 0.008982825092971325, |
|
"learning_rate": 7.567516181223966e-07, |
|
"loss": 1.5504, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8824872650623573, |
|
"grad_norm": 0.009038092568516731, |
|
"learning_rate": 7.215795108113343e-07, |
|
"loss": 1.5391, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8852977340593712, |
|
"grad_norm": 0.0092889703810215, |
|
"learning_rate": 6.872137298438653e-07, |
|
"loss": 1.5354, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.888108203056385, |
|
"grad_norm": 0.009272675029933453, |
|
"learning_rate": 6.536572617234082e-07, |
|
"loss": 1.5449, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8909186720533989, |
|
"grad_norm": 0.00896420981734991, |
|
"learning_rate": 6.209130226213378e-07, |
|
"loss": 1.5544, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8937291410504128, |
|
"grad_norm": 0.00960630364716053, |
|
"learning_rate": 5.889838581235641e-07, |
|
"loss": 1.5425, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8965396100474267, |
|
"grad_norm": 0.009224542416632175, |
|
"learning_rate": 5.578725429832344e-07, |
|
"loss": 1.5287, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8993500790444405, |
|
"grad_norm": 0.009561055339872837, |
|
"learning_rate": 5.275817808796013e-07, |
|
"loss": 1.5399, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9021605480414544, |
|
"grad_norm": 0.00883413664996624, |
|
"learning_rate": 4.981142041830645e-07, |
|
"loss": 1.5373, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.9049710170384683, |
|
"grad_norm": 0.0092536062002182, |
|
"learning_rate": 4.6947237372640954e-07, |
|
"loss": 1.5372, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.9077814860354821, |
|
"grad_norm": 0.008912206627428532, |
|
"learning_rate": 4.416587785822568e-07, |
|
"loss": 1.5412, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.910591955032496, |
|
"grad_norm": 0.009592842310667038, |
|
"learning_rate": 4.1467583584676395e-07, |
|
"loss": 1.541, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9134024240295099, |
|
"grad_norm": 0.00898828823119402, |
|
"learning_rate": 3.885258904295575e-07, |
|
"loss": 1.539, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9162128930265238, |
|
"grad_norm": 0.009104624390602112, |
|
"learning_rate": 3.6321121484996447e-07, |
|
"loss": 1.5313, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.9190233620235376, |
|
"grad_norm": 0.00905083492398262, |
|
"learning_rate": 3.3873400903951636e-07, |
|
"loss": 1.5396, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9218338310205516, |
|
"grad_norm": 0.009055250324308872, |
|
"learning_rate": 3.1509640015076946e-07, |
|
"loss": 1.5471, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9246443000175655, |
|
"grad_norm": 0.00856536254286766, |
|
"learning_rate": 2.923004423724474e-07, |
|
"loss": 1.5302, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9274547690145793, |
|
"grad_norm": 0.0095223942771554, |
|
"learning_rate": 2.703481167509281e-07, |
|
"loss": 1.5387, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9302652380115932, |
|
"grad_norm": 0.009137586690485477, |
|
"learning_rate": 2.4924133101807636e-07, |
|
"loss": 1.5379, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9330757070086071, |
|
"grad_norm": 0.009256749413907528, |
|
"learning_rate": 2.289819194254661e-07, |
|
"loss": 1.54, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.935886176005621, |
|
"grad_norm": 0.00952975358814001, |
|
"learning_rate": 2.0957164258497031e-07, |
|
"loss": 1.5413, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.9386966450026348, |
|
"grad_norm": 0.009139897301793098, |
|
"learning_rate": 1.9101218731575777e-07, |
|
"loss": 1.5547, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9415071139996487, |
|
"grad_norm": 0.009397204965353012, |
|
"learning_rate": 1.73305166497707e-07, |
|
"loss": 1.5374, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9443175829966626, |
|
"grad_norm": 0.009158180095255375, |
|
"learning_rate": 1.5645211893123846e-07, |
|
"loss": 1.5449, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.9471280519936764, |
|
"grad_norm": 0.008892880752682686, |
|
"learning_rate": 1.4045450920358917e-07, |
|
"loss": 1.5418, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9499385209906903, |
|
"grad_norm": 0.008902773261070251, |
|
"learning_rate": 1.2531372756153458e-07, |
|
"loss": 1.5451, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9527489899877042, |
|
"grad_norm": 0.00923394039273262, |
|
"learning_rate": 1.1103108979056865e-07, |
|
"loss": 1.5329, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.9555594589847181, |
|
"grad_norm": 0.009119280613958836, |
|
"learning_rate": 9.760783710056176e-08, |
|
"loss": 1.5399, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9583699279817319, |
|
"grad_norm": 0.009382793679833412, |
|
"learning_rate": 8.504513601789388e-08, |
|
"loss": 1.5372, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.9611803969787458, |
|
"grad_norm": 0.0087265744805336, |
|
"learning_rate": 7.334407828407885e-08, |
|
"loss": 1.5415, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.9639908659757597, |
|
"grad_norm": 0.008734731003642082, |
|
"learning_rate": 6.250568076088814e-08, |
|
"loss": 1.5388, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9668013349727735, |
|
"grad_norm": 0.00877598486840725, |
|
"learning_rate": 5.2530885341982586e-08, |
|
"loss": 1.5385, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9696118039697874, |
|
"grad_norm": 0.008809634484350681, |
|
"learning_rate": 4.3420558871060116e-08, |
|
"loss": 1.5447, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9724222729668013, |
|
"grad_norm": 0.00890128593891859, |
|
"learning_rate": 3.517549306652157e-08, |
|
"loss": 1.538, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9752327419638153, |
|
"grad_norm": 0.008788186125457287, |
|
"learning_rate": 2.7796404452666847e-08, |
|
"loss": 1.5506, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9780432109608291, |
|
"grad_norm": 0.009096509777009487, |
|
"learning_rate": 2.1283934297432472e-08, |
|
"loss": 1.5365, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.980853679957843, |
|
"grad_norm": 0.00890890508890152, |
|
"learning_rate": 1.5638648556656198e-08, |
|
"loss": 1.5406, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9836641489548569, |
|
"grad_norm": 0.008733571507036686, |
|
"learning_rate": 1.0861037824896337e-08, |
|
"loss": 1.5474, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9864746179518707, |
|
"grad_norm": 0.00961330346763134, |
|
"learning_rate": 6.951517292800303e-09, |
|
"loss": 1.5417, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9892850869488846, |
|
"grad_norm": 0.009068558923900127, |
|
"learning_rate": 3.9104267110168235e-09, |
|
"loss": 1.5369, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9920955559458985, |
|
"grad_norm": 0.008888053707778454, |
|
"learning_rate": 1.738030360677323e-09, |
|
"loss": 1.5488, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9949060249429124, |
|
"grad_norm": 0.009277201257646084, |
|
"learning_rate": 4.3451703042207694e-10, |
|
"loss": 1.5375, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9977164939399262, |
|
"grad_norm": 0.009334351867437363, |
|
"learning_rate": 0.0, |
|
"loss": 1.5352, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6385650224215247, |
|
"grad_norm": 0.010235507041215897, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5054, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6403587443946188, |
|
"grad_norm": 0.010560178197920322, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5118, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.6421524663677131, |
|
"grad_norm": 0.010353959165513515, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5226, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.6439461883408072, |
|
"grad_norm": 0.010382940992712975, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5078, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.6457399103139013, |
|
"grad_norm": 0.009856803342700005, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5167, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6475336322869956, |
|
"grad_norm": 0.010195410810410976, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5142, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.6493273542600897, |
|
"grad_norm": 0.010302864946424961, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5136, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.6511210762331838, |
|
"grad_norm": 0.010046405717730522, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5112, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.6529147982062781, |
|
"grad_norm": 0.010849208571016788, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5114, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.6547085201793722, |
|
"grad_norm": 0.010421674698591232, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5173, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6565022421524663, |
|
"grad_norm": 0.00989589188247919, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5063, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.6582959641255606, |
|
"grad_norm": 0.010465629398822784, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5031, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.6600896860986547, |
|
"grad_norm": 0.009964341297745705, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5207, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.6618834080717488, |
|
"grad_norm": 0.01189314667135477, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5361, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6636771300448431, |
|
"grad_norm": 0.01012677513062954, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5215, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6654708520179372, |
|
"grad_norm": 0.009877102449536324, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5262, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6672645739910313, |
|
"grad_norm": 0.01000463031232357, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5183, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6690582959641256, |
|
"grad_norm": 0.010188892483711243, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5183, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6708520179372197, |
|
"grad_norm": 0.010129815898835659, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5245, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.672645739910314, |
|
"grad_norm": 0.010608335956931114, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5169, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6744394618834081, |
|
"grad_norm": 0.010223207995295525, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5185, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.6762331838565022, |
|
"grad_norm": 0.010141369886696339, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5161, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6780269058295965, |
|
"grad_norm": 0.01027351152151823, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5119, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6798206278026906, |
|
"grad_norm": 0.010362266562879086, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5126, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6816143497757847, |
|
"grad_norm": 0.010336722247302532, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5173, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.683408071748879, |
|
"grad_norm": 0.01007298193871975, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5075, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6852017937219731, |
|
"grad_norm": 0.010275410488247871, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5123, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6869955156950672, |
|
"grad_norm": 0.010203160345554352, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5151, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6887892376681615, |
|
"grad_norm": 0.010127630084753036, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5211, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6905829596412556, |
|
"grad_norm": 0.009799284860491753, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5191, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6923766816143497, |
|
"grad_norm": 0.01014394499361515, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5261, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.694170403587444, |
|
"grad_norm": 0.010567774064838886, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5232, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6959641255605381, |
|
"grad_norm": 0.010051852092146873, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5212, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6977578475336322, |
|
"grad_norm": 0.010241293348371983, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5094, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6995515695067265, |
|
"grad_norm": 0.0095717404037714, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5115, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7013452914798206, |
|
"grad_norm": 0.00974031537771225, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5195, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7031390134529149, |
|
"grad_norm": 0.010140657424926758, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5048, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.704932735426009, |
|
"grad_norm": 0.010055477730929852, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5162, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7067264573991031, |
|
"grad_norm": 0.01005468424409628, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5258, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7085201793721974, |
|
"grad_norm": 0.010284669697284698, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5094, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7103139013452915, |
|
"grad_norm": 0.010200968012213707, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5172, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.7121076233183856, |
|
"grad_norm": 0.01015354972332716, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5117, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7139013452914799, |
|
"grad_norm": 0.009913373738527298, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5268, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.715695067264574, |
|
"grad_norm": 0.010287330485880375, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5211, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7174887892376681, |
|
"grad_norm": 0.01057345885783434, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5199, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7192825112107624, |
|
"grad_norm": 0.010113878175616264, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5168, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.7210762331838565, |
|
"grad_norm": 0.009940318763256073, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5175, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7228699551569506, |
|
"grad_norm": 0.010180394165217876, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5211, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7246636771300449, |
|
"grad_norm": 0.00961736124008894, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5228, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.726457399103139, |
|
"grad_norm": 0.010378845036029816, |
|
"learning_rate": 2e-05, |
|
"loss": 1.522, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7282511210762331, |
|
"grad_norm": 0.010189516469836235, |
|
"learning_rate": 2e-05, |
|
"loss": 1.525, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7300448430493274, |
|
"grad_norm": 0.010004358366131783, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5172, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7318385650224215, |
|
"grad_norm": 0.010387993417680264, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5246, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.7336322869955157, |
|
"grad_norm": 0.010004810988903046, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5132, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.7354260089686099, |
|
"grad_norm": 0.009845850057899952, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5248, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.737219730941704, |
|
"grad_norm": 0.010015097446739674, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5196, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.7390134529147983, |
|
"grad_norm": 0.009975203312933445, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5096, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.7408071748878924, |
|
"grad_norm": 0.010078891180455685, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5162, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.7426008968609865, |
|
"grad_norm": 0.011885426007211208, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5189, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.7443946188340808, |
|
"grad_norm": 0.009693853557109833, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5194, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7461883408071749, |
|
"grad_norm": 0.010337116196751595, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5191, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.747982062780269, |
|
"grad_norm": 0.00993486400693655, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5177, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.7497757847533633, |
|
"grad_norm": 0.010143253020942211, |
|
"learning_rate": 2e-05, |
|
"loss": 1.514, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7515695067264574, |
|
"grad_norm": 0.010233073495328426, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5154, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7533632286995515, |
|
"grad_norm": 0.009982983581721783, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5223, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7551569506726458, |
|
"grad_norm": 0.010409766808152199, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5152, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7569506726457399, |
|
"grad_norm": 0.0099264495074749, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5185, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.758744394618834, |
|
"grad_norm": 0.009928545914590359, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4986, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7605381165919283, |
|
"grad_norm": 0.009940563701093197, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5071, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7623318385650224, |
|
"grad_norm": 0.010767797008156776, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5006, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7641255605381166, |
|
"grad_norm": 0.010551121085882187, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5201, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7659192825112108, |
|
"grad_norm": 0.010118665173649788, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5213, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7677130044843049, |
|
"grad_norm": 0.010247626341879368, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5178, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.7695067264573991, |
|
"grad_norm": 0.010188435204327106, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5085, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7713004484304933, |
|
"grad_norm": 0.010428003035485744, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5124, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7730941704035874, |
|
"grad_norm": 0.01012035645544529, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5299, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7748878923766817, |
|
"grad_norm": 0.010584665462374687, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5095, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7766816143497758, |
|
"grad_norm": 0.009979243390262127, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5193, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7784753363228699, |
|
"grad_norm": 0.00958004966378212, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5214, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7802690582959642, |
|
"grad_norm": 0.00973733700811863, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5208, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7820627802690583, |
|
"grad_norm": 0.010465665720403194, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5227, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7838565022421524, |
|
"grad_norm": 0.010098133236169815, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5248, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7856502242152467, |
|
"grad_norm": 0.10259313136339188, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5222, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7874439461883408, |
|
"grad_norm": 0.01040815282613039, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5205, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.7892376681614349, |
|
"grad_norm": 0.010325520299375057, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5189, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7910313901345292, |
|
"grad_norm": 0.010079775005578995, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5156, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7928251121076233, |
|
"grad_norm": 0.010167201980948448, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5116, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.7946188340807175, |
|
"grad_norm": 0.010806124657392502, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5153, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7964125560538117, |
|
"grad_norm": 0.010324080474674702, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5246, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.7982062780269058, |
|
"grad_norm": 0.010092305950820446, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5282, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.01007048413157463, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5108, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.8017937219730942, |
|
"grad_norm": 0.010184276849031448, |
|
"learning_rate": 2e-05, |
|
"loss": 1.51, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.8035874439461883, |
|
"grad_norm": 0.010521662421524525, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5139, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.8053811659192825, |
|
"grad_norm": 0.010600044392049313, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5091, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.8071748878923767, |
|
"grad_norm": 0.009714100509881973, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5122, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8089686098654708, |
|
"grad_norm": 0.010295005515217781, |
|
"learning_rate": 2e-05, |
|
"loss": 1.52, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.810762331838565, |
|
"grad_norm": 0.010034569539129734, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5197, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.8125560538116592, |
|
"grad_norm": 0.010086962021887302, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5117, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.8143497757847533, |
|
"grad_norm": 0.010277335532009602, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5033, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.8161434977578476, |
|
"grad_norm": 0.010540721006691456, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5166, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8179372197309417, |
|
"grad_norm": 0.009755424223840237, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5149, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.8197309417040358, |
|
"grad_norm": 0.00984253827482462, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5093, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.8215246636771301, |
|
"grad_norm": 0.009836334735155106, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5141, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.8233183856502242, |
|
"grad_norm": 0.01032332144677639, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5241, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.8251121076233184, |
|
"grad_norm": 0.010635129176080227, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5068, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8269058295964126, |
|
"grad_norm": 0.009664127603173256, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5052, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.8286995515695067, |
|
"grad_norm": 0.010554889217019081, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5071, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.8304932735426009, |
|
"grad_norm": 0.009871057234704494, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5189, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.8322869955156951, |
|
"grad_norm": 0.010431516915559769, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5183, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.8340807174887892, |
|
"grad_norm": 0.009860005229711533, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5213, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.8358744394618834, |
|
"grad_norm": 0.010233579203486443, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5182, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.8376681614349776, |
|
"grad_norm": 0.010311591438949108, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5092, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.8394618834080717, |
|
"grad_norm": 0.010733729228377342, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5186, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.841255605381166, |
|
"grad_norm": 0.009951340965926647, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5097, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.8430493273542601, |
|
"grad_norm": 0.01003777701407671, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5173, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8448430493273542, |
|
"grad_norm": 0.009939250536262989, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5108, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.8466367713004485, |
|
"grad_norm": 0.009835812263190746, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5272, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.8484304932735426, |
|
"grad_norm": 0.010321546345949173, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5193, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.8502242152466367, |
|
"grad_norm": 0.01006554439663887, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5165, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.852017937219731, |
|
"grad_norm": 0.009972809813916683, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5228, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8538116591928251, |
|
"grad_norm": 0.010388972237706184, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5188, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8556053811659193, |
|
"grad_norm": 0.010111154057085514, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5199, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8573991031390135, |
|
"grad_norm": 0.01029327604919672, |
|
"learning_rate": 2e-05, |
|
"loss": 1.516, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8591928251121076, |
|
"grad_norm": 0.010400544852018356, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5218, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.8609865470852018, |
|
"grad_norm": 0.0099885743111372, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5155, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.862780269058296, |
|
"grad_norm": 0.010007279925048351, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5205, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8645739910313901, |
|
"grad_norm": 0.01053563691675663, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5019, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8663677130044843, |
|
"grad_norm": 0.01031608134508133, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5217, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8681614349775785, |
|
"grad_norm": 0.010082092136144638, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5073, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8699551569506726, |
|
"grad_norm": 0.01012254785746336, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5101, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8717488789237668, |
|
"grad_norm": 0.010539901442825794, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5209, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.873542600896861, |
|
"grad_norm": 0.009883386082947254, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5275, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.8753363228699551, |
|
"grad_norm": 0.010055874474346638, |
|
"learning_rate": 2e-05, |
|
"loss": 1.521, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8771300448430494, |
|
"grad_norm": 0.010441599413752556, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5253, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8789237668161435, |
|
"grad_norm": 0.010321282781660557, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5128, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8807174887892377, |
|
"grad_norm": 0.010404079221189022, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5216, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8825112107623319, |
|
"grad_norm": 0.010680857114493847, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5102, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.884304932735426, |
|
"grad_norm": 0.009785238653421402, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5152, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8860986547085202, |
|
"grad_norm": 0.010622934438288212, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5134, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8878923766816144, |
|
"grad_norm": 0.009563595987856388, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5213, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8896860986547085, |
|
"grad_norm": 0.009900403209030628, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5254, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8914798206278027, |
|
"grad_norm": 0.010441206395626068, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5042, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8932735426008969, |
|
"grad_norm": 0.010110273025929928, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5141, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.895067264573991, |
|
"grad_norm": 0.00976527575403452, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5189, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8968609865470852, |
|
"grad_norm": 0.010270185768604279, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5128, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8986547085201794, |
|
"grad_norm": 0.010477078147232533, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5331, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.9004484304932735, |
|
"grad_norm": 0.009786723181605339, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5143, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.9022421524663677, |
|
"grad_norm": 0.009838691912591457, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5237, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.9040358744394619, |
|
"grad_norm": 0.010305250994861126, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5236, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.905829596412556, |
|
"grad_norm": 0.010098317638039589, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5189, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9076233183856502, |
|
"grad_norm": 0.010335841216146946, |
|
"learning_rate": 2e-05, |
|
"loss": 1.519, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.9094170403587444, |
|
"grad_norm": 0.009809168055653572, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5176, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.9112107623318386, |
|
"grad_norm": 0.01069081760942936, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5055, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.9130044843049328, |
|
"grad_norm": 0.009927291423082352, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5224, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.9147982062780269, |
|
"grad_norm": 0.010560589842498302, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5129, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9165919282511211, |
|
"grad_norm": 0.010154438205063343, |
|
"learning_rate": 2e-05, |
|
"loss": 1.52, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.9183856502242153, |
|
"grad_norm": 0.010346156544983387, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5194, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.9201793721973094, |
|
"grad_norm": 0.010523281060159206, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5187, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.9219730941704036, |
|
"grad_norm": 0.010443002916872501, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5059, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.9237668161434978, |
|
"grad_norm": 0.010005362331867218, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5102, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.9255605381165919, |
|
"grad_norm": 0.010285025462508202, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5217, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.9273542600896861, |
|
"grad_norm": 0.010401098988950253, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5243, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.9291479820627803, |
|
"grad_norm": 0.010455128736793995, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5054, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.9309417040358744, |
|
"grad_norm": 0.00987928081303835, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5053, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.9327354260089686, |
|
"grad_norm": 0.010212692432105541, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5151, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9345291479820628, |
|
"grad_norm": 0.010937588289380074, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5099, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.9363228699551569, |
|
"grad_norm": 0.010248001664876938, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5256, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.9381165919282511, |
|
"grad_norm": 0.010430903173983097, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5056, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.9399103139013453, |
|
"grad_norm": 0.0102499695494771, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5285, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.9417040358744395, |
|
"grad_norm": 0.010674213990569115, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5136, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.9434977578475336, |
|
"grad_norm": 0.010732615366578102, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5119, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.9452914798206278, |
|
"grad_norm": 0.009994648396968842, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5228, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.947085201793722, |
|
"grad_norm": 0.010234368033707142, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5258, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.9488789237668162, |
|
"grad_norm": 0.010327205993235111, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5156, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.9506726457399103, |
|
"grad_norm": 0.009836922399699688, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5171, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9524663677130045, |
|
"grad_norm": 0.009962068870663643, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5125, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.9542600896860987, |
|
"grad_norm": 0.010127882473170757, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5182, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.9560538116591928, |
|
"grad_norm": 0.010251611471176147, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5139, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.957847533632287, |
|
"grad_norm": 0.010081682354211807, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5239, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.9596412556053812, |
|
"grad_norm": 0.010235367342829704, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5159, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9614349775784753, |
|
"grad_norm": 0.009694702923297882, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5174, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9632286995515695, |
|
"grad_norm": 0.010224996134638786, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5171, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9650224215246637, |
|
"grad_norm": 0.010206632316112518, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5223, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9668161434977578, |
|
"grad_norm": 0.010011864826083183, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5282, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.968609865470852, |
|
"grad_norm": 0.010364921763539314, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5092, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9704035874439462, |
|
"grad_norm": 0.010109508410096169, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5068, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9721973094170404, |
|
"grad_norm": 0.00964987464249134, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5089, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9739910313901345, |
|
"grad_norm": 0.010244207456707954, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5217, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9757847533632287, |
|
"grad_norm": 0.009797874838113785, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5143, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9775784753363229, |
|
"grad_norm": 0.010056640952825546, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5276, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.979372197309417, |
|
"grad_norm": 0.009898710064589977, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5222, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9811659192825112, |
|
"grad_norm": 0.0099082225933671, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5276, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9829596412556054, |
|
"grad_norm": 0.01018478162586689, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5217, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9847533632286996, |
|
"grad_norm": 0.009828625246882439, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5194, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9865470852017937, |
|
"grad_norm": 0.010311014950275421, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5138, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9883408071748879, |
|
"grad_norm": 0.010840130038559437, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5044, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9901345291479821, |
|
"grad_norm": 0.009595104493200779, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5165, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.9919282511210762, |
|
"grad_norm": 0.01027593482285738, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5291, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.9937219730941704, |
|
"grad_norm": 0.010394555516541004, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5109, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.9955156950672646, |
|
"grad_norm": 0.00996735692024231, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5212, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9973094170403587, |
|
"grad_norm": 0.010095257312059402, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5106, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.9991031390134529, |
|
"grad_norm": 0.01082176435738802, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5099, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9991031390134529, |
|
"step": 557, |
|
"total_flos": 7841700554735616.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 1.3623, |
|
"train_samples_per_second": 104762.356, |
|
"train_steps_per_second": 408.87 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 557, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7841700554735616.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|