|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0309544282029235, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008598452278589854, |
|
"grad_norm": 0.5736560821533203, |
|
"learning_rate": 5.517241379310345e-08, |
|
"loss": 1.4983, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.017196904557179708, |
|
"grad_norm": 0.47119423747062683, |
|
"learning_rate": 1.2413793103448275e-07, |
|
"loss": 1.2594, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.025795356835769563, |
|
"grad_norm": 0.6366227865219116, |
|
"learning_rate": 1.9310344827586205e-07, |
|
"loss": 1.5001, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.034393809114359415, |
|
"grad_norm": 0.5710499882698059, |
|
"learning_rate": 2.620689655172414e-07, |
|
"loss": 1.5602, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04299226139294927, |
|
"grad_norm": 0.4520650804042816, |
|
"learning_rate": 3.310344827586207e-07, |
|
"loss": 1.4316, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.051590713671539126, |
|
"grad_norm": 0.5874906778335571, |
|
"learning_rate": 4e-07, |
|
"loss": 1.5158, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06018916595012898, |
|
"grad_norm": 0.4618767201900482, |
|
"learning_rate": 4.689655172413793e-07, |
|
"loss": 1.3945, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06878761822871883, |
|
"grad_norm": 0.47305673360824585, |
|
"learning_rate": 5.379310344827586e-07, |
|
"loss": 1.4154, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07738607050730868, |
|
"grad_norm": 0.5665937066078186, |
|
"learning_rate": 6.068965517241379e-07, |
|
"loss": 1.3776, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08598452278589853, |
|
"grad_norm": 0.5359976291656494, |
|
"learning_rate": 6.758620689655172e-07, |
|
"loss": 1.5018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09458297506448839, |
|
"grad_norm": 0.5621670484542847, |
|
"learning_rate": 7.448275862068965e-07, |
|
"loss": 1.4493, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.10318142734307825, |
|
"grad_norm": 0.5842466354370117, |
|
"learning_rate": 8.137931034482758e-07, |
|
"loss": 1.4241, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1117798796216681, |
|
"grad_norm": 0.6147713661193848, |
|
"learning_rate": 8.827586206896551e-07, |
|
"loss": 1.4748, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12037833190025796, |
|
"grad_norm": 0.6653420925140381, |
|
"learning_rate": 9.517241379310345e-07, |
|
"loss": 1.6576, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1289767841788478, |
|
"grad_norm": 0.5247882604598999, |
|
"learning_rate": 1.0206896551724139e-06, |
|
"loss": 1.439, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13757523645743766, |
|
"grad_norm": 0.6043932437896729, |
|
"learning_rate": 1.089655172413793e-06, |
|
"loss": 1.475, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1461736887360275, |
|
"grad_norm": 0.614253580570221, |
|
"learning_rate": 1.1586206896551724e-06, |
|
"loss": 1.4928, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.15477214101461736, |
|
"grad_norm": 0.6523554921150208, |
|
"learning_rate": 1.2275862068965516e-06, |
|
"loss": 1.5111, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16337059329320722, |
|
"grad_norm": 0.4596118628978729, |
|
"learning_rate": 1.2965517241379309e-06, |
|
"loss": 1.4672, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.17196904557179707, |
|
"grad_norm": 0.5627196431159973, |
|
"learning_rate": 1.3655172413793103e-06, |
|
"loss": 1.4621, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18056749785038692, |
|
"grad_norm": 0.5366098284721375, |
|
"learning_rate": 1.4344827586206896e-06, |
|
"loss": 1.3875, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18916595012897677, |
|
"grad_norm": 0.48968908190727234, |
|
"learning_rate": 1.5034482758620688e-06, |
|
"loss": 1.4264, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19776440240756663, |
|
"grad_norm": 0.5644566416740417, |
|
"learning_rate": 1.5724137931034483e-06, |
|
"loss": 1.3822, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2063628546861565, |
|
"grad_norm": 0.49979278445243835, |
|
"learning_rate": 1.6413793103448275e-06, |
|
"loss": 1.4866, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21496130696474636, |
|
"grad_norm": 0.5352433919906616, |
|
"learning_rate": 1.710344827586207e-06, |
|
"loss": 1.3047, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2235597592433362, |
|
"grad_norm": 0.5073248744010925, |
|
"learning_rate": 1.7793103448275862e-06, |
|
"loss": 1.4763, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23215821152192606, |
|
"grad_norm": 0.48227864503860474, |
|
"learning_rate": 1.8482758620689653e-06, |
|
"loss": 1.4552, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2407566638005159, |
|
"grad_norm": 0.4420427978038788, |
|
"learning_rate": 1.9172413793103447e-06, |
|
"loss": 1.3285, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24935511607910577, |
|
"grad_norm": 0.4224660396575928, |
|
"learning_rate": 1.986206896551724e-06, |
|
"loss": 1.4294, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2579535683576956, |
|
"grad_norm": 0.3935889005661011, |
|
"learning_rate": 1.999989634963924e-06, |
|
"loss": 1.32, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26655202063628547, |
|
"grad_norm": 0.352851539850235, |
|
"learning_rate": 1.9999475273731217e-06, |
|
"loss": 1.326, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2751504729148753, |
|
"grad_norm": 0.4401000142097473, |
|
"learning_rate": 1.9998730307756826e-06, |
|
"loss": 1.352, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2837489251934652, |
|
"grad_norm": 0.3836155831813812, |
|
"learning_rate": 1.9997661475846052e-06, |
|
"loss": 1.3492, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.292347377472055, |
|
"grad_norm": 0.32301604747772217, |
|
"learning_rate": 1.9996268812619105e-06, |
|
"loss": 1.309, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3009458297506449, |
|
"grad_norm": 0.37157368659973145, |
|
"learning_rate": 1.999455236318534e-06, |
|
"loss": 1.4444, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.30954428202923473, |
|
"grad_norm": 0.340994268655777, |
|
"learning_rate": 1.999251218314176e-06, |
|
"loss": 1.2391, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3181427343078246, |
|
"grad_norm": 0.3156416714191437, |
|
"learning_rate": 1.999014833857124e-06, |
|
"loss": 1.2496, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.32674118658641443, |
|
"grad_norm": 0.35868847370147705, |
|
"learning_rate": 1.998746090604037e-06, |
|
"loss": 1.3442, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3353396388650043, |
|
"grad_norm": 0.31525614857673645, |
|
"learning_rate": 1.9984449972597e-06, |
|
"loss": 1.3011, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.34393809114359414, |
|
"grad_norm": 0.47816550731658936, |
|
"learning_rate": 1.9981115635767376e-06, |
|
"loss": 1.2962, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.352536543422184, |
|
"grad_norm": 0.31586989760398865, |
|
"learning_rate": 1.9977458003553037e-06, |
|
"loss": 1.2353, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.36113499570077384, |
|
"grad_norm": 0.3226100206375122, |
|
"learning_rate": 1.9973477194427262e-06, |
|
"loss": 1.2135, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3697334479793637, |
|
"grad_norm": 0.3332583010196686, |
|
"learning_rate": 1.996917333733128e-06, |
|
"loss": 1.2553, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.37833190025795355, |
|
"grad_norm": 0.33345943689346313, |
|
"learning_rate": 1.9964546571670053e-06, |
|
"loss": 1.2652, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3869303525365434, |
|
"grad_norm": 0.37145930528640747, |
|
"learning_rate": 1.995959704730779e-06, |
|
"loss": 1.1999, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.39552880481513325, |
|
"grad_norm": 0.41526567935943604, |
|
"learning_rate": 1.9954324924563086e-06, |
|
"loss": 1.3373, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4041272570937231, |
|
"grad_norm": 0.2987145781517029, |
|
"learning_rate": 1.9948730374203715e-06, |
|
"loss": 1.2249, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.412725709372313, |
|
"grad_norm": 0.49898993968963623, |
|
"learning_rate": 1.994281357744112e-06, |
|
"loss": 1.2323, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42132416165090286, |
|
"grad_norm": 0.2862403392791748, |
|
"learning_rate": 1.9936574725924525e-06, |
|
"loss": 1.1908, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4299226139294927, |
|
"grad_norm": 0.31127598881721497, |
|
"learning_rate": 1.9930014021734732e-06, |
|
"loss": 1.3326, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43852106620808257, |
|
"grad_norm": 0.44075530767440796, |
|
"learning_rate": 1.9923131677377585e-06, |
|
"loss": 1.1721, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4471195184866724, |
|
"grad_norm": 0.29328110814094543, |
|
"learning_rate": 1.991592791577708e-06, |
|
"loss": 1.2735, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45571797076526227, |
|
"grad_norm": 0.3364109396934509, |
|
"learning_rate": 1.9908402970268145e-06, |
|
"loss": 1.1244, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4643164230438521, |
|
"grad_norm": 0.3128204047679901, |
|
"learning_rate": 1.9900557084589077e-06, |
|
"loss": 1.2281, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.472914875322442, |
|
"grad_norm": 0.30848929286003113, |
|
"learning_rate": 1.989239051287366e-06, |
|
"loss": 1.2333, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4815133276010318, |
|
"grad_norm": 0.36058536171913147, |
|
"learning_rate": 1.988390351964291e-06, |
|
"loss": 1.2995, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4901117798796217, |
|
"grad_norm": 0.3388177752494812, |
|
"learning_rate": 1.9875096379796535e-06, |
|
"loss": 1.3073, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.49871023215821153, |
|
"grad_norm": 0.3674142062664032, |
|
"learning_rate": 1.986596937860402e-06, |
|
"loss": 1.2673, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5073086844368013, |
|
"grad_norm": 0.30132225155830383, |
|
"learning_rate": 1.9856522811695374e-06, |
|
"loss": 1.2107, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5159071367153912, |
|
"grad_norm": 0.3170187473297119, |
|
"learning_rate": 1.9846756985051573e-06, |
|
"loss": 1.1624, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.524505588993981, |
|
"grad_norm": 0.3753427565097809, |
|
"learning_rate": 1.9836672214994637e-06, |
|
"loss": 1.2782, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5331040412725709, |
|
"grad_norm": 0.36705997586250305, |
|
"learning_rate": 1.9826268828177393e-06, |
|
"loss": 1.134, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5417024935511608, |
|
"grad_norm": 0.3330920338630676, |
|
"learning_rate": 1.9815547161572892e-06, |
|
"loss": 1.1611, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5503009458297506, |
|
"grad_norm": 0.3084378242492676, |
|
"learning_rate": 1.980450756246348e-06, |
|
"loss": 1.2008, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5588993981083406, |
|
"grad_norm": 0.3422742486000061, |
|
"learning_rate": 1.979315038842957e-06, |
|
"loss": 1.136, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5674978503869303, |
|
"grad_norm": 0.4076729118824005, |
|
"learning_rate": 1.9781476007338054e-06, |
|
"loss": 1.179, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5760963026655203, |
|
"grad_norm": 0.38809648156166077, |
|
"learning_rate": 1.976948479733038e-06, |
|
"loss": 1.1762, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.58469475494411, |
|
"grad_norm": 0.38286837935447693, |
|
"learning_rate": 1.9757177146810307e-06, |
|
"loss": 1.0976, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5932932072227, |
|
"grad_norm": 0.33073902130126953, |
|
"learning_rate": 1.9744553454431325e-06, |
|
"loss": 1.1775, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6018916595012898, |
|
"grad_norm": 0.30669665336608887, |
|
"learning_rate": 1.9731614129083753e-06, |
|
"loss": 1.1242, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6104901117798797, |
|
"grad_norm": 0.36842820048332214, |
|
"learning_rate": 1.9718359589881475e-06, |
|
"loss": 1.233, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6190885640584695, |
|
"grad_norm": 0.28934866189956665, |
|
"learning_rate": 1.970479026614837e-06, |
|
"loss": 1.0545, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6276870163370594, |
|
"grad_norm": 0.36205312609672546, |
|
"learning_rate": 1.9690906597404428e-06, |
|
"loss": 1.0926, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6362854686156492, |
|
"grad_norm": 0.39489033818244934, |
|
"learning_rate": 1.967670903335148e-06, |
|
"loss": 1.2244, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6448839208942391, |
|
"grad_norm": 0.29848822951316833, |
|
"learning_rate": 1.966219803385865e-06, |
|
"loss": 1.1829, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6534823731728289, |
|
"grad_norm": 0.31506502628326416, |
|
"learning_rate": 1.9647374068947467e-06, |
|
"loss": 1.1418, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6620808254514188, |
|
"grad_norm": 0.39251357316970825, |
|
"learning_rate": 1.963223761877662e-06, |
|
"loss": 1.1543, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6706792777300086, |
|
"grad_norm": 0.3380272686481476, |
|
"learning_rate": 1.9616789173626418e-06, |
|
"loss": 1.0856, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6792777300085985, |
|
"grad_norm": 0.379609614610672, |
|
"learning_rate": 1.960102923388291e-06, |
|
"loss": 1.1562, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6878761822871883, |
|
"grad_norm": 0.26444530487060547, |
|
"learning_rate": 1.958495831002168e-06, |
|
"loss": 1.1356, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6964746345657782, |
|
"grad_norm": 0.4068126082420349, |
|
"learning_rate": 1.9568576922591304e-06, |
|
"loss": 1.2286, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.705073086844368, |
|
"grad_norm": 0.3288695514202118, |
|
"learning_rate": 1.955188560219648e-06, |
|
"loss": 1.206, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7136715391229579, |
|
"grad_norm": 0.33392417430877686, |
|
"learning_rate": 1.9534884889480876e-06, |
|
"loss": 1.1944, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7222699914015477, |
|
"grad_norm": 0.37305575609207153, |
|
"learning_rate": 1.951757533510957e-06, |
|
"loss": 1.204, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7308684436801376, |
|
"grad_norm": 0.3695738911628723, |
|
"learning_rate": 1.949995749975127e-06, |
|
"loss": 1.1728, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7394668959587274, |
|
"grad_norm": 0.37770697474479675, |
|
"learning_rate": 1.948203195406009e-06, |
|
"loss": 1.2171, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7480653482373173, |
|
"grad_norm": 0.38213881850242615, |
|
"learning_rate": 1.9463799278657124e-06, |
|
"loss": 1.1948, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7566638005159071, |
|
"grad_norm": 0.4096551239490509, |
|
"learning_rate": 1.9445260064111607e-06, |
|
"loss": 1.1774, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.765262252794497, |
|
"grad_norm": 0.41796615719795227, |
|
"learning_rate": 1.9426414910921785e-06, |
|
"loss": 1.1234, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7738607050730868, |
|
"grad_norm": 0.3439182639122009, |
|
"learning_rate": 1.9407264429495484e-06, |
|
"loss": 1.1748, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7824591573516767, |
|
"grad_norm": 0.34333643317222595, |
|
"learning_rate": 1.938780924013032e-06, |
|
"loss": 1.1441, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7910576096302665, |
|
"grad_norm": 0.32014700770378113, |
|
"learning_rate": 1.936804997299362e-06, |
|
"loss": 1.1205, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7996560619088564, |
|
"grad_norm": 0.3878689408302307, |
|
"learning_rate": 1.9347987268101996e-06, |
|
"loss": 1.1397, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.8082545141874462, |
|
"grad_norm": 0.3905511796474457, |
|
"learning_rate": 1.9327621775300633e-06, |
|
"loss": 1.1016, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8168529664660361, |
|
"grad_norm": 0.3861243426799774, |
|
"learning_rate": 1.9306954154242233e-06, |
|
"loss": 1.0783, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.825451418744626, |
|
"grad_norm": 0.3234269618988037, |
|
"learning_rate": 1.9285985074365627e-06, |
|
"loss": 1.0694, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8340498710232158, |
|
"grad_norm": 0.39914825558662415, |
|
"learning_rate": 1.926471521487413e-06, |
|
"loss": 1.2053, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8426483233018057, |
|
"grad_norm": 0.39884087443351746, |
|
"learning_rate": 1.924314526471351e-06, |
|
"loss": 1.1663, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8512467755803955, |
|
"grad_norm": 0.3793065547943115, |
|
"learning_rate": 1.922127592254968e-06, |
|
"loss": 1.083, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8598452278589854, |
|
"grad_norm": 0.3194003403186798, |
|
"learning_rate": 1.919910789674609e-06, |
|
"loss": 1.0664, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8684436801375752, |
|
"grad_norm": 0.30485859513282776, |
|
"learning_rate": 1.917664190534075e-06, |
|
"loss": 1.0426, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8770421324161651, |
|
"grad_norm": 0.3151833117008209, |
|
"learning_rate": 1.915387867602298e-06, |
|
"loss": 1.157, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8856405846947549, |
|
"grad_norm": 0.41206279397010803, |
|
"learning_rate": 1.913081894610986e-06, |
|
"loss": 1.1125, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8942390369733448, |
|
"grad_norm": 0.42712071537971497, |
|
"learning_rate": 1.9107463462522332e-06, |
|
"loss": 0.9867, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9028374892519346, |
|
"grad_norm": 0.45993658900260925, |
|
"learning_rate": 1.9083812981760998e-06, |
|
"loss": 1.2464, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.9114359415305245, |
|
"grad_norm": 0.34718605875968933, |
|
"learning_rate": 1.9059868269881636e-06, |
|
"loss": 1.0646, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9200343938091143, |
|
"grad_norm": 0.4059743285179138, |
|
"learning_rate": 1.9035630102470375e-06, |
|
"loss": 1.08, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9286328460877042, |
|
"grad_norm": 0.34420260787010193, |
|
"learning_rate": 1.9011099264618573e-06, |
|
"loss": 1.0843, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.937231298366294, |
|
"grad_norm": 0.36102572083473206, |
|
"learning_rate": 1.89862765508974e-06, |
|
"loss": 1.0874, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.945829750644884, |
|
"grad_norm": 0.4396149516105652, |
|
"learning_rate": 1.896116276533208e-06, |
|
"loss": 1.1614, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9544282029234737, |
|
"grad_norm": 0.3992445170879364, |
|
"learning_rate": 1.8935758721375862e-06, |
|
"loss": 1.122, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9630266552020637, |
|
"grad_norm": 0.3218730092048645, |
|
"learning_rate": 1.8910065241883678e-06, |
|
"loss": 0.9754, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9716251074806534, |
|
"grad_norm": 0.3818008601665497, |
|
"learning_rate": 1.8884083159085468e-06, |
|
"loss": 1.1747, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9802235597592434, |
|
"grad_norm": 0.407429575920105, |
|
"learning_rate": 1.8857813314559254e-06, |
|
"loss": 1.1048, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9888220120378332, |
|
"grad_norm": 0.4260193407535553, |
|
"learning_rate": 1.8831256559203843e-06, |
|
"loss": 1.1217, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9974204643164231, |
|
"grad_norm": 0.3930356204509735, |
|
"learning_rate": 1.8804413753211304e-06, |
|
"loss": 1.1259, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.0051590713671539, |
|
"grad_norm": 0.3557448983192444, |
|
"learning_rate": 1.8777285766039075e-06, |
|
"loss": 1.1636, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.0137575236457437, |
|
"grad_norm": 0.34454646706581116, |
|
"learning_rate": 1.8749873476381826e-06, |
|
"loss": 1.093, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0223559759243337, |
|
"grad_norm": 0.4703282415866852, |
|
"learning_rate": 1.8722177772142973e-06, |
|
"loss": 1.0716, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.0309544282029235, |
|
"grad_norm": 0.418491929769516, |
|
"learning_rate": 1.8694199550405942e-06, |
|
"loss": 1.0626, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2905, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4341188063131075e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|