|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9293516810895164, |
|
"eval_steps": 500, |
|
"global_step": 6800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005674563767910342, |
|
"grad_norm": 1.8945719003677368, |
|
"learning_rate": 2.830188679245283e-06, |
|
"loss": 0.9878, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011349127535820683, |
|
"grad_norm": 0.8699278235435486, |
|
"learning_rate": 5.660377358490566e-06, |
|
"loss": 0.9338, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.017023691303731027, |
|
"grad_norm": 0.9612842798233032, |
|
"learning_rate": 8.49056603773585e-06, |
|
"loss": 0.8992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.022698255071641367, |
|
"grad_norm": 1.0209581851959229, |
|
"learning_rate": 1.1320754716981132e-05, |
|
"loss": 0.8802, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02837281883955171, |
|
"grad_norm": 1.1397087574005127, |
|
"learning_rate": 1.4150943396226415e-05, |
|
"loss": 0.8636, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.034047382607462054, |
|
"grad_norm": 1.0688011646270752, |
|
"learning_rate": 1.69811320754717e-05, |
|
"loss": 0.8589, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.039721946375372394, |
|
"grad_norm": 1.0701323747634888, |
|
"learning_rate": 1.981132075471698e-05, |
|
"loss": 0.8445, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.045396510143282734, |
|
"grad_norm": 1.0749995708465576, |
|
"learning_rate": 2.2641509433962265e-05, |
|
"loss": 0.8438, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.051071073911193074, |
|
"grad_norm": 1.2973322868347168, |
|
"learning_rate": 2.547169811320755e-05, |
|
"loss": 0.8399, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05674563767910342, |
|
"grad_norm": 0.9941120743751526, |
|
"learning_rate": 2.830188679245283e-05, |
|
"loss": 0.8459, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06242020144701376, |
|
"grad_norm": 1.1092499494552612, |
|
"learning_rate": 2.9999898623711896e-05, |
|
"loss": 0.8396, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06809476521492411, |
|
"grad_norm": 1.10667085647583, |
|
"learning_rate": 2.999875815620755e-05, |
|
"loss": 0.8403, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07376932898283445, |
|
"grad_norm": 1.0986227989196777, |
|
"learning_rate": 2.999635059750628e-05, |
|
"loss": 0.8296, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07944389275074479, |
|
"grad_norm": 0.9648028612136841, |
|
"learning_rate": 2.9992676150998032e-05, |
|
"loss": 0.8187, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08511845651865513, |
|
"grad_norm": 0.8029258251190186, |
|
"learning_rate": 2.998773512709909e-05, |
|
"loss": 0.8224, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09079302028656547, |
|
"grad_norm": 0.888502299785614, |
|
"learning_rate": 2.9981527943225862e-05, |
|
"loss": 0.8178, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09646758405447581, |
|
"grad_norm": 0.7894881963729858, |
|
"learning_rate": 2.997405512375964e-05, |
|
"loss": 0.8153, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10214214782238615, |
|
"grad_norm": 0.8492247462272644, |
|
"learning_rate": 2.996531730000227e-05, |
|
"loss": 0.8105, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1078167115902965, |
|
"grad_norm": 0.8247759938240051, |
|
"learning_rate": 2.9955315210122842e-05, |
|
"loss": 0.8, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11349127535820684, |
|
"grad_norm": 0.8270812034606934, |
|
"learning_rate": 2.99440496990953e-05, |
|
"loss": 0.802, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11916583912611718, |
|
"grad_norm": 0.8336136937141418, |
|
"learning_rate": 2.9931521718627107e-05, |
|
"loss": 0.7932, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.12484040289402752, |
|
"grad_norm": 0.7927630543708801, |
|
"learning_rate": 2.991773232707879e-05, |
|
"loss": 0.7903, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13051496666193788, |
|
"grad_norm": 0.8075955510139465, |
|
"learning_rate": 2.9902682689374578e-05, |
|
"loss": 0.7897, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13618953042984822, |
|
"grad_norm": 0.7381598353385925, |
|
"learning_rate": 2.9886374076903945e-05, |
|
"loss": 0.785, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.14186409419775856, |
|
"grad_norm": 0.799022912979126, |
|
"learning_rate": 2.986880786741426e-05, |
|
"loss": 0.7862, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1475386579656689, |
|
"grad_norm": 0.7515665292739868, |
|
"learning_rate": 2.9849985544894333e-05, |
|
"loss": 0.7845, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.15321322173357924, |
|
"grad_norm": 0.8161646723747253, |
|
"learning_rate": 2.982990869944908e-05, |
|
"loss": 0.7745, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15888778550148958, |
|
"grad_norm": 0.671816885471344, |
|
"learning_rate": 2.9808579027165204e-05, |
|
"loss": 0.7786, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.16456234926939992, |
|
"grad_norm": 0.7310769557952881, |
|
"learning_rate": 2.978599832996788e-05, |
|
"loss": 0.7742, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.17023691303731026, |
|
"grad_norm": 0.7568747401237488, |
|
"learning_rate": 2.9762168515468548e-05, |
|
"loss": 0.7691, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1759114768052206, |
|
"grad_norm": 0.6345218420028687, |
|
"learning_rate": 2.973709159680375e-05, |
|
"loss": 0.7695, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.18158604057313094, |
|
"grad_norm": 0.7218050360679626, |
|
"learning_rate": 2.9710769692465073e-05, |
|
"loss": 0.7681, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.18726060434104128, |
|
"grad_norm": 0.7665095925331116, |
|
"learning_rate": 2.9683205026120163e-05, |
|
"loss": 0.7667, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.19293516810895162, |
|
"grad_norm": 0.6717973947525024, |
|
"learning_rate": 2.9654399926424884e-05, |
|
"loss": 0.7684, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.19860973187686196, |
|
"grad_norm": 0.7454754114151001, |
|
"learning_rate": 2.9624356826826577e-05, |
|
"loss": 0.7622, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2042842956447723, |
|
"grad_norm": 0.6865426898002625, |
|
"learning_rate": 2.9593078265358498e-05, |
|
"loss": 0.761, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.20995885941268266, |
|
"grad_norm": 0.7075285315513611, |
|
"learning_rate": 2.956056688442541e-05, |
|
"loss": 0.7578, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.215633423180593, |
|
"grad_norm": 0.7438149452209473, |
|
"learning_rate": 2.9526825430580337e-05, |
|
"loss": 0.7571, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.22130798694850334, |
|
"grad_norm": 0.6830400228500366, |
|
"learning_rate": 2.949185675429254e-05, |
|
"loss": 0.759, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.22698255071641368, |
|
"grad_norm": 0.7147162556648254, |
|
"learning_rate": 2.9455663809706725e-05, |
|
"loss": 0.756, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.23265711448432402, |
|
"grad_norm": 0.7116013765335083, |
|
"learning_rate": 2.9418249654393443e-05, |
|
"loss": 0.7538, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.23833167825223436, |
|
"grad_norm": 0.64736407995224, |
|
"learning_rate": 2.9379617449090847e-05, |
|
"loss": 0.7513, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2440062420201447, |
|
"grad_norm": 0.6453843116760254, |
|
"learning_rate": 2.93397704574376e-05, |
|
"loss": 0.7538, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.24968080578805504, |
|
"grad_norm": 0.6253499388694763, |
|
"learning_rate": 2.929871204569722e-05, |
|
"loss": 0.7463, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2553553695559654, |
|
"grad_norm": 0.6677010655403137, |
|
"learning_rate": 2.9256445682473683e-05, |
|
"loss": 0.7419, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.26102993332387575, |
|
"grad_norm": 0.7070403695106506, |
|
"learning_rate": 2.9212974938418385e-05, |
|
"loss": 0.7449, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.26670449709178606, |
|
"grad_norm": 0.6784743070602417, |
|
"learning_rate": 2.9168303485928495e-05, |
|
"loss": 0.7453, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.27237906085969643, |
|
"grad_norm": 0.6076740026473999, |
|
"learning_rate": 2.912243509883673e-05, |
|
"loss": 0.7457, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.27805362462760674, |
|
"grad_norm": 0.6722409129142761, |
|
"learning_rate": 2.9075373652092535e-05, |
|
"loss": 0.7373, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2837281883955171, |
|
"grad_norm": 0.7188818454742432, |
|
"learning_rate": 2.9027123121434714e-05, |
|
"loss": 0.7343, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2894027521634274, |
|
"grad_norm": 0.657289981842041, |
|
"learning_rate": 2.897768758305558e-05, |
|
"loss": 0.7336, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2950773159313378, |
|
"grad_norm": 0.6076385378837585, |
|
"learning_rate": 2.892707121325658e-05, |
|
"loss": 0.7331, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3007518796992481, |
|
"grad_norm": 0.6217896342277527, |
|
"learning_rate": 2.8875278288095507e-05, |
|
"loss": 0.7339, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.30642644346715847, |
|
"grad_norm": 0.6453694701194763, |
|
"learning_rate": 2.882231318302523e-05, |
|
"loss": 0.7334, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3121010072350688, |
|
"grad_norm": 0.6069263219833374, |
|
"learning_rate": 2.8768180372524093e-05, |
|
"loss": 0.734, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.31777557100297915, |
|
"grad_norm": 0.6342785358428955, |
|
"learning_rate": 2.8712884429717873e-05, |
|
"loss": 0.7254, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.32345013477088946, |
|
"grad_norm": 0.5936433672904968, |
|
"learning_rate": 2.8656430025993464e-05, |
|
"loss": 0.7232, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.32912469853879983, |
|
"grad_norm": 0.5988269448280334, |
|
"learning_rate": 2.8598821930604252e-05, |
|
"loss": 0.726, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3347992623067102, |
|
"grad_norm": 0.6247944235801697, |
|
"learning_rate": 2.8540065010267183e-05, |
|
"loss": 0.729, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3404738260746205, |
|
"grad_norm": 0.6017037034034729, |
|
"learning_rate": 2.848016422875164e-05, |
|
"loss": 0.7216, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3461483898425309, |
|
"grad_norm": 0.7368952631950378, |
|
"learning_rate": 2.84191246464601e-05, |
|
"loss": 0.7331, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3518229536104412, |
|
"grad_norm": 0.6655734777450562, |
|
"learning_rate": 2.835695142000064e-05, |
|
"loss": 0.7233, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.35749751737835156, |
|
"grad_norm": 0.6325275301933289, |
|
"learning_rate": 2.8293649801751288e-05, |
|
"loss": 0.7208, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.36317208114626187, |
|
"grad_norm": 0.6046157479286194, |
|
"learning_rate": 2.822922513941634e-05, |
|
"loss": 0.7156, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.36884664491417224, |
|
"grad_norm": 0.6081031560897827, |
|
"learning_rate": 2.816368287557454e-05, |
|
"loss": 0.722, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.37452120868208255, |
|
"grad_norm": 0.6153631806373596, |
|
"learning_rate": 2.809702854721934e-05, |
|
"loss": 0.7171, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3801957724499929, |
|
"grad_norm": 0.6361656188964844, |
|
"learning_rate": 2.8029267785291092e-05, |
|
"loss": 0.7134, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.38587033621790323, |
|
"grad_norm": 0.6033869981765747, |
|
"learning_rate": 2.796040631420139e-05, |
|
"loss": 0.7171, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3915448999858136, |
|
"grad_norm": 0.6300106644630432, |
|
"learning_rate": 2.789044995134944e-05, |
|
"loss": 0.7139, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3972194637537239, |
|
"grad_norm": 0.5989068150520325, |
|
"learning_rate": 2.781940460663062e-05, |
|
"loss": 0.7142, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4028940275216343, |
|
"grad_norm": 0.5790150761604309, |
|
"learning_rate": 2.774727628193721e-05, |
|
"loss": 0.7126, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4085685912895446, |
|
"grad_norm": 0.5948804616928101, |
|
"learning_rate": 2.7674071070651378e-05, |
|
"loss": 0.7103, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.41424315505745496, |
|
"grad_norm": 0.6838712096214294, |
|
"learning_rate": 2.7599795157130364e-05, |
|
"loss": 0.7169, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4199177188253653, |
|
"grad_norm": 0.6502018570899963, |
|
"learning_rate": 2.7524454816184076e-05, |
|
"loss": 0.7094, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.42559228259327564, |
|
"grad_norm": 0.6322967410087585, |
|
"learning_rate": 2.7448056412544956e-05, |
|
"loss": 0.7134, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.431266846361186, |
|
"grad_norm": 0.5761287212371826, |
|
"learning_rate": 2.7370606400330334e-05, |
|
"loss": 0.7067, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4369414101290963, |
|
"grad_norm": 0.6147580742835999, |
|
"learning_rate": 2.729211132249713e-05, |
|
"loss": 0.7078, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4426159738970067, |
|
"grad_norm": 0.6231666207313538, |
|
"learning_rate": 2.7212577810289157e-05, |
|
"loss": 0.7066, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.448290537664917, |
|
"grad_norm": 0.5739862322807312, |
|
"learning_rate": 2.713201258267689e-05, |
|
"loss": 0.708, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.45396510143282737, |
|
"grad_norm": 0.7059602737426758, |
|
"learning_rate": 2.7050422445789843e-05, |
|
"loss": 0.7043, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4596396652007377, |
|
"grad_norm": 0.6156895160675049, |
|
"learning_rate": 2.696781429234162e-05, |
|
"loss": 0.7118, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.46531422896864805, |
|
"grad_norm": 0.5444714426994324, |
|
"learning_rate": 2.6884195101047567e-05, |
|
"loss": 0.7031, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.47098879273655836, |
|
"grad_norm": 0.6431369185447693, |
|
"learning_rate": 2.6799571936035284e-05, |
|
"loss": 0.7056, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4766633565044687, |
|
"grad_norm": 0.6375367641448975, |
|
"learning_rate": 2.671395194624779e-05, |
|
"loss": 0.6991, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.48233792027237904, |
|
"grad_norm": 0.6311667561531067, |
|
"learning_rate": 2.6627342364839604e-05, |
|
"loss": 0.6991, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4880124840402894, |
|
"grad_norm": 0.580328643321991, |
|
"learning_rate": 2.6539750508565683e-05, |
|
"loss": 0.7027, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4936870478081997, |
|
"grad_norm": 0.6254743933677673, |
|
"learning_rate": 2.6451183777163316e-05, |
|
"loss": 0.6977, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4993616115761101, |
|
"grad_norm": 0.8747753500938416, |
|
"learning_rate": 2.636164965272699e-05, |
|
"loss": 0.6974, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5050361753440205, |
|
"grad_norm": 0.5931680798530579, |
|
"learning_rate": 2.6271155699076305e-05, |
|
"loss": 0.7001, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5107107391119308, |
|
"grad_norm": 0.5763223767280579, |
|
"learning_rate": 2.6179709561116983e-05, |
|
"loss": 0.7023, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5163853028798411, |
|
"grad_norm": 0.5211492776870728, |
|
"learning_rate": 2.6087318964195032e-05, |
|
"loss": 0.6957, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5220598666477515, |
|
"grad_norm": 0.5684000253677368, |
|
"learning_rate": 2.59939917134441e-05, |
|
"loss": 0.6916, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5277344304156618, |
|
"grad_norm": 0.6029589176177979, |
|
"learning_rate": 2.5899735693126113e-05, |
|
"loss": 0.6942, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5334089941835721, |
|
"grad_norm": 0.5765926837921143, |
|
"learning_rate": 2.5804558865965206e-05, |
|
"loss": 0.6973, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5390835579514824, |
|
"grad_norm": 0.5227144956588745, |
|
"learning_rate": 2.5708469272475044e-05, |
|
"loss": 0.6929, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5447581217193929, |
|
"grad_norm": 0.6175386309623718, |
|
"learning_rate": 2.5611475030279546e-05, |
|
"loss": 0.6908, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5504326854873032, |
|
"grad_norm": 0.5724866986274719, |
|
"learning_rate": 2.5513584333427125e-05, |
|
"loss": 0.6893, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5561072492552135, |
|
"grad_norm": 0.5964395403862, |
|
"learning_rate": 2.541480545169846e-05, |
|
"loss": 0.6944, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5617818130231238, |
|
"grad_norm": 0.6019209027290344, |
|
"learning_rate": 2.5315146729907827e-05, |
|
"loss": 0.6899, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5674563767910342, |
|
"grad_norm": 0.6371375918388367, |
|
"learning_rate": 2.521461658719819e-05, |
|
"loss": 0.6904, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5731309405589445, |
|
"grad_norm": 0.5762882232666016, |
|
"learning_rate": 2.5113223516329924e-05, |
|
"loss": 0.6887, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5788055043268548, |
|
"grad_norm": 0.591663122177124, |
|
"learning_rate": 2.501097608296334e-05, |
|
"loss": 0.6894, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5844800680947652, |
|
"grad_norm": 0.5833630561828613, |
|
"learning_rate": 2.4907882924935072e-05, |
|
"loss": 0.6866, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5901546318626756, |
|
"grad_norm": 0.5615355968475342, |
|
"learning_rate": 2.4803952751528363e-05, |
|
"loss": 0.6927, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5958291956305859, |
|
"grad_norm": 0.5507014989852905, |
|
"learning_rate": 2.4699194342737295e-05, |
|
"loss": 0.6934, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6015037593984962, |
|
"grad_norm": 0.5132161974906921, |
|
"learning_rate": 2.459361654852505e-05, |
|
"loss": 0.688, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6071783231664066, |
|
"grad_norm": 0.5238850116729736, |
|
"learning_rate": 2.4487228288076293e-05, |
|
"loss": 0.6804, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6128528869343169, |
|
"grad_norm": 0.5849164724349976, |
|
"learning_rate": 2.438003854904366e-05, |
|
"loss": 0.6911, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6185274507022273, |
|
"grad_norm": 0.5290674567222595, |
|
"learning_rate": 2.4272056386788485e-05, |
|
"loss": 0.6838, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6242020144701376, |
|
"grad_norm": 0.5804121494293213, |
|
"learning_rate": 2.4163290923615814e-05, |
|
"loss": 0.6894, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.629876578238048, |
|
"grad_norm": 0.5559779405593872, |
|
"learning_rate": 2.4053751348003757e-05, |
|
"loss": 0.6859, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6355511420059583, |
|
"grad_norm": 0.5486791133880615, |
|
"learning_rate": 2.394344691382723e-05, |
|
"loss": 0.6836, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6412257057738686, |
|
"grad_norm": 0.5544127225875854, |
|
"learning_rate": 2.3832386939576214e-05, |
|
"loss": 0.681, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6469002695417789, |
|
"grad_norm": 0.5256103277206421, |
|
"learning_rate": 2.3720580807568513e-05, |
|
"loss": 0.6823, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6525748333096894, |
|
"grad_norm": 0.5488288402557373, |
|
"learning_rate": 2.3608037963157142e-05, |
|
"loss": 0.6818, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6582493970775997, |
|
"grad_norm": 0.5254908204078674, |
|
"learning_rate": 2.3494767913932393e-05, |
|
"loss": 0.6774, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.66392396084551, |
|
"grad_norm": 0.5880591869354248, |
|
"learning_rate": 2.338078022891864e-05, |
|
"loss": 0.6795, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.6695985246134204, |
|
"grad_norm": 0.5331950783729553, |
|
"learning_rate": 2.3266084537765924e-05, |
|
"loss": 0.6777, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6752730883813307, |
|
"grad_norm": 0.5736955404281616, |
|
"learning_rate": 2.3150690529936475e-05, |
|
"loss": 0.6792, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.680947652149241, |
|
"grad_norm": 0.5705032348632812, |
|
"learning_rate": 2.303460795388613e-05, |
|
"loss": 0.6736, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6866222159171513, |
|
"grad_norm": 0.569355845451355, |
|
"learning_rate": 2.2917846616240784e-05, |
|
"loss": 0.6767, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6922967796850618, |
|
"grad_norm": 1.2819143533706665, |
|
"learning_rate": 2.2800416380967952e-05, |
|
"loss": 0.6772, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6979713434529721, |
|
"grad_norm": 0.5238373279571533, |
|
"learning_rate": 2.268232716854343e-05, |
|
"loss": 0.674, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7036459072208824, |
|
"grad_norm": 0.5886688828468323, |
|
"learning_rate": 2.2563588955113246e-05, |
|
"loss": 0.6757, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7093204709887927, |
|
"grad_norm": 0.5450348854064941, |
|
"learning_rate": 2.244421177165085e-05, |
|
"loss": 0.6691, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7149950347567031, |
|
"grad_norm": 0.5553733706474304, |
|
"learning_rate": 2.232420570310974e-05, |
|
"loss": 0.6751, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7206695985246134, |
|
"grad_norm": 0.5076789259910583, |
|
"learning_rate": 2.2203580887571423e-05, |
|
"loss": 0.6739, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7263441622925237, |
|
"grad_norm": 0.5153952240943909, |
|
"learning_rate": 2.2082347515389027e-05, |
|
"loss": 0.6734, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.732018726060434, |
|
"grad_norm": 0.5176730155944824, |
|
"learning_rate": 2.1960515828326372e-05, |
|
"loss": 0.6706, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7376932898283445, |
|
"grad_norm": 0.526030421257019, |
|
"learning_rate": 2.1838096118692768e-05, |
|
"loss": 0.6694, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7433678535962548, |
|
"grad_norm": 0.6030652523040771, |
|
"learning_rate": 2.1715098728473518e-05, |
|
"loss": 0.6707, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7490424173641651, |
|
"grad_norm": 0.6607082486152649, |
|
"learning_rate": 2.1591534048456225e-05, |
|
"loss": 0.6668, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 0.5300272107124329, |
|
"learning_rate": 2.1467412517352996e-05, |
|
"loss": 0.6696, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7603915448999858, |
|
"grad_norm": 0.5344169735908508, |
|
"learning_rate": 2.1342744620918568e-05, |
|
"loss": 0.6736, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7660661086678962, |
|
"grad_norm": 0.5058417916297913, |
|
"learning_rate": 2.121754089106448e-05, |
|
"loss": 0.6681, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7717406724358065, |
|
"grad_norm": 0.5440433621406555, |
|
"learning_rate": 2.1091811904969344e-05, |
|
"loss": 0.6702, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7774152362037169, |
|
"grad_norm": 0.5361486077308655, |
|
"learning_rate": 2.096556828418528e-05, |
|
"loss": 0.6686, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7830897999716272, |
|
"grad_norm": 0.6350403428077698, |
|
"learning_rate": 2.0838820693740603e-05, |
|
"loss": 0.6678, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7887643637395375, |
|
"grad_norm": 0.5326098203659058, |
|
"learning_rate": 2.0711579841238875e-05, |
|
"loss": 0.6711, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7944389275074478, |
|
"grad_norm": 0.540676474571228, |
|
"learning_rate": 2.058385647595429e-05, |
|
"loss": 0.6705, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8001134912753582, |
|
"grad_norm": 0.4930702745914459, |
|
"learning_rate": 2.045566138792361e-05, |
|
"loss": 0.6683, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8057880550432686, |
|
"grad_norm": 0.5729920268058777, |
|
"learning_rate": 2.032700540703459e-05, |
|
"loss": 0.6646, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8114626188111789, |
|
"grad_norm": 0.5179927945137024, |
|
"learning_rate": 2.0197899402111127e-05, |
|
"loss": 0.6632, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8171371825790892, |
|
"grad_norm": 0.5147942900657654, |
|
"learning_rate": 2.0068354279995008e-05, |
|
"loss": 0.6558, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8228117463469996, |
|
"grad_norm": 0.5044906735420227, |
|
"learning_rate": 1.9938380984624533e-05, |
|
"loss": 0.6634, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8284863101149099, |
|
"grad_norm": 0.5231923460960388, |
|
"learning_rate": 1.9807990496109965e-05, |
|
"loss": 0.6698, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.8341608738828202, |
|
"grad_norm": 0.5322957634925842, |
|
"learning_rate": 1.967719382980594e-05, |
|
"loss": 0.6568, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.8398354376507307, |
|
"grad_norm": 0.512269139289856, |
|
"learning_rate": 1.9546002035380886e-05, |
|
"loss": 0.6654, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.845510001418641, |
|
"grad_norm": 0.508976399898529, |
|
"learning_rate": 1.9414426195883558e-05, |
|
"loss": 0.6552, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8511845651865513, |
|
"grad_norm": 0.5061299204826355, |
|
"learning_rate": 1.9282477426806723e-05, |
|
"loss": 0.6599, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8568591289544616, |
|
"grad_norm": 0.510822057723999, |
|
"learning_rate": 1.9150166875148155e-05, |
|
"loss": 0.6612, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.862533692722372, |
|
"grad_norm": 0.5578708648681641, |
|
"learning_rate": 1.9017505718468934e-05, |
|
"loss": 0.658, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8682082564902823, |
|
"grad_norm": 0.5130868554115295, |
|
"learning_rate": 1.888450516394914e-05, |
|
"loss": 0.6541, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8738828202581926, |
|
"grad_norm": 0.5147811770439148, |
|
"learning_rate": 1.8751176447441104e-05, |
|
"loss": 0.6586, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.879557384026103, |
|
"grad_norm": 0.5556140542030334, |
|
"learning_rate": 1.861753083252021e-05, |
|
"loss": 0.6535, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8852319477940134, |
|
"grad_norm": 0.509611964225769, |
|
"learning_rate": 1.8483579609533318e-05, |
|
"loss": 0.6537, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.8909065115619237, |
|
"grad_norm": 0.5088684558868408, |
|
"learning_rate": 1.834933409464499e-05, |
|
"loss": 0.6562, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.896581075329834, |
|
"grad_norm": 0.48405396938323975, |
|
"learning_rate": 1.821480562888148e-05, |
|
"loss": 0.6583, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9022556390977443, |
|
"grad_norm": 0.5087782144546509, |
|
"learning_rate": 1.808000557717268e-05, |
|
"loss": 0.6558, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9079302028656547, |
|
"grad_norm": 0.5303909778594971, |
|
"learning_rate": 1.7944945327391957e-05, |
|
"loss": 0.6517, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.913604766633565, |
|
"grad_norm": 0.5164442658424377, |
|
"learning_rate": 1.7809636289394185e-05, |
|
"loss": 0.6529, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9192793304014754, |
|
"grad_norm": 0.5162308216094971, |
|
"learning_rate": 1.7674089894051774e-05, |
|
"loss": 0.6542, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.9249538941693858, |
|
"grad_norm": 0.545396625995636, |
|
"learning_rate": 1.753831759228903e-05, |
|
"loss": 0.6527, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.9306284579372961, |
|
"grad_norm": 0.5134595632553101, |
|
"learning_rate": 1.740233085411477e-05, |
|
"loss": 0.6555, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.9363030217052064, |
|
"grad_norm": 0.48815637826919556, |
|
"learning_rate": 1.7266141167653353e-05, |
|
"loss": 0.6554, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9419775854731167, |
|
"grad_norm": 0.5034410953521729, |
|
"learning_rate": 1.7129760038174146e-05, |
|
"loss": 0.6514, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.9476521492410271, |
|
"grad_norm": 0.5322323441505432, |
|
"learning_rate": 1.6993198987119576e-05, |
|
"loss": 0.6533, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.9533267130089375, |
|
"grad_norm": 0.48363253474235535, |
|
"learning_rate": 1.6856469551131805e-05, |
|
"loss": 0.6468, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9590012767768478, |
|
"grad_norm": 0.4600164592266083, |
|
"learning_rate": 1.67195832810781e-05, |
|
"loss": 0.6472, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9646758405447581, |
|
"grad_norm": 0.49600768089294434, |
|
"learning_rate": 1.6582551741075033e-05, |
|
"loss": 0.6467, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9703504043126685, |
|
"grad_norm": 0.7202423810958862, |
|
"learning_rate": 1.6445386507511546e-05, |
|
"loss": 0.6502, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9760249680805788, |
|
"grad_norm": 0.502703070640564, |
|
"learning_rate": 1.630809916807098e-05, |
|
"loss": 0.6424, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9816995318484891, |
|
"grad_norm": 0.49266818165779114, |
|
"learning_rate": 1.617070132075214e-05, |
|
"loss": 0.6485, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9873740956163994, |
|
"grad_norm": 0.5194821357727051, |
|
"learning_rate": 1.6033204572889516e-05, |
|
"loss": 0.6499, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9930486593843099, |
|
"grad_norm": 0.49109163880348206, |
|
"learning_rate": 1.5895620540172682e-05, |
|
"loss": 0.6506, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9987232231522202, |
|
"grad_norm": 0.5099320411682129, |
|
"learning_rate": 1.575796084566503e-05, |
|
"loss": 0.6466, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.0043977869201306, |
|
"grad_norm": 0.5476223230361938, |
|
"learning_rate": 1.562023711882182e-05, |
|
"loss": 0.5924, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.010072350688041, |
|
"grad_norm": 0.4934983551502228, |
|
"learning_rate": 1.548246099450776e-05, |
|
"loss": 0.5683, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.0157469144559512, |
|
"grad_norm": 0.5262681841850281, |
|
"learning_rate": 1.534464411201409e-05, |
|
"loss": 0.5733, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.0214214782238615, |
|
"grad_norm": 0.5271425843238831, |
|
"learning_rate": 1.520679811407526e-05, |
|
"loss": 0.5697, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.0270960419917718, |
|
"grad_norm": 0.5124356150627136, |
|
"learning_rate": 1.506893464588542e-05, |
|
"loss": 0.5653, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.0327706057596822, |
|
"grad_norm": 0.5131009817123413, |
|
"learning_rate": 1.4931065354114584e-05, |
|
"loss": 0.5669, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.0384451695275925, |
|
"grad_norm": 0.5003370046615601, |
|
"learning_rate": 1.4793201885924745e-05, |
|
"loss": 0.565, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.044119733295503, |
|
"grad_norm": 0.5440374612808228, |
|
"learning_rate": 1.465535588798592e-05, |
|
"loss": 0.5708, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.0497942970634133, |
|
"grad_norm": 0.5212259292602539, |
|
"learning_rate": 1.4517539005492237e-05, |
|
"loss": 0.57, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.0554688608313236, |
|
"grad_norm": 0.5004721879959106, |
|
"learning_rate": 1.4379762881178182e-05, |
|
"loss": 0.5692, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.061143424599234, |
|
"grad_norm": 0.5253936648368835, |
|
"learning_rate": 1.4242039154334973e-05, |
|
"loss": 0.5685, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0668179883671443, |
|
"grad_norm": 0.5163034200668335, |
|
"learning_rate": 1.410437945982732e-05, |
|
"loss": 0.5706, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.0724925521350546, |
|
"grad_norm": 0.49630168080329895, |
|
"learning_rate": 1.3966795427110493e-05, |
|
"loss": 0.5725, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.0781671159029649, |
|
"grad_norm": 0.5117852091789246, |
|
"learning_rate": 1.3829298679247865e-05, |
|
"loss": 0.5646, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0838416796708752, |
|
"grad_norm": 0.5082918405532837, |
|
"learning_rate": 1.369190083192902e-05, |
|
"loss": 0.5705, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0895162434387857, |
|
"grad_norm": 0.5319990515708923, |
|
"learning_rate": 1.3554613492488453e-05, |
|
"loss": 0.5684, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.095190807206696, |
|
"grad_norm": 0.5344195365905762, |
|
"learning_rate": 1.3417448258924971e-05, |
|
"loss": 0.5658, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.1008653709746063, |
|
"grad_norm": 0.507433295249939, |
|
"learning_rate": 1.3280416718921902e-05, |
|
"loss": 0.5717, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.1065399347425167, |
|
"grad_norm": 0.5090216398239136, |
|
"learning_rate": 1.3143530448868198e-05, |
|
"loss": 0.5663, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.112214498510427, |
|
"grad_norm": 0.512146532535553, |
|
"learning_rate": 1.3006801012880425e-05, |
|
"loss": 0.5656, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.1178890622783373, |
|
"grad_norm": 0.5273200869560242, |
|
"learning_rate": 1.2870239961825853e-05, |
|
"loss": 0.5621, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.1235636260462476, |
|
"grad_norm": 0.5408139824867249, |
|
"learning_rate": 1.2733858832346648e-05, |
|
"loss": 0.5744, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.1292381898141581, |
|
"grad_norm": 0.4986436069011688, |
|
"learning_rate": 1.2597669145885231e-05, |
|
"loss": 0.5704, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.1349127535820684, |
|
"grad_norm": 0.5186699628829956, |
|
"learning_rate": 1.2461682407710973e-05, |
|
"loss": 0.5588, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.1405873173499788, |
|
"grad_norm": 0.5081115365028381, |
|
"learning_rate": 1.2325910105948229e-05, |
|
"loss": 0.5667, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.146261881117889, |
|
"grad_norm": 0.501616358757019, |
|
"learning_rate": 1.219036371060582e-05, |
|
"loss": 0.5628, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.1519364448857994, |
|
"grad_norm": 0.5288362503051758, |
|
"learning_rate": 1.2055054672608043e-05, |
|
"loss": 0.5642, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.1576110086537097, |
|
"grad_norm": 0.5392152070999146, |
|
"learning_rate": 1.1919994422827326e-05, |
|
"loss": 0.5606, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.16328557242162, |
|
"grad_norm": 0.514348030090332, |
|
"learning_rate": 1.1785194371118521e-05, |
|
"loss": 0.5653, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.1689601361895305, |
|
"grad_norm": 0.4942004978656769, |
|
"learning_rate": 1.1650665905355014e-05, |
|
"loss": 0.5622, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.1746346999574409, |
|
"grad_norm": 0.48802751302719116, |
|
"learning_rate": 1.1516420390466685e-05, |
|
"loss": 0.5613, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.1803092637253512, |
|
"grad_norm": 0.5025625228881836, |
|
"learning_rate": 1.1382469167479795e-05, |
|
"loss": 0.5656, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.1859838274932615, |
|
"grad_norm": 0.5276467204093933, |
|
"learning_rate": 1.1248823552558895e-05, |
|
"loss": 0.5639, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.1916583912611718, |
|
"grad_norm": 0.5035718083381653, |
|
"learning_rate": 1.1115494836050861e-05, |
|
"loss": 0.5612, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.197332955029082, |
|
"grad_norm": 0.5080997347831726, |
|
"learning_rate": 1.0982494281531069e-05, |
|
"loss": 0.5647, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.2030075187969924, |
|
"grad_norm": 0.505695104598999, |
|
"learning_rate": 1.0849833124851846e-05, |
|
"loss": 0.5681, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.2086820825649027, |
|
"grad_norm": 0.48905614018440247, |
|
"learning_rate": 1.0717522573193281e-05, |
|
"loss": 0.561, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.2143566463328133, |
|
"grad_norm": 0.49127668142318726, |
|
"learning_rate": 1.0585573804116448e-05, |
|
"loss": 0.5639, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.2200312101007236, |
|
"grad_norm": 0.5206524729728699, |
|
"learning_rate": 1.0453997964619112e-05, |
|
"loss": 0.5594, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.2257057738686339, |
|
"grad_norm": 0.48683062195777893, |
|
"learning_rate": 1.0322806170194061e-05, |
|
"loss": 0.5622, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.2313803376365442, |
|
"grad_norm": 0.532207190990448, |
|
"learning_rate": 1.0192009503890037e-05, |
|
"loss": 0.5581, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.2370549014044545, |
|
"grad_norm": 0.49200239777565, |
|
"learning_rate": 1.0061619015375473e-05, |
|
"loss": 0.5594, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.2427294651723648, |
|
"grad_norm": 0.504898190498352, |
|
"learning_rate": 9.931645720004995e-06, |
|
"loss": 0.5622, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.2484040289402751, |
|
"grad_norm": 0.5061923861503601, |
|
"learning_rate": 9.802100597888877e-06, |
|
"loss": 0.5572, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.2540785927081854, |
|
"grad_norm": 0.4961055815219879, |
|
"learning_rate": 9.672994592965409e-06, |
|
"loss": 0.5609, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.259753156476096, |
|
"grad_norm": 0.4930592477321625, |
|
"learning_rate": 9.544338612076396e-06, |
|
"loss": 0.5637, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.2654277202440063, |
|
"grad_norm": 0.4978179335594177, |
|
"learning_rate": 9.41614352404571e-06, |
|
"loss": 0.5615, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.2711022840119166, |
|
"grad_norm": 0.5112114548683167, |
|
"learning_rate": 9.288420158761127e-06, |
|
"loss": 0.558, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.276776847779827, |
|
"grad_norm": 0.5114573240280151, |
|
"learning_rate": 9.161179306259401e-06, |
|
"loss": 0.5561, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.2824514115477372, |
|
"grad_norm": 0.5023430585861206, |
|
"learning_rate": 9.034431715814726e-06, |
|
"loss": 0.5558, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.2881259753156475, |
|
"grad_norm": 0.503487765789032, |
|
"learning_rate": 8.908188095030655e-06, |
|
"loss": 0.5607, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.2938005390835579, |
|
"grad_norm": 0.5188455581665039, |
|
"learning_rate": 8.78245910893552e-06, |
|
"loss": 0.5639, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.2994751028514684, |
|
"grad_norm": 0.5216081738471985, |
|
"learning_rate": 8.657255379081438e-06, |
|
"loss": 0.5584, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.3051496666193787, |
|
"grad_norm": 0.5024508833885193, |
|
"learning_rate": 8.532587482647013e-06, |
|
"loss": 0.5604, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.310824230387289, |
|
"grad_norm": 0.5100445747375488, |
|
"learning_rate": 8.408465951543779e-06, |
|
"loss": 0.5596, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.3164987941551993, |
|
"grad_norm": 0.5005710124969482, |
|
"learning_rate": 8.284901271526481e-06, |
|
"loss": 0.5591, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.3221733579231096, |
|
"grad_norm": 0.5151055455207825, |
|
"learning_rate": 8.161903881307231e-06, |
|
"loss": 0.5462, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.32784792169102, |
|
"grad_norm": 0.4919968545436859, |
|
"learning_rate": 8.039484171673628e-06, |
|
"loss": 0.5523, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.3335224854589303, |
|
"grad_norm": 0.5007758140563965, |
|
"learning_rate": 7.917652484610975e-06, |
|
"loss": 0.5545, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.3391970492268408, |
|
"grad_norm": 0.4885912537574768, |
|
"learning_rate": 7.796419112428583e-06, |
|
"loss": 0.5582, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.344871612994751, |
|
"grad_norm": 0.4874049127101898, |
|
"learning_rate": 7.675794296890265e-06, |
|
"loss": 0.5505, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.3505461767626614, |
|
"grad_norm": 0.46998655796051025, |
|
"learning_rate": 7.555788228349143e-06, |
|
"loss": 0.554, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.3562207405305717, |
|
"grad_norm": 0.4996753931045532, |
|
"learning_rate": 7.436411044886753e-06, |
|
"loss": 0.5513, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.361895304298482, |
|
"grad_norm": 0.502571165561676, |
|
"learning_rate": 7.31767283145657e-06, |
|
"loss": 0.5547, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.3675698680663924, |
|
"grad_norm": 0.48792627453804016, |
|
"learning_rate": 7.199583619032052e-06, |
|
"loss": 0.5551, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.3732444318343027, |
|
"grad_norm": 0.48799988627433777, |
|
"learning_rate": 7.082153383759222e-06, |
|
"loss": 0.5524, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.3789189956022132, |
|
"grad_norm": 0.4976406991481781, |
|
"learning_rate": 6.9653920461138755e-06, |
|
"loss": 0.5548, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.3845935593701233, |
|
"grad_norm": 0.5006715655326843, |
|
"learning_rate": 6.849309470063529e-06, |
|
"loss": 0.5544, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.3902681231380338, |
|
"grad_norm": 0.4864628314971924, |
|
"learning_rate": 6.7339154622340754e-06, |
|
"loss": 0.5483, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.3959426869059441, |
|
"grad_norm": 0.48580724000930786, |
|
"learning_rate": 6.619219771081361e-06, |
|
"loss": 0.5544, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.4016172506738545, |
|
"grad_norm": 0.5042415857315063, |
|
"learning_rate": 6.505232086067607e-06, |
|
"loss": 0.5504, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.4072918144417648, |
|
"grad_norm": 0.4970082640647888, |
|
"learning_rate": 6.391962036842863e-06, |
|
"loss": 0.547, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.412966378209675, |
|
"grad_norm": 0.47866857051849365, |
|
"learning_rate": 6.279419192431494e-06, |
|
"loss": 0.5548, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.4186409419775854, |
|
"grad_norm": 0.4664076566696167, |
|
"learning_rate": 6.167613060423789e-06, |
|
"loss": 0.5454, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.4243155057454957, |
|
"grad_norm": 0.49711087346076965, |
|
"learning_rate": 6.0565530861727685e-06, |
|
"loss": 0.5519, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.4299900695134062, |
|
"grad_norm": 0.46965324878692627, |
|
"learning_rate": 5.946248651996244e-06, |
|
"loss": 0.5519, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.4356646332813165, |
|
"grad_norm": 0.505743145942688, |
|
"learning_rate": 5.836709076384188e-06, |
|
"loss": 0.5482, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.4413391970492269, |
|
"grad_norm": 0.5078002214431763, |
|
"learning_rate": 5.727943613211521e-06, |
|
"loss": 0.5575, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.4470137608171372, |
|
"grad_norm": 0.48647207021713257, |
|
"learning_rate": 5.619961450956347e-06, |
|
"loss": 0.5461, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.4526883245850475, |
|
"grad_norm": 0.4711668789386749, |
|
"learning_rate": 5.5127717119237084e-06, |
|
"loss": 0.5472, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.4583628883529578, |
|
"grad_norm": 0.518395721912384, |
|
"learning_rate": 5.406383451474948e-06, |
|
"loss": 0.5483, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.464037452120868, |
|
"grad_norm": 0.4849320948123932, |
|
"learning_rate": 5.300805657262706e-06, |
|
"loss": 0.5459, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.4697120158887786, |
|
"grad_norm": 0.501943826675415, |
|
"learning_rate": 5.1960472484716374e-06, |
|
"loss": 0.5482, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.475386579656689, |
|
"grad_norm": 0.48699691891670227, |
|
"learning_rate": 5.092117075064931e-06, |
|
"loss": 0.5522, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.4810611434245993, |
|
"grad_norm": 0.48894861340522766, |
|
"learning_rate": 4.989023917036667e-06, |
|
"loss": 0.5502, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.4867357071925096, |
|
"grad_norm": 0.49131521582603455, |
|
"learning_rate": 4.886776483670077e-06, |
|
"loss": 0.5466, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.49241027096042, |
|
"grad_norm": 0.47139400243759155, |
|
"learning_rate": 4.78538341280181e-06, |
|
"loss": 0.5473, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.4980848347283302, |
|
"grad_norm": 0.49604731798171997, |
|
"learning_rate": 4.684853270092173e-06, |
|
"loss": 0.5498, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.5037593984962405, |
|
"grad_norm": 0.4864351749420166, |
|
"learning_rate": 4.585194548301545e-06, |
|
"loss": 0.5448, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.509433962264151, |
|
"grad_norm": 0.48130905628204346, |
|
"learning_rate": 4.486415666572874e-06, |
|
"loss": 0.5469, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.5151085260320611, |
|
"grad_norm": 0.4783124625682831, |
|
"learning_rate": 4.388524969720458e-06, |
|
"loss": 0.546, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.5207830897999717, |
|
"grad_norm": 0.4969868063926697, |
|
"learning_rate": 4.2915307275249585e-06, |
|
"loss": 0.5453, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.526457653567882, |
|
"grad_norm": 0.4832542836666107, |
|
"learning_rate": 4.195441134034799e-06, |
|
"loss": 0.5463, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.5321322173357923, |
|
"grad_norm": 0.4712090790271759, |
|
"learning_rate": 4.10026430687389e-06, |
|
"loss": 0.5449, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.5378067811037026, |
|
"grad_norm": 0.4822421967983246, |
|
"learning_rate": 4.0060082865559035e-06, |
|
"loss": 0.5465, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.543481344871613, |
|
"grad_norm": 0.4809670150279999, |
|
"learning_rate": 3.912681035804971e-06, |
|
"loss": 0.5406, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.5491559086395235, |
|
"grad_norm": 0.4631410539150238, |
|
"learning_rate": 3.820290438883018e-06, |
|
"loss": 0.5461, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.5548304724074336, |
|
"grad_norm": 0.46498140692710876, |
|
"learning_rate": 3.728844300923694e-06, |
|
"loss": 0.5419, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.560505036175344, |
|
"grad_norm": 0.4786704480648041, |
|
"learning_rate": 3.6383503472730116e-06, |
|
"loss": 0.5476, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.5661795999432544, |
|
"grad_norm": 0.4655323624610901, |
|
"learning_rate": 3.548816222836688e-06, |
|
"loss": 0.5406, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.5718541637111647, |
|
"grad_norm": 0.46424925327301025, |
|
"learning_rate": 3.460249491434319e-06, |
|
"loss": 0.5415, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.577528727479075, |
|
"grad_norm": 0.45783787965774536, |
|
"learning_rate": 3.3726576351603985e-06, |
|
"loss": 0.5503, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.5832032912469853, |
|
"grad_norm": 0.49086692929267883, |
|
"learning_rate": 3.2860480537522103e-06, |
|
"loss": 0.543, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.5888778550148959, |
|
"grad_norm": 0.48474520444869995, |
|
"learning_rate": 3.2004280639647122e-06, |
|
"loss": 0.539, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.594552418782806, |
|
"grad_norm": 0.5037649869918823, |
|
"learning_rate": 3.115804898952434e-06, |
|
"loss": 0.5415, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.6002269825507165, |
|
"grad_norm": 0.4954313337802887, |
|
"learning_rate": 3.032185707658389e-06, |
|
"loss": 0.5487, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.6059015463186268, |
|
"grad_norm": 0.4597771465778351, |
|
"learning_rate": 2.949577554210157e-06, |
|
"loss": 0.5445, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.6115761100865371, |
|
"grad_norm": 0.4839852750301361, |
|
"learning_rate": 2.8679874173231137e-06, |
|
"loss": 0.5499, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.6172506738544474, |
|
"grad_norm": 0.4653310179710388, |
|
"learning_rate": 2.787422189710844e-06, |
|
"loss": 0.5453, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.6229252376223577, |
|
"grad_norm": 0.485579252243042, |
|
"learning_rate": 2.7078886775028693e-06, |
|
"loss": 0.5383, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.6285998013902683, |
|
"grad_norm": 0.4727838337421417, |
|
"learning_rate": 2.629393599669667e-06, |
|
"loss": 0.5421, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.6342743651581784, |
|
"grad_norm": 0.45239365100860596, |
|
"learning_rate": 2.5519435874550434e-06, |
|
"loss": 0.5357, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.639948928926089, |
|
"grad_norm": 0.4669874310493469, |
|
"learning_rate": 2.475545183815926e-06, |
|
"loss": 0.5385, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.645623492693999, |
|
"grad_norm": 0.4859563410282135, |
|
"learning_rate": 2.400204842869637e-06, |
|
"loss": 0.5446, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.6512980564619095, |
|
"grad_norm": 0.4492729902267456, |
|
"learning_rate": 2.3259289293486246e-06, |
|
"loss": 0.5418, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.6569726202298198, |
|
"grad_norm": 0.46383896470069885, |
|
"learning_rate": 2.252723718062787e-06, |
|
"loss": 0.5401, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.6626471839977301, |
|
"grad_norm": 0.48168492317199707, |
|
"learning_rate": 2.1805953933693835e-06, |
|
"loss": 0.5423, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.6683217477656405, |
|
"grad_norm": 0.46742239594459534, |
|
"learning_rate": 2.109550048650563e-06, |
|
"loss": 0.542, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.6739963115335508, |
|
"grad_norm": 0.46751725673675537, |
|
"learning_rate": 2.0395936857986125e-06, |
|
"loss": 0.5402, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.6796708753014613, |
|
"grad_norm": 0.49627310037612915, |
|
"learning_rate": 1.970732214708908e-06, |
|
"loss": 0.5461, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.6853454390693714, |
|
"grad_norm": 0.46826520562171936, |
|
"learning_rate": 1.9029714527806652e-06, |
|
"loss": 0.5385, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.691020002837282, |
|
"grad_norm": 0.4701858162879944, |
|
"learning_rate": 1.8363171244254606e-06, |
|
"loss": 0.5376, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.6966945666051922, |
|
"grad_norm": 0.4635229706764221, |
|
"learning_rate": 1.7707748605836632e-06, |
|
"loss": 0.5378, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.7023691303731026, |
|
"grad_norm": 0.4729613661766052, |
|
"learning_rate": 1.7063501982487135e-06, |
|
"loss": 0.5437, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.7080436941410129, |
|
"grad_norm": 0.4672451913356781, |
|
"learning_rate": 1.6430485799993673e-06, |
|
"loss": 0.5428, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.7137182579089232, |
|
"grad_norm": 0.46772390604019165, |
|
"learning_rate": 1.5808753535399022e-06, |
|
"loss": 0.5392, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.7193928216768337, |
|
"grad_norm": 0.46337825059890747, |
|
"learning_rate": 1.5198357712483629e-06, |
|
"loss": 0.5413, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.7250673854447438, |
|
"grad_norm": 0.48103076219558716, |
|
"learning_rate": 1.459934989732818e-06, |
|
"loss": 0.5416, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.7307419492126543, |
|
"grad_norm": 0.45769959688186646, |
|
"learning_rate": 1.4011780693957492e-06, |
|
"loss": 0.5436, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.7364165129805647, |
|
"grad_norm": 0.4552821218967438, |
|
"learning_rate": 1.3435699740065377e-06, |
|
"loss": 0.5425, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.742091076748475, |
|
"grad_norm": 0.48623600602149963, |
|
"learning_rate": 1.2871155702821324e-06, |
|
"loss": 0.5427, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.7477656405163853, |
|
"grad_norm": 0.5024483799934387, |
|
"learning_rate": 1.231819627475911e-06, |
|
"loss": 0.5384, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.7534402042842956, |
|
"grad_norm": 0.4556623101234436, |
|
"learning_rate": 1.1776868169747702e-06, |
|
"loss": 0.5393, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.7591147680522061, |
|
"grad_norm": 0.4748471677303314, |
|
"learning_rate": 1.1247217119044951e-06, |
|
"loss": 0.5385, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.7647893318201162, |
|
"grad_norm": 0.4622340500354767, |
|
"learning_rate": 1.07292878674342e-06, |
|
"loss": 0.5377, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.7704638955880267, |
|
"grad_norm": 0.4581329822540283, |
|
"learning_rate": 1.0223124169444236e-06, |
|
"loss": 0.5366, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.776138459355937, |
|
"grad_norm": 0.4667391777038574, |
|
"learning_rate": 9.72876878565287e-07, |
|
"loss": 0.539, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.7818130231238474, |
|
"grad_norm": 0.4563803970813751, |
|
"learning_rate": 9.246263479074663e-07, |
|
"loss": 0.5403, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.7874875868917577, |
|
"grad_norm": 0.44948819279670715, |
|
"learning_rate": 8.775649011632703e-07, |
|
"loss": 0.5392, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.793162150659668, |
|
"grad_norm": 0.4829549193382263, |
|
"learning_rate": 8.316965140715071e-07, |
|
"loss": 0.5373, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.7988367144275785, |
|
"grad_norm": 0.4718981683254242, |
|
"learning_rate": 7.870250615816182e-07, |
|
"loss": 0.5383, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.8045112781954886, |
|
"grad_norm": 0.4641667306423187, |
|
"learning_rate": 7.435543175263166e-07, |
|
"loss": 0.543, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.8101858419633992, |
|
"grad_norm": 0.45884087681770325, |
|
"learning_rate": 7.012879543027801e-07, |
|
"loss": 0.538, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.8158604057313092, |
|
"grad_norm": 0.4888609051704407, |
|
"learning_rate": 6.602295425624033e-07, |
|
"loss": 0.5366, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.8215349694992198, |
|
"grad_norm": 0.46243107318878174, |
|
"learning_rate": 6.20382550909157e-07, |
|
"loss": 0.5365, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.82720953326713, |
|
"grad_norm": 0.46520647406578064, |
|
"learning_rate": 5.817503456065559e-07, |
|
"loss": 0.5339, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.8328840970350404, |
|
"grad_norm": 0.47549664974212646, |
|
"learning_rate": 5.443361902932792e-07, |
|
"loss": 0.5361, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.838558660802951, |
|
"grad_norm": 0.4677965044975281, |
|
"learning_rate": 5.081432457074614e-07, |
|
"loss": 0.5394, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.844233224570861, |
|
"grad_norm": 0.46250638365745544, |
|
"learning_rate": 4.7317456941966597e-07, |
|
"loss": 0.5388, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.8499077883387716, |
|
"grad_norm": 0.4758864641189575, |
|
"learning_rate": 4.3943311557459177e-07, |
|
"loss": 0.534, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.8555823521066817, |
|
"grad_norm": 0.4370381832122803, |
|
"learning_rate": 4.069217346415027e-07, |
|
"loss": 0.5339, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.8612569158745922, |
|
"grad_norm": 0.4617324769496918, |
|
"learning_rate": 3.756431731734272e-07, |
|
"loss": 0.5396, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.8669314796425025, |
|
"grad_norm": 0.4532717168331146, |
|
"learning_rate": 3.4560007357511856e-07, |
|
"loss": 0.5393, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.8726060434104128, |
|
"grad_norm": 0.46486184000968933, |
|
"learning_rate": 3.16794973879837e-07, |
|
"loss": 0.5367, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.8782806071783231, |
|
"grad_norm": 0.44514200091362, |
|
"learning_rate": 2.8923030753492783e-07, |
|
"loss": 0.5384, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.8839551709462334, |
|
"grad_norm": 0.4737865924835205, |
|
"learning_rate": 2.6290840319625255e-07, |
|
"loss": 0.5355, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.889629734714144, |
|
"grad_norm": 0.45271801948547363, |
|
"learning_rate": 2.378314845314561e-07, |
|
"loss": 0.5451, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.895304298482054, |
|
"grad_norm": 0.46050384640693665, |
|
"learning_rate": 2.14001670032124e-07, |
|
"loss": 0.5347, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.9009788622499646, |
|
"grad_norm": 0.4726841151714325, |
|
"learning_rate": 1.9142097283479876e-07, |
|
"loss": 0.5428, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.906653426017875, |
|
"grad_norm": 0.4662003815174103, |
|
"learning_rate": 1.700913005509208e-07, |
|
"loss": 0.5407, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.9123279897857852, |
|
"grad_norm": 0.44422999024391174, |
|
"learning_rate": 1.500144551056709e-07, |
|
"loss": 0.535, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.9180025535536955, |
|
"grad_norm": 0.4599597752094269, |
|
"learning_rate": 1.3119213258574015e-07, |
|
"loss": 0.5376, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.9236771173216058, |
|
"grad_norm": 0.4735456705093384, |
|
"learning_rate": 1.1362592309605291e-07, |
|
"loss": 0.5392, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.9293516810895164, |
|
"grad_norm": 0.4692912995815277, |
|
"learning_rate": 9.731731062542604e-08, |
|
"loss": 0.5398, |
|
"step": 6800 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 7048, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5124467391135325e+20, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|