|
{ |
|
"best_metric": 0.12199707, |
|
"best_model_checkpoint": "/data1/tzz/VQA/ckpt/llava_next_video/v2-20250226-080739/checkpoint-185", |
|
"epoch": 0.9966329966329966, |
|
"eval_steps": 500, |
|
"global_step": 185, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0053872053872053875, |
|
"grad_norm": 14.159545000064247, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 31.90625, |
|
"memory(GiB)": 22.53, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.022985 |
|
}, |
|
{ |
|
"epoch": 0.010774410774410775, |
|
"grad_norm": 14.616283963493206, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 31.5234375, |
|
"memory(GiB)": 22.53, |
|
"step": 2, |
|
"train_speed(iter/s)": 0.028711 |
|
}, |
|
{ |
|
"epoch": 0.01616161616161616, |
|
"grad_norm": 13.121864716464238, |
|
"learning_rate": 3e-06, |
|
"loss": 33.6796875, |
|
"memory(GiB)": 22.53, |
|
"step": 3, |
|
"train_speed(iter/s)": 0.031289 |
|
}, |
|
{ |
|
"epoch": 0.02154882154882155, |
|
"grad_norm": 11.258740067609244, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 31.8203125, |
|
"memory(GiB)": 22.53, |
|
"step": 4, |
|
"train_speed(iter/s)": 0.032739 |
|
}, |
|
{ |
|
"epoch": 0.026936026936026935, |
|
"grad_norm": 13.170936715126654, |
|
"learning_rate": 5e-06, |
|
"loss": 29.2109375, |
|
"memory(GiB)": 22.55, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.033213 |
|
}, |
|
{ |
|
"epoch": 0.03232323232323232, |
|
"grad_norm": 14.330929445232412, |
|
"learning_rate": 6e-06, |
|
"loss": 28.078125, |
|
"memory(GiB)": 22.55, |
|
"step": 6, |
|
"train_speed(iter/s)": 0.033152 |
|
}, |
|
{ |
|
"epoch": 0.03771043771043771, |
|
"grad_norm": 13.548506738998086, |
|
"learning_rate": 7e-06, |
|
"loss": 27.5078125, |
|
"memory(GiB)": 22.55, |
|
"step": 7, |
|
"train_speed(iter/s)": 0.033486 |
|
}, |
|
{ |
|
"epoch": 0.0430976430976431, |
|
"grad_norm": 8.666929263748118, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 24.9609375, |
|
"memory(GiB)": 22.56, |
|
"step": 8, |
|
"train_speed(iter/s)": 0.033686 |
|
}, |
|
{ |
|
"epoch": 0.048484848484848485, |
|
"grad_norm": 11.066925048714, |
|
"learning_rate": 9e-06, |
|
"loss": 19.890625, |
|
"memory(GiB)": 22.57, |
|
"step": 9, |
|
"train_speed(iter/s)": 0.0337 |
|
}, |
|
{ |
|
"epoch": 0.05387205387205387, |
|
"grad_norm": 8.973276554829988, |
|
"learning_rate": 1e-05, |
|
"loss": 14.328125, |
|
"memory(GiB)": 22.58, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.03368 |
|
}, |
|
{ |
|
"epoch": 0.05925925925925926, |
|
"grad_norm": 4.92025256078084, |
|
"learning_rate": 9.999194339645292e-06, |
|
"loss": 11.275390625, |
|
"memory(GiB)": 22.58, |
|
"step": 11, |
|
"train_speed(iter/s)": 0.033773 |
|
}, |
|
{ |
|
"epoch": 0.06464646464646465, |
|
"grad_norm": 2.5239985209180706, |
|
"learning_rate": 9.996777618216608e-06, |
|
"loss": 9.6875, |
|
"memory(GiB)": 22.58, |
|
"step": 12, |
|
"train_speed(iter/s)": 0.034222 |
|
}, |
|
{ |
|
"epoch": 0.07003367003367003, |
|
"grad_norm": 3.3965201854332046, |
|
"learning_rate": 9.992750614536606e-06, |
|
"loss": 7.869140625, |
|
"memory(GiB)": 22.58, |
|
"step": 13, |
|
"train_speed(iter/s)": 0.034559 |
|
}, |
|
{ |
|
"epoch": 0.07542087542087542, |
|
"grad_norm": 3.83999730322345, |
|
"learning_rate": 9.987114626364172e-06, |
|
"loss": 7.22265625, |
|
"memory(GiB)": 22.58, |
|
"step": 14, |
|
"train_speed(iter/s)": 0.034521 |
|
}, |
|
{ |
|
"epoch": 0.08080808080808081, |
|
"grad_norm": 4.824626769554233, |
|
"learning_rate": 9.979871469976197e-06, |
|
"loss": 7.0576171875, |
|
"memory(GiB)": 22.58, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.03441 |
|
}, |
|
{ |
|
"epoch": 0.0861952861952862, |
|
"grad_norm": 3.043760096951554, |
|
"learning_rate": 9.971023479582258e-06, |
|
"loss": 5.4990234375, |
|
"memory(GiB)": 22.58, |
|
"step": 16, |
|
"train_speed(iter/s)": 0.034356 |
|
}, |
|
{ |
|
"epoch": 0.09158249158249158, |
|
"grad_norm": 1.3971212531371173, |
|
"learning_rate": 9.960573506572391e-06, |
|
"loss": 4.044921875, |
|
"memory(GiB)": 22.58, |
|
"step": 17, |
|
"train_speed(iter/s)": 0.03432 |
|
}, |
|
{ |
|
"epoch": 0.09696969696969697, |
|
"grad_norm": 1.5367962214559587, |
|
"learning_rate": 9.948524918598175e-06, |
|
"loss": 3.44189453125, |
|
"memory(GiB)": 22.58, |
|
"step": 18, |
|
"train_speed(iter/s)": 0.034225 |
|
}, |
|
{ |
|
"epoch": 0.10235690235690235, |
|
"grad_norm": 1.2329087385122603, |
|
"learning_rate": 9.934881598487478e-06, |
|
"loss": 3.4072265625, |
|
"memory(GiB)": 22.58, |
|
"step": 19, |
|
"train_speed(iter/s)": 0.034123 |
|
}, |
|
{ |
|
"epoch": 0.10774410774410774, |
|
"grad_norm": 0.8648810367159049, |
|
"learning_rate": 9.91964794299315e-06, |
|
"loss": 3.0048828125, |
|
"memory(GiB)": 22.58, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.03406 |
|
}, |
|
{ |
|
"epoch": 0.11313131313131314, |
|
"grad_norm": 1.1333084548737522, |
|
"learning_rate": 9.902828861376101e-06, |
|
"loss": 2.973876953125, |
|
"memory(GiB)": 22.58, |
|
"step": 21, |
|
"train_speed(iter/s)": 0.03407 |
|
}, |
|
{ |
|
"epoch": 0.11851851851851852, |
|
"grad_norm": 1.67328747436259, |
|
"learning_rate": 9.884429773823238e-06, |
|
"loss": 2.460693359375, |
|
"memory(GiB)": 22.58, |
|
"step": 22, |
|
"train_speed(iter/s)": 0.033985 |
|
}, |
|
{ |
|
"epoch": 0.12390572390572391, |
|
"grad_norm": 0.8370283899907709, |
|
"learning_rate": 9.864456609700726e-06, |
|
"loss": 2.162109375, |
|
"memory(GiB)": 22.58, |
|
"step": 23, |
|
"train_speed(iter/s)": 0.033817 |
|
}, |
|
{ |
|
"epoch": 0.1292929292929293, |
|
"grad_norm": 0.7984037408374535, |
|
"learning_rate": 9.842915805643156e-06, |
|
"loss": 2.711669921875, |
|
"memory(GiB)": 22.58, |
|
"step": 24, |
|
"train_speed(iter/s)": 0.033701 |
|
}, |
|
{ |
|
"epoch": 0.13468013468013468, |
|
"grad_norm": 0.5877571918682093, |
|
"learning_rate": 9.819814303479268e-06, |
|
"loss": 1.707275390625, |
|
"memory(GiB)": 22.58, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.033872 |
|
}, |
|
{ |
|
"epoch": 0.14006734006734006, |
|
"grad_norm": 1.4800629465642858, |
|
"learning_rate": 9.79515954799483e-06, |
|
"loss": 2.813720703125, |
|
"memory(GiB)": 22.58, |
|
"step": 26, |
|
"train_speed(iter/s)": 0.034031 |
|
}, |
|
{ |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 2.1222533390916443, |
|
"learning_rate": 9.768959484533461e-06, |
|
"loss": 3.59912109375, |
|
"memory(GiB)": 22.58, |
|
"step": 27, |
|
"train_speed(iter/s)": 0.034169 |
|
}, |
|
{ |
|
"epoch": 0.15084175084175083, |
|
"grad_norm": 0.8369490081884605, |
|
"learning_rate": 9.741222556436132e-06, |
|
"loss": 1.89404296875, |
|
"memory(GiB)": 22.58, |
|
"step": 28, |
|
"train_speed(iter/s)": 0.034295 |
|
}, |
|
{ |
|
"epoch": 0.15622895622895622, |
|
"grad_norm": 0.5854633514891076, |
|
"learning_rate": 9.711957702320176e-06, |
|
"loss": 1.986328125, |
|
"memory(GiB)": 22.58, |
|
"step": 29, |
|
"train_speed(iter/s)": 0.034448 |
|
}, |
|
{ |
|
"epoch": 0.16161616161616163, |
|
"grad_norm": 0.35782476089852655, |
|
"learning_rate": 9.681174353198687e-06, |
|
"loss": 2.087890625, |
|
"memory(GiB)": 22.58, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.034568 |
|
}, |
|
{ |
|
"epoch": 0.16700336700336701, |
|
"grad_norm": 0.7861618699933016, |
|
"learning_rate": 9.648882429441258e-06, |
|
"loss": 2.669921875, |
|
"memory(GiB)": 22.58, |
|
"step": 31, |
|
"train_speed(iter/s)": 0.034675 |
|
}, |
|
{ |
|
"epoch": 0.1723905723905724, |
|
"grad_norm": 0.536791680824106, |
|
"learning_rate": 9.615092337576987e-06, |
|
"loss": 2.203125, |
|
"memory(GiB)": 22.58, |
|
"step": 32, |
|
"train_speed(iter/s)": 0.034758 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 1.3726808261834198, |
|
"learning_rate": 9.579814966940833e-06, |
|
"loss": 2.114501953125, |
|
"memory(GiB)": 22.58, |
|
"step": 33, |
|
"train_speed(iter/s)": 0.034839 |
|
}, |
|
{ |
|
"epoch": 0.18316498316498317, |
|
"grad_norm": 0.8535138723050261, |
|
"learning_rate": 9.543061686164374e-06, |
|
"loss": 2.1591796875, |
|
"memory(GiB)": 22.58, |
|
"step": 34, |
|
"train_speed(iter/s)": 0.034969 |
|
}, |
|
{ |
|
"epoch": 0.18855218855218855, |
|
"grad_norm": 0.6726334477065563, |
|
"learning_rate": 9.504844339512096e-06, |
|
"loss": 2.35791015625, |
|
"memory(GiB)": 22.58, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.035076 |
|
}, |
|
{ |
|
"epoch": 0.19393939393939394, |
|
"grad_norm": 0.7227226956981251, |
|
"learning_rate": 9.465175243064428e-06, |
|
"loss": 2.400390625, |
|
"memory(GiB)": 22.58, |
|
"step": 36, |
|
"train_speed(iter/s)": 0.035195 |
|
}, |
|
{ |
|
"epoch": 0.19932659932659932, |
|
"grad_norm": 0.7075241914063357, |
|
"learning_rate": 9.424067180748692e-06, |
|
"loss": 1.476318359375, |
|
"memory(GiB)": 22.58, |
|
"step": 37, |
|
"train_speed(iter/s)": 0.035278 |
|
}, |
|
{ |
|
"epoch": 0.2047138047138047, |
|
"grad_norm": 0.8285808812880359, |
|
"learning_rate": 9.381533400219319e-06, |
|
"loss": 2.50634765625, |
|
"memory(GiB)": 22.58, |
|
"step": 38, |
|
"train_speed(iter/s)": 0.035354 |
|
}, |
|
{ |
|
"epoch": 0.2101010101010101, |
|
"grad_norm": 0.747109858212397, |
|
"learning_rate": 9.337587608588588e-06, |
|
"loss": 2.397216796875, |
|
"memory(GiB)": 22.58, |
|
"step": 39, |
|
"train_speed(iter/s)": 0.035434 |
|
}, |
|
{ |
|
"epoch": 0.21548821548821548, |
|
"grad_norm": 0.8997236382866319, |
|
"learning_rate": 9.292243968009332e-06, |
|
"loss": 2.3466796875, |
|
"memory(GiB)": 22.58, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.035447 |
|
}, |
|
{ |
|
"epoch": 0.22087542087542086, |
|
"grad_norm": 0.3854506877674985, |
|
"learning_rate": 9.24551709111097e-06, |
|
"loss": 1.607421875, |
|
"memory(GiB)": 22.58, |
|
"step": 41, |
|
"train_speed(iter/s)": 0.035398 |
|
}, |
|
{ |
|
"epoch": 0.22626262626262628, |
|
"grad_norm": 0.4259732475000951, |
|
"learning_rate": 9.197422036290386e-06, |
|
"loss": 1.921630859375, |
|
"memory(GiB)": 22.58, |
|
"step": 42, |
|
"train_speed(iter/s)": 0.035349 |
|
}, |
|
{ |
|
"epoch": 0.23164983164983166, |
|
"grad_norm": 0.46150408574103824, |
|
"learning_rate": 9.147974302859158e-06, |
|
"loss": 1.41650390625, |
|
"memory(GiB)": 22.58, |
|
"step": 43, |
|
"train_speed(iter/s)": 0.035321 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 0.5918291232050616, |
|
"learning_rate": 9.09718982604866e-06, |
|
"loss": 1.58154296875, |
|
"memory(GiB)": 22.58, |
|
"step": 44, |
|
"train_speed(iter/s)": 0.03529 |
|
}, |
|
{ |
|
"epoch": 0.24242424242424243, |
|
"grad_norm": 1.1984794966626473, |
|
"learning_rate": 9.045084971874738e-06, |
|
"loss": 2.67236328125, |
|
"memory(GiB)": 22.58, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.035244 |
|
}, |
|
{ |
|
"epoch": 0.24781144781144782, |
|
"grad_norm": 0.7304425352094286, |
|
"learning_rate": 8.991676531863507e-06, |
|
"loss": 1.993408203125, |
|
"memory(GiB)": 22.58, |
|
"step": 46, |
|
"train_speed(iter/s)": 0.0352 |
|
}, |
|
{ |
|
"epoch": 0.2531986531986532, |
|
"grad_norm": 0.8247667804924503, |
|
"learning_rate": 8.936981717640061e-06, |
|
"loss": 2.8740234375, |
|
"memory(GiB)": 22.58, |
|
"step": 47, |
|
"train_speed(iter/s)": 0.035111 |
|
}, |
|
{ |
|
"epoch": 0.2585858585858586, |
|
"grad_norm": 1.072788633508109, |
|
"learning_rate": 8.881018155381766e-06, |
|
"loss": 1.845458984375, |
|
"memory(GiB)": 22.58, |
|
"step": 48, |
|
"train_speed(iter/s)": 0.035139 |
|
}, |
|
{ |
|
"epoch": 0.26397306397306397, |
|
"grad_norm": 0.6949566674892941, |
|
"learning_rate": 8.823803880137993e-06, |
|
"loss": 2.345458984375, |
|
"memory(GiB)": 22.58, |
|
"step": 49, |
|
"train_speed(iter/s)": 0.035224 |
|
}, |
|
{ |
|
"epoch": 0.26936026936026936, |
|
"grad_norm": 0.3214051528089464, |
|
"learning_rate": 8.765357330018056e-06, |
|
"loss": 1.640869140625, |
|
"memory(GiB)": 22.58, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.035311 |
|
}, |
|
{ |
|
"epoch": 0.27474747474747474, |
|
"grad_norm": 0.8127331172569063, |
|
"learning_rate": 8.705697340249275e-06, |
|
"loss": 2.334716796875, |
|
"memory(GiB)": 22.58, |
|
"step": 51, |
|
"train_speed(iter/s)": 0.035368 |
|
}, |
|
{ |
|
"epoch": 0.2801346801346801, |
|
"grad_norm": 0.6993353179443554, |
|
"learning_rate": 8.644843137107058e-06, |
|
"loss": 2.2666015625, |
|
"memory(GiB)": 22.58, |
|
"step": 52, |
|
"train_speed(iter/s)": 0.03541 |
|
}, |
|
{ |
|
"epoch": 0.2855218855218855, |
|
"grad_norm": 0.7930646229400613, |
|
"learning_rate": 8.582814331718961e-06, |
|
"loss": 1.73876953125, |
|
"memory(GiB)": 22.58, |
|
"step": 53, |
|
"train_speed(iter/s)": 0.035443 |
|
}, |
|
{ |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 0.47348696234661886, |
|
"learning_rate": 8.519630913744726e-06, |
|
"loss": 1.8544921875, |
|
"memory(GiB)": 22.58, |
|
"step": 54, |
|
"train_speed(iter/s)": 0.035485 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 0.5105789152298116, |
|
"learning_rate": 8.455313244934324e-06, |
|
"loss": 2.10107421875, |
|
"memory(GiB)": 22.58, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.03552 |
|
}, |
|
{ |
|
"epoch": 0.30168350168350166, |
|
"grad_norm": 0.48874730617457113, |
|
"learning_rate": 8.389882052566106e-06, |
|
"loss": 2.19189453125, |
|
"memory(GiB)": 22.58, |
|
"step": 56, |
|
"train_speed(iter/s)": 0.035547 |
|
}, |
|
{ |
|
"epoch": 0.30707070707070705, |
|
"grad_norm": 0.7017590448005361, |
|
"learning_rate": 8.32335842276713e-06, |
|
"loss": 1.605224609375, |
|
"memory(GiB)": 22.58, |
|
"step": 57, |
|
"train_speed(iter/s)": 0.035484 |
|
}, |
|
{ |
|
"epoch": 0.31245791245791243, |
|
"grad_norm": 0.7736924894631574, |
|
"learning_rate": 8.255763793717868e-06, |
|
"loss": 2.123779296875, |
|
"memory(GiB)": 22.58, |
|
"step": 58, |
|
"train_speed(iter/s)": 0.035432 |
|
}, |
|
{ |
|
"epoch": 0.3178451178451178, |
|
"grad_norm": 0.6091631207035194, |
|
"learning_rate": 8.18711994874345e-06, |
|
"loss": 1.8798828125, |
|
"memory(GiB)": 22.58, |
|
"step": 59, |
|
"train_speed(iter/s)": 0.035351 |
|
}, |
|
{ |
|
"epoch": 0.32323232323232326, |
|
"grad_norm": 0.6745360872937951, |
|
"learning_rate": 8.117449009293668e-06, |
|
"loss": 2.36767578125, |
|
"memory(GiB)": 22.58, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.035291 |
|
}, |
|
{ |
|
"epoch": 0.32861952861952864, |
|
"grad_norm": 1.1170607516843722, |
|
"learning_rate": 8.046773427814043e-06, |
|
"loss": 2.153076171875, |
|
"memory(GiB)": 22.58, |
|
"step": 61, |
|
"train_speed(iter/s)": 0.035255 |
|
}, |
|
{ |
|
"epoch": 0.33400673400673403, |
|
"grad_norm": 0.42517306211931166, |
|
"learning_rate": 7.975115980510187e-06, |
|
"loss": 1.717041015625, |
|
"memory(GiB)": 22.58, |
|
"step": 62, |
|
"train_speed(iter/s)": 0.035224 |
|
}, |
|
{ |
|
"epoch": 0.3393939393939394, |
|
"grad_norm": 0.8043024113222557, |
|
"learning_rate": 7.902499760007867e-06, |
|
"loss": 1.85888671875, |
|
"memory(GiB)": 22.58, |
|
"step": 63, |
|
"train_speed(iter/s)": 0.035142 |
|
}, |
|
{ |
|
"epoch": 0.3447811447811448, |
|
"grad_norm": 0.9761638945939747, |
|
"learning_rate": 7.828948167911073e-06, |
|
"loss": 1.906005859375, |
|
"memory(GiB)": 22.58, |
|
"step": 64, |
|
"train_speed(iter/s)": 0.035063 |
|
}, |
|
{ |
|
"epoch": 0.3501683501683502, |
|
"grad_norm": 0.4137734068293326, |
|
"learning_rate": 7.754484907260513e-06, |
|
"loss": 2.05712890625, |
|
"memory(GiB)": 22.58, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.034992 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 0.6313489954771672, |
|
"learning_rate": 7.679133974894984e-06, |
|
"loss": 1.56591796875, |
|
"memory(GiB)": 22.58, |
|
"step": 66, |
|
"train_speed(iter/s)": 0.035062 |
|
}, |
|
{ |
|
"epoch": 0.36094276094276095, |
|
"grad_norm": 0.7916770866661113, |
|
"learning_rate": 7.602919653718044e-06, |
|
"loss": 1.32373046875, |
|
"memory(GiB)": 22.58, |
|
"step": 67, |
|
"train_speed(iter/s)": 0.035123 |
|
}, |
|
{ |
|
"epoch": 0.36632996632996634, |
|
"grad_norm": 0.7005145101509135, |
|
"learning_rate": 7.5258665048725065e-06, |
|
"loss": 1.677490234375, |
|
"memory(GiB)": 22.58, |
|
"step": 68, |
|
"train_speed(iter/s)": 0.035192 |
|
}, |
|
{ |
|
"epoch": 0.3717171717171717, |
|
"grad_norm": 0.5600472715983401, |
|
"learning_rate": 7.447999359825263e-06, |
|
"loss": 1.8934326171875, |
|
"memory(GiB)": 22.58, |
|
"step": 69, |
|
"train_speed(iter/s)": 0.035242 |
|
}, |
|
{ |
|
"epoch": 0.3771043771043771, |
|
"grad_norm": 0.7799156688047453, |
|
"learning_rate": 7.369343312364994e-06, |
|
"loss": 1.737060546875, |
|
"memory(GiB)": 22.58, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.035303 |
|
}, |
|
{ |
|
"epoch": 0.3824915824915825, |
|
"grad_norm": 1.0088361337375438, |
|
"learning_rate": 7.289923710515338e-06, |
|
"loss": 2.55859375, |
|
"memory(GiB)": 22.58, |
|
"step": 71, |
|
"train_speed(iter/s)": 0.035339 |
|
}, |
|
{ |
|
"epoch": 0.3878787878787879, |
|
"grad_norm": 0.7778606766770365, |
|
"learning_rate": 7.2097661483661355e-06, |
|
"loss": 2.3927001953125, |
|
"memory(GiB)": 22.58, |
|
"step": 72, |
|
"train_speed(iter/s)": 0.035398 |
|
}, |
|
{ |
|
"epoch": 0.39326599326599326, |
|
"grad_norm": 0.7503526567701239, |
|
"learning_rate": 7.128896457825364e-06, |
|
"loss": 2.4095458984375, |
|
"memory(GiB)": 22.58, |
|
"step": 73, |
|
"train_speed(iter/s)": 0.035435 |
|
}, |
|
{ |
|
"epoch": 0.39865319865319865, |
|
"grad_norm": 0.9293852718192778, |
|
"learning_rate": 7.047340700294454e-06, |
|
"loss": 2.0943603515625, |
|
"memory(GiB)": 22.58, |
|
"step": 74, |
|
"train_speed(iter/s)": 0.035473 |
|
}, |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"grad_norm": 1.2981158494810365, |
|
"learning_rate": 6.965125158269619e-06, |
|
"loss": 2.36279296875, |
|
"memory(GiB)": 22.58, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.035498 |
|
}, |
|
{ |
|
"epoch": 0.4094276094276094, |
|
"grad_norm": 0.5915357318010657, |
|
"learning_rate": 6.88227632687196e-06, |
|
"loss": 1.13037109375, |
|
"memory(GiB)": 22.58, |
|
"step": 76, |
|
"train_speed(iter/s)": 0.035521 |
|
}, |
|
{ |
|
"epoch": 0.4148148148148148, |
|
"grad_norm": 0.8289109263502568, |
|
"learning_rate": 6.798820905309036e-06, |
|
"loss": 2.245849609375, |
|
"memory(GiB)": 22.58, |
|
"step": 77, |
|
"train_speed(iter/s)": 0.035549 |
|
}, |
|
{ |
|
"epoch": 0.4202020202020202, |
|
"grad_norm": 0.7332772758108902, |
|
"learning_rate": 6.714785788270658e-06, |
|
"loss": 1.794189453125, |
|
"memory(GiB)": 22.58, |
|
"step": 78, |
|
"train_speed(iter/s)": 0.035574 |
|
}, |
|
{ |
|
"epoch": 0.4255892255892256, |
|
"grad_norm": 0.8695389561000924, |
|
"learning_rate": 6.63019805726171e-06, |
|
"loss": 2.107177734375, |
|
"memory(GiB)": 22.58, |
|
"step": 79, |
|
"train_speed(iter/s)": 0.035564 |
|
}, |
|
{ |
|
"epoch": 0.43097643097643096, |
|
"grad_norm": 1.0578963540355828, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 2.2099609375, |
|
"memory(GiB)": 22.58, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.035518 |
|
}, |
|
{ |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 0.5355473518839581, |
|
"learning_rate": 6.459473961005168e-06, |
|
"loss": 1.679931640625, |
|
"memory(GiB)": 22.58, |
|
"step": 81, |
|
"train_speed(iter/s)": 0.035449 |
|
}, |
|
{ |
|
"epoch": 0.4417508417508417, |
|
"grad_norm": 0.47562295475695077, |
|
"learning_rate": 6.373392614011952e-06, |
|
"loss": 1.548828125, |
|
"memory(GiB)": 22.58, |
|
"step": 82, |
|
"train_speed(iter/s)": 0.03541 |
|
}, |
|
{ |
|
"epoch": 0.4471380471380471, |
|
"grad_norm": 1.1873250939202482, |
|
"learning_rate": 6.286868671826513e-06, |
|
"loss": 2.3310546875, |
|
"memory(GiB)": 22.58, |
|
"step": 83, |
|
"train_speed(iter/s)": 0.035383 |
|
}, |
|
{ |
|
"epoch": 0.45252525252525255, |
|
"grad_norm": 0.6325848523967413, |
|
"learning_rate": 6.19993001801283e-06, |
|
"loss": 1.63232421875, |
|
"memory(GiB)": 22.58, |
|
"step": 84, |
|
"train_speed(iter/s)": 0.035357 |
|
}, |
|
{ |
|
"epoch": 0.45791245791245794, |
|
"grad_norm": 0.6180246232374331, |
|
"learning_rate": 6.112604669781572e-06, |
|
"loss": 2.5283203125, |
|
"memory(GiB)": 22.58, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.035328 |
|
}, |
|
{ |
|
"epoch": 0.4632996632996633, |
|
"grad_norm": 0.9254342636136799, |
|
"learning_rate": 6.024920768961153e-06, |
|
"loss": 2.09814453125, |
|
"memory(GiB)": 22.58, |
|
"step": 86, |
|
"train_speed(iter/s)": 0.03531 |
|
}, |
|
{ |
|
"epoch": 0.4686868686868687, |
|
"grad_norm": 1.0220943585915119, |
|
"learning_rate": 5.936906572928625e-06, |
|
"loss": 1.8603515625, |
|
"memory(GiB)": 22.58, |
|
"step": 87, |
|
"train_speed(iter/s)": 0.035243 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 0.547874150160307, |
|
"learning_rate": 5.848590445503345e-06, |
|
"loss": 2.2890625, |
|
"memory(GiB)": 22.58, |
|
"step": 88, |
|
"train_speed(iter/s)": 0.03516 |
|
}, |
|
{ |
|
"epoch": 0.4794612794612795, |
|
"grad_norm": 0.7203446700675221, |
|
"learning_rate": 5.760000847806337e-06, |
|
"loss": 1.68115234375, |
|
"memory(GiB)": 22.58, |
|
"step": 89, |
|
"train_speed(iter/s)": 0.035117 |
|
}, |
|
{ |
|
"epoch": 0.48484848484848486, |
|
"grad_norm": 0.7628245708662847, |
|
"learning_rate": 5.671166329088278e-06, |
|
"loss": 2.126953125, |
|
"memory(GiB)": 22.58, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.035147 |
|
}, |
|
{ |
|
"epoch": 0.49023569023569025, |
|
"grad_norm": 0.8089999734614459, |
|
"learning_rate": 5.582115517529114e-06, |
|
"loss": 1.948486328125, |
|
"memory(GiB)": 22.58, |
|
"step": 91, |
|
"train_speed(iter/s)": 0.035179 |
|
}, |
|
{ |
|
"epoch": 0.49562289562289563, |
|
"grad_norm": 0.5039876551970663, |
|
"learning_rate": 5.4928771110122185e-06, |
|
"loss": 1.849853515625, |
|
"memory(GiB)": 22.58, |
|
"step": 92, |
|
"train_speed(iter/s)": 0.035212 |
|
}, |
|
{ |
|
"epoch": 0.501010101010101, |
|
"grad_norm": 0.9008917409254343, |
|
"learning_rate": 5.403479867876087e-06, |
|
"loss": 2.642578125, |
|
"memory(GiB)": 22.58, |
|
"step": 93, |
|
"train_speed(iter/s)": 0.035235 |
|
}, |
|
{ |
|
"epoch": 0.5063973063973064, |
|
"grad_norm": 1.1384096826151604, |
|
"learning_rate": 5.3139525976465675e-06, |
|
"loss": 2.49365234375, |
|
"memory(GiB)": 22.58, |
|
"step": 94, |
|
"train_speed(iter/s)": 0.035265 |
|
}, |
|
{ |
|
"epoch": 0.5117845117845118, |
|
"grad_norm": 0.7491826485818727, |
|
"learning_rate": 5.224324151752575e-06, |
|
"loss": 1.88037109375, |
|
"memory(GiB)": 22.58, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.035291 |
|
}, |
|
{ |
|
"epoch": 0.5171717171717172, |
|
"grad_norm": 0.6169314437426718, |
|
"learning_rate": 5.134623414228315e-06, |
|
"loss": 1.485595703125, |
|
"memory(GiB)": 22.58, |
|
"step": 96, |
|
"train_speed(iter/s)": 0.035322 |
|
}, |
|
{ |
|
"epoch": 0.5225589225589226, |
|
"grad_norm": 0.7458411085328407, |
|
"learning_rate": 5.04487929240499e-06, |
|
"loss": 2.030517578125, |
|
"memory(GiB)": 22.58, |
|
"step": 97, |
|
"train_speed(iter/s)": 0.035358 |
|
}, |
|
{ |
|
"epoch": 0.5279461279461279, |
|
"grad_norm": 0.36969067992245414, |
|
"learning_rate": 4.955120707595011e-06, |
|
"loss": 1.82421875, |
|
"memory(GiB)": 22.58, |
|
"step": 98, |
|
"train_speed(iter/s)": 0.03539 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.7184526746731991, |
|
"learning_rate": 4.865376585771687e-06, |
|
"loss": 2.1650390625, |
|
"memory(GiB)": 22.58, |
|
"step": 99, |
|
"train_speed(iter/s)": 0.035417 |
|
}, |
|
{ |
|
"epoch": 0.5387205387205387, |
|
"grad_norm": 0.5860047275017632, |
|
"learning_rate": 4.775675848247427e-06, |
|
"loss": 2.016845703125, |
|
"memory(GiB)": 22.58, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.035451 |
|
}, |
|
{ |
|
"epoch": 0.5441077441077441, |
|
"grad_norm": 0.6740666234718802, |
|
"learning_rate": 4.686047402353433e-06, |
|
"loss": 1.481689453125, |
|
"memory(GiB)": 22.58, |
|
"step": 101, |
|
"train_speed(iter/s)": 0.035484 |
|
}, |
|
{ |
|
"epoch": 0.5494949494949495, |
|
"grad_norm": 0.5962985498733315, |
|
"learning_rate": 4.596520132123915e-06, |
|
"loss": 2.225341796875, |
|
"memory(GiB)": 22.58, |
|
"step": 102, |
|
"train_speed(iter/s)": 0.035522 |
|
}, |
|
{ |
|
"epoch": 0.5548821548821549, |
|
"grad_norm": 0.6185754487719404, |
|
"learning_rate": 4.507122888987782e-06, |
|
"loss": 2.630615234375, |
|
"memory(GiB)": 22.58, |
|
"step": 103, |
|
"train_speed(iter/s)": 0.035566 |
|
}, |
|
{ |
|
"epoch": 0.5602693602693603, |
|
"grad_norm": 0.8891703200104817, |
|
"learning_rate": 4.417884482470887e-06, |
|
"loss": 1.98291015625, |
|
"memory(GiB)": 22.58, |
|
"step": 104, |
|
"train_speed(iter/s)": 0.03558 |
|
}, |
|
{ |
|
"epoch": 0.5656565656565656, |
|
"grad_norm": 0.5620520767612842, |
|
"learning_rate": 4.3288336709117246e-06, |
|
"loss": 1.933349609375, |
|
"memory(GiB)": 22.58, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.035549 |
|
}, |
|
{ |
|
"epoch": 0.571043771043771, |
|
"grad_norm": 1.3690550098042635, |
|
"learning_rate": 4.239999152193664e-06, |
|
"loss": 2.217529296875, |
|
"memory(GiB)": 22.58, |
|
"step": 106, |
|
"train_speed(iter/s)": 0.035527 |
|
}, |
|
{ |
|
"epoch": 0.5764309764309764, |
|
"grad_norm": 0.4160377433886458, |
|
"learning_rate": 4.1514095544966556e-06, |
|
"loss": 1.737060546875, |
|
"memory(GiB)": 22.58, |
|
"step": 107, |
|
"train_speed(iter/s)": 0.03551 |
|
}, |
|
{ |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 0.8209806760015574, |
|
"learning_rate": 4.063093427071376e-06, |
|
"loss": 2.782470703125, |
|
"memory(GiB)": 22.58, |
|
"step": 108, |
|
"train_speed(iter/s)": 0.035486 |
|
}, |
|
{ |
|
"epoch": 0.5872053872053872, |
|
"grad_norm": 0.726795857048424, |
|
"learning_rate": 3.975079231038848e-06, |
|
"loss": 2.009521484375, |
|
"memory(GiB)": 22.58, |
|
"step": 109, |
|
"train_speed(iter/s)": 0.035449 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 1.2624010183388914, |
|
"learning_rate": 3.887395330218429e-06, |
|
"loss": 2.59814453125, |
|
"memory(GiB)": 22.58, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.035431 |
|
}, |
|
{ |
|
"epoch": 0.597979797979798, |
|
"grad_norm": 0.7513165711048129, |
|
"learning_rate": 3.8000699819871704e-06, |
|
"loss": 1.6396484375, |
|
"memory(GiB)": 22.58, |
|
"step": 111, |
|
"train_speed(iter/s)": 0.035402 |
|
}, |
|
{ |
|
"epoch": 0.6033670033670033, |
|
"grad_norm": 0.4587115862936887, |
|
"learning_rate": 3.7131313281734895e-06, |
|
"loss": 2.044189453125, |
|
"memory(GiB)": 22.58, |
|
"step": 112, |
|
"train_speed(iter/s)": 0.035373 |
|
}, |
|
{ |
|
"epoch": 0.6087542087542087, |
|
"grad_norm": 0.41256540620865373, |
|
"learning_rate": 3.62660738598805e-06, |
|
"loss": 1.9287109375, |
|
"memory(GiB)": 22.58, |
|
"step": 113, |
|
"train_speed(iter/s)": 0.03534 |
|
}, |
|
{ |
|
"epoch": 0.6141414141414141, |
|
"grad_norm": 0.4286929355926436, |
|
"learning_rate": 3.540526038994834e-06, |
|
"loss": 1.646728515625, |
|
"memory(GiB)": 22.58, |
|
"step": 114, |
|
"train_speed(iter/s)": 0.035359 |
|
}, |
|
{ |
|
"epoch": 0.6195286195286195, |
|
"grad_norm": 0.8246295061207459, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 1.7587890625, |
|
"memory(GiB)": 22.58, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.035387 |
|
}, |
|
{ |
|
"epoch": 0.6249158249158249, |
|
"grad_norm": 0.653674454928138, |
|
"learning_rate": 3.3698019427382912e-06, |
|
"loss": 1.9765625, |
|
"memory(GiB)": 22.58, |
|
"step": 116, |
|
"train_speed(iter/s)": 0.035417 |
|
}, |
|
{ |
|
"epoch": 0.6303030303030303, |
|
"grad_norm": 0.6402748838297282, |
|
"learning_rate": 3.2852142117293435e-06, |
|
"loss": 1.94970703125, |
|
"memory(GiB)": 22.58, |
|
"step": 117, |
|
"train_speed(iter/s)": 0.035431 |
|
}, |
|
{ |
|
"epoch": 0.6356902356902356, |
|
"grad_norm": 0.5582058394376362, |
|
"learning_rate": 3.2011790946909673e-06, |
|
"loss": 1.9755859375, |
|
"memory(GiB)": 22.58, |
|
"step": 118, |
|
"train_speed(iter/s)": 0.03546 |
|
}, |
|
{ |
|
"epoch": 0.641077441077441, |
|
"grad_norm": 0.8447371297083311, |
|
"learning_rate": 3.11772367312804e-06, |
|
"loss": 1.784423828125, |
|
"memory(GiB)": 22.58, |
|
"step": 119, |
|
"train_speed(iter/s)": 0.035493 |
|
}, |
|
{ |
|
"epoch": 0.6464646464646465, |
|
"grad_norm": 0.7640836687261319, |
|
"learning_rate": 3.0348748417303826e-06, |
|
"loss": 1.76171875, |
|
"memory(GiB)": 22.58, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.035513 |
|
}, |
|
{ |
|
"epoch": 0.6518518518518519, |
|
"grad_norm": 0.6689239585125656, |
|
"learning_rate": 2.9526592997055488e-06, |
|
"loss": 2.076904296875, |
|
"memory(GiB)": 22.58, |
|
"step": 121, |
|
"train_speed(iter/s)": 0.03554 |
|
}, |
|
{ |
|
"epoch": 0.6572390572390573, |
|
"grad_norm": 0.8205443169011045, |
|
"learning_rate": 2.871103542174637e-06, |
|
"loss": 2.4423828125, |
|
"memory(GiB)": 22.58, |
|
"step": 122, |
|
"train_speed(iter/s)": 0.035564 |
|
}, |
|
{ |
|
"epoch": 0.6626262626262627, |
|
"grad_norm": 0.3861380215034983, |
|
"learning_rate": 2.790233851633868e-06, |
|
"loss": 1.405517578125, |
|
"memory(GiB)": 22.58, |
|
"step": 123, |
|
"train_speed(iter/s)": 0.035589 |
|
}, |
|
{ |
|
"epoch": 0.6680134680134681, |
|
"grad_norm": 0.9319720706049784, |
|
"learning_rate": 2.7100762894846633e-06, |
|
"loss": 1.884033203125, |
|
"memory(GiB)": 22.58, |
|
"step": 124, |
|
"train_speed(iter/s)": 0.035611 |
|
}, |
|
{ |
|
"epoch": 0.6734006734006734, |
|
"grad_norm": 0.4894495365923113, |
|
"learning_rate": 2.6306566876350072e-06, |
|
"loss": 1.992431640625, |
|
"memory(GiB)": 22.58, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.035618 |
|
}, |
|
{ |
|
"epoch": 0.6787878787878788, |
|
"grad_norm": 0.5156966779296556, |
|
"learning_rate": 2.55200064017474e-06, |
|
"loss": 1.7987060546875, |
|
"memory(GiB)": 22.58, |
|
"step": 126, |
|
"train_speed(iter/s)": 0.035593 |
|
}, |
|
{ |
|
"epoch": 0.6841750841750842, |
|
"grad_norm": 0.39627149470201456, |
|
"learning_rate": 2.4741334951274948e-06, |
|
"loss": 1.779541015625, |
|
"memory(GiB)": 22.58, |
|
"step": 127, |
|
"train_speed(iter/s)": 0.035563 |
|
}, |
|
{ |
|
"epoch": 0.6895622895622896, |
|
"grad_norm": 0.7990132228587018, |
|
"learning_rate": 2.3970803462819586e-06, |
|
"loss": 2.385498046875, |
|
"memory(GiB)": 22.58, |
|
"step": 128, |
|
"train_speed(iter/s)": 0.035533 |
|
}, |
|
{ |
|
"epoch": 0.694949494949495, |
|
"grad_norm": 0.542336867995926, |
|
"learning_rate": 2.320866025105016e-06, |
|
"loss": 1.775390625, |
|
"memory(GiB)": 22.58, |
|
"step": 129, |
|
"train_speed(iter/s)": 0.035487 |
|
}, |
|
{ |
|
"epoch": 0.7003367003367004, |
|
"grad_norm": 0.40553603638944413, |
|
"learning_rate": 2.245515092739488e-06, |
|
"loss": 1.65771484375, |
|
"memory(GiB)": 22.58, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.035457 |
|
}, |
|
{ |
|
"epoch": 0.7057239057239058, |
|
"grad_norm": 0.5705311307759141, |
|
"learning_rate": 2.171051832088928e-06, |
|
"loss": 1.392578125, |
|
"memory(GiB)": 22.58, |
|
"step": 131, |
|
"train_speed(iter/s)": 0.035439 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 0.5637194621292295, |
|
"learning_rate": 2.097500239992132e-06, |
|
"loss": 1.808349609375, |
|
"memory(GiB)": 22.58, |
|
"step": 132, |
|
"train_speed(iter/s)": 0.035412 |
|
}, |
|
{ |
|
"epoch": 0.7164983164983165, |
|
"grad_norm": 1.0166298249729564, |
|
"learning_rate": 2.0248840194898155e-06, |
|
"loss": 1.88232421875, |
|
"memory(GiB)": 22.58, |
|
"step": 133, |
|
"train_speed(iter/s)": 0.035367 |
|
}, |
|
{ |
|
"epoch": 0.7218855218855219, |
|
"grad_norm": 0.365517442677317, |
|
"learning_rate": 1.95322657218596e-06, |
|
"loss": 1.8359375, |
|
"memory(GiB)": 22.58, |
|
"step": 134, |
|
"train_speed(iter/s)": 0.035304 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.5937921378630181, |
|
"learning_rate": 1.8825509907063328e-06, |
|
"loss": 2.16162109375, |
|
"memory(GiB)": 22.58, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.035244 |
|
}, |
|
{ |
|
"epoch": 0.7326599326599327, |
|
"grad_norm": 0.5630691840328598, |
|
"learning_rate": 1.8128800512565514e-06, |
|
"loss": 1.953369140625, |
|
"memory(GiB)": 22.58, |
|
"step": 136, |
|
"train_speed(iter/s)": 0.035272 |
|
}, |
|
{ |
|
"epoch": 0.7380471380471381, |
|
"grad_norm": 0.9036946278139879, |
|
"learning_rate": 1.7442362062821323e-06, |
|
"loss": 3.1923828125, |
|
"memory(GiB)": 22.58, |
|
"step": 137, |
|
"train_speed(iter/s)": 0.035287 |
|
}, |
|
{ |
|
"epoch": 0.7434343434343434, |
|
"grad_norm": 0.5335511498935785, |
|
"learning_rate": 1.6766415772328732e-06, |
|
"loss": 1.705322265625, |
|
"memory(GiB)": 22.58, |
|
"step": 138, |
|
"train_speed(iter/s)": 0.035295 |
|
}, |
|
{ |
|
"epoch": 0.7488215488215488, |
|
"grad_norm": 0.8149099249815346, |
|
"learning_rate": 1.610117947433897e-06, |
|
"loss": 2.81689453125, |
|
"memory(GiB)": 22.58, |
|
"step": 139, |
|
"train_speed(iter/s)": 0.035308 |
|
}, |
|
{ |
|
"epoch": 0.7542087542087542, |
|
"grad_norm": 0.5287002241309334, |
|
"learning_rate": 1.544686755065677e-06, |
|
"loss": 1.266357421875, |
|
"memory(GiB)": 22.58, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.035318 |
|
}, |
|
{ |
|
"epoch": 0.7595959595959596, |
|
"grad_norm": 0.6139302197140588, |
|
"learning_rate": 1.4803690862552755e-06, |
|
"loss": 1.817626953125, |
|
"memory(GiB)": 22.58, |
|
"step": 141, |
|
"train_speed(iter/s)": 0.035343 |
|
}, |
|
{ |
|
"epoch": 0.764983164983165, |
|
"grad_norm": 0.6333656991964685, |
|
"learning_rate": 1.4171856682810386e-06, |
|
"loss": 2.101806640625, |
|
"memory(GiB)": 22.58, |
|
"step": 142, |
|
"train_speed(iter/s)": 0.035364 |
|
}, |
|
{ |
|
"epoch": 0.7703703703703704, |
|
"grad_norm": 0.8829740683592863, |
|
"learning_rate": 1.3551568628929434e-06, |
|
"loss": 2.508056640625, |
|
"memory(GiB)": 22.58, |
|
"step": 143, |
|
"train_speed(iter/s)": 0.03539 |
|
}, |
|
{ |
|
"epoch": 0.7757575757575758, |
|
"grad_norm": 0.5801508492146695, |
|
"learning_rate": 1.2943026597507268e-06, |
|
"loss": 1.6142578125, |
|
"memory(GiB)": 22.58, |
|
"step": 144, |
|
"train_speed(iter/s)": 0.035413 |
|
}, |
|
{ |
|
"epoch": 0.7811447811447811, |
|
"grad_norm": 0.48056036748223746, |
|
"learning_rate": 1.234642669981946e-06, |
|
"loss": 1.942138671875, |
|
"memory(GiB)": 22.58, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.035431 |
|
}, |
|
{ |
|
"epoch": 0.7865319865319865, |
|
"grad_norm": 0.5473637984491948, |
|
"learning_rate": 1.1761961198620081e-06, |
|
"loss": 1.748779296875, |
|
"memory(GiB)": 22.58, |
|
"step": 146, |
|
"train_speed(iter/s)": 0.035455 |
|
}, |
|
{ |
|
"epoch": 0.7919191919191919, |
|
"grad_norm": 0.7226102542834439, |
|
"learning_rate": 1.118981844618236e-06, |
|
"loss": 1.657470703125, |
|
"memory(GiB)": 22.58, |
|
"step": 147, |
|
"train_speed(iter/s)": 0.035472 |
|
}, |
|
{ |
|
"epoch": 0.7973063973063973, |
|
"grad_norm": 0.677002948688539, |
|
"learning_rate": 1.06301828235994e-06, |
|
"loss": 1.730224609375, |
|
"memory(GiB)": 22.58, |
|
"step": 148, |
|
"train_speed(iter/s)": 0.035492 |
|
}, |
|
{ |
|
"epoch": 0.8026936026936027, |
|
"grad_norm": 0.4690206204454014, |
|
"learning_rate": 1.0083234681364934e-06, |
|
"loss": 1.97509765625, |
|
"memory(GiB)": 22.58, |
|
"step": 149, |
|
"train_speed(iter/s)": 0.035513 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"grad_norm": 0.38431237166068455, |
|
"learning_rate": 9.549150281252633e-07, |
|
"loss": 1.977783203125, |
|
"memory(GiB)": 22.58, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.035533 |
|
}, |
|
{ |
|
"epoch": 0.8134680134680135, |
|
"grad_norm": 1.4318443328161967, |
|
"learning_rate": 9.028101739513406e-07, |
|
"loss": 2.696533203125, |
|
"memory(GiB)": 22.58, |
|
"step": 151, |
|
"train_speed(iter/s)": 0.035549 |
|
}, |
|
{ |
|
"epoch": 0.8188552188552188, |
|
"grad_norm": 0.39825000243591335, |
|
"learning_rate": 8.520256971408453e-07, |
|
"loss": 1.52294921875, |
|
"memory(GiB)": 22.58, |
|
"step": 152, |
|
"train_speed(iter/s)": 0.035566 |
|
}, |
|
{ |
|
"epoch": 0.8242424242424242, |
|
"grad_norm": 0.403223921534723, |
|
"learning_rate": 8.025779637096138e-07, |
|
"loss": 2.0869140625, |
|
"memory(GiB)": 22.58, |
|
"step": 153, |
|
"train_speed(iter/s)": 0.035581 |
|
}, |
|
{ |
|
"epoch": 0.8296296296296296, |
|
"grad_norm": 0.39408518518211616, |
|
"learning_rate": 7.544829088890326e-07, |
|
"loss": 2.085693359375, |
|
"memory(GiB)": 22.58, |
|
"step": 154, |
|
"train_speed(iter/s)": 0.035601 |
|
}, |
|
{ |
|
"epoch": 0.835016835016835, |
|
"grad_norm": 0.6580639598152973, |
|
"learning_rate": 7.077560319906696e-07, |
|
"loss": 1.58740234375, |
|
"memory(GiB)": 22.58, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.035601 |
|
}, |
|
{ |
|
"epoch": 0.8404040404040404, |
|
"grad_norm": 0.5455643216936216, |
|
"learning_rate": 6.624123914114122e-07, |
|
"loss": 1.76953125, |
|
"memory(GiB)": 22.58, |
|
"step": 156, |
|
"train_speed(iter/s)": 0.035584 |
|
}, |
|
{ |
|
"epoch": 0.8457912457912458, |
|
"grad_norm": 0.9580661740362665, |
|
"learning_rate": 6.184665997806832e-07, |
|
"loss": 2.3505859375, |
|
"memory(GiB)": 22.58, |
|
"step": 157, |
|
"train_speed(iter/s)": 0.035562 |
|
}, |
|
{ |
|
"epoch": 0.8511784511784511, |
|
"grad_norm": 0.49273093322057226, |
|
"learning_rate": 5.759328192513075e-07, |
|
"loss": 1.632080078125, |
|
"memory(GiB)": 22.58, |
|
"step": 158, |
|
"train_speed(iter/s)": 0.035543 |
|
}, |
|
{ |
|
"epoch": 0.8565656565656565, |
|
"grad_norm": 0.5074137587596991, |
|
"learning_rate": 5.348247569355736e-07, |
|
"loss": 1.71240234375, |
|
"memory(GiB)": 22.58, |
|
"step": 159, |
|
"train_speed(iter/s)": 0.03552 |
|
}, |
|
{ |
|
"epoch": 0.8619528619528619, |
|
"grad_norm": 0.7185716029221749, |
|
"learning_rate": 4.951556604879049e-07, |
|
"loss": 2.36669921875, |
|
"memory(GiB)": 22.58, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.035503 |
|
}, |
|
{ |
|
"epoch": 0.8673400673400673, |
|
"grad_norm": 0.7811542866299452, |
|
"learning_rate": 4.569383138356276e-07, |
|
"loss": 1.678955078125, |
|
"memory(GiB)": 22.58, |
|
"step": 161, |
|
"train_speed(iter/s)": 0.035485 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 0.5128778192942757, |
|
"learning_rate": 4.201850330591678e-07, |
|
"loss": 2.072998046875, |
|
"memory(GiB)": 22.58, |
|
"step": 162, |
|
"train_speed(iter/s)": 0.035459 |
|
}, |
|
{ |
|
"epoch": 0.8781144781144781, |
|
"grad_norm": 0.6851552480944826, |
|
"learning_rate": 3.8490766242301356e-07, |
|
"loss": 1.55322265625, |
|
"memory(GiB)": 22.58, |
|
"step": 163, |
|
"train_speed(iter/s)": 0.035422 |
|
}, |
|
{ |
|
"epoch": 0.8835016835016835, |
|
"grad_norm": 1.0656634793505568, |
|
"learning_rate": 3.511175705587433e-07, |
|
"loss": 2.09228515625, |
|
"memory(GiB)": 22.58, |
|
"step": 164, |
|
"train_speed(iter/s)": 0.035412 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.4704867924767551, |
|
"learning_rate": 3.18825646801314e-07, |
|
"loss": 2.178955078125, |
|
"memory(GiB)": 22.58, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.03542 |
|
}, |
|
{ |
|
"epoch": 0.8942760942760942, |
|
"grad_norm": 0.3438531193817133, |
|
"learning_rate": 2.8804229767982637e-07, |
|
"loss": 1.828125, |
|
"memory(GiB)": 22.58, |
|
"step": 166, |
|
"train_speed(iter/s)": 0.035441 |
|
}, |
|
{ |
|
"epoch": 0.8996632996632996, |
|
"grad_norm": 0.9072486327466182, |
|
"learning_rate": 2.587774435638679e-07, |
|
"loss": 1.902099609375, |
|
"memory(GiB)": 22.58, |
|
"step": 167, |
|
"train_speed(iter/s)": 0.035458 |
|
}, |
|
{ |
|
"epoch": 0.9050505050505051, |
|
"grad_norm": 0.40209833194248146, |
|
"learning_rate": 2.3104051546654016e-07, |
|
"loss": 1.72314453125, |
|
"memory(GiB)": 22.58, |
|
"step": 168, |
|
"train_speed(iter/s)": 0.035472 |
|
}, |
|
{ |
|
"epoch": 0.9104377104377105, |
|
"grad_norm": 0.6534758670706157, |
|
"learning_rate": 2.0484045200517222e-07, |
|
"loss": 1.73095703125, |
|
"memory(GiB)": 22.58, |
|
"step": 169, |
|
"train_speed(iter/s)": 0.03548 |
|
}, |
|
{ |
|
"epoch": 0.9158249158249159, |
|
"grad_norm": 0.36229213242531244, |
|
"learning_rate": 1.801856965207338e-07, |
|
"loss": 1.954345703125, |
|
"memory(GiB)": 22.58, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.035499 |
|
}, |
|
{ |
|
"epoch": 0.9212121212121213, |
|
"grad_norm": 0.41462023840060064, |
|
"learning_rate": 1.5708419435684463e-07, |
|
"loss": 1.726318359375, |
|
"memory(GiB)": 22.58, |
|
"step": 171, |
|
"train_speed(iter/s)": 0.035512 |
|
}, |
|
{ |
|
"epoch": 0.9265993265993266, |
|
"grad_norm": 0.793282464162017, |
|
"learning_rate": 1.3554339029927532e-07, |
|
"loss": 2.07861328125, |
|
"memory(GiB)": 22.58, |
|
"step": 172, |
|
"train_speed(iter/s)": 0.035526 |
|
}, |
|
{ |
|
"epoch": 0.931986531986532, |
|
"grad_norm": 0.4924403822397691, |
|
"learning_rate": 1.1557022617676217e-07, |
|
"loss": 1.400634765625, |
|
"memory(GiB)": 22.58, |
|
"step": 173, |
|
"train_speed(iter/s)": 0.035541 |
|
}, |
|
{ |
|
"epoch": 0.9373737373737374, |
|
"grad_norm": 0.41980069690346106, |
|
"learning_rate": 9.717113862389993e-08, |
|
"loss": 2.12158203125, |
|
"memory(GiB)": 22.58, |
|
"step": 174, |
|
"train_speed(iter/s)": 0.03556 |
|
}, |
|
{ |
|
"epoch": 0.9427609427609428, |
|
"grad_norm": 0.8809220146060189, |
|
"learning_rate": 8.035205700685167e-08, |
|
"loss": 2.621826171875, |
|
"memory(GiB)": 22.58, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.035577 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 0.6908254679787823, |
|
"learning_rate": 6.511840151252169e-08, |
|
"loss": 1.813232421875, |
|
"memory(GiB)": 22.58, |
|
"step": 176, |
|
"train_speed(iter/s)": 0.035597 |
|
}, |
|
{ |
|
"epoch": 0.9535353535353536, |
|
"grad_norm": 0.49484208186969647, |
|
"learning_rate": 5.1475081401825553e-08, |
|
"loss": 1.9814453125, |
|
"memory(GiB)": 22.58, |
|
"step": 177, |
|
"train_speed(iter/s)": 0.035578 |
|
}, |
|
{ |
|
"epoch": 0.958922558922559, |
|
"grad_norm": 0.6989450753180266, |
|
"learning_rate": 3.9426493427611177e-08, |
|
"loss": 1.78466796875, |
|
"memory(GiB)": 22.58, |
|
"step": 178, |
|
"train_speed(iter/s)": 0.035563 |
|
}, |
|
{ |
|
"epoch": 0.9643097643097643, |
|
"grad_norm": 0.5543481036485521, |
|
"learning_rate": 2.8976520417742794e-08, |
|
"loss": 1.727783203125, |
|
"memory(GiB)": 22.58, |
|
"step": 179, |
|
"train_speed(iter/s)": 0.035552 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": 0.5545843045026326, |
|
"learning_rate": 2.012853002380466e-08, |
|
"loss": 1.75634765625, |
|
"memory(GiB)": 22.58, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.035543 |
|
}, |
|
{ |
|
"epoch": 0.9750841750841751, |
|
"grad_norm": 0.3433152184276571, |
|
"learning_rate": 1.2885373635829756e-08, |
|
"loss": 1.64208984375, |
|
"memory(GiB)": 22.58, |
|
"step": 181, |
|
"train_speed(iter/s)": 0.035533 |
|
}, |
|
{ |
|
"epoch": 0.9804713804713805, |
|
"grad_norm": 0.9002229182397717, |
|
"learning_rate": 7.249385463395375e-09, |
|
"loss": 2.177490234375, |
|
"memory(GiB)": 22.58, |
|
"step": 182, |
|
"train_speed(iter/s)": 0.035517 |
|
}, |
|
{ |
|
"epoch": 0.9858585858585859, |
|
"grad_norm": 0.5840020558119475, |
|
"learning_rate": 3.2223817833931803e-09, |
|
"loss": 1.4775390625, |
|
"memory(GiB)": 22.58, |
|
"step": 183, |
|
"train_speed(iter/s)": 0.035499 |
|
}, |
|
{ |
|
"epoch": 0.9912457912457913, |
|
"grad_norm": 0.31651969118225726, |
|
"learning_rate": 8.056603547090813e-10, |
|
"loss": 1.804931640625, |
|
"memory(GiB)": 22.58, |
|
"step": 184, |
|
"train_speed(iter/s)": 0.035513 |
|
}, |
|
{ |
|
"epoch": 0.9966329966329966, |
|
"grad_norm": 0.5699524292753597, |
|
"learning_rate": 0.0, |
|
"loss": 1.653076171875, |
|
"memory(GiB)": 22.58, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.035529 |
|
}, |
|
{ |
|
"epoch": 0.9966329966329966, |
|
"eval_loss": 0.12199707329273224, |
|
"eval_runtime": 16.4404, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 1.825, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9966329966329966, |
|
"eval_loss": 0.12199707329273224, |
|
"eval_runtime": 18.3596, |
|
"eval_samples_per_second": 1.634, |
|
"eval_steps_per_second": 1.634, |
|
"step": 185 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 185, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 664501364736.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|