Vigogne_Qwen2.5-1.5B / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500.0,
"global_step": 6237,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02405002405002405,
"grad_norm": 0.4139963388442993,
"learning_rate": 0.00019996828714700116,
"loss": 1.5971,
"step": 50
},
{
"epoch": 0.0481000481000481,
"grad_norm": 0.3423018157482147,
"learning_rate": 0.00019987316870210547,
"loss": 1.274,
"step": 100
},
{
"epoch": 0.07215007215007214,
"grad_norm": 0.3551710247993469,
"learning_rate": 0.0001997147049948582,
"loss": 1.2519,
"step": 150
},
{
"epoch": 0.0962000962000962,
"grad_norm": 0.32329073548316956,
"learning_rate": 0.0001994929965319844,
"loss": 1.2382,
"step": 200
},
{
"epoch": 0.12025012025012025,
"grad_norm": 0.48585018515586853,
"learning_rate": 0.0001992081839336419,
"loss": 1.2293,
"step": 250
},
{
"epoch": 0.1443001443001443,
"grad_norm": 0.40136224031448364,
"learning_rate": 0.00019886044784423197,
"loss": 1.2214,
"step": 300
},
{
"epoch": 0.16835016835016836,
"grad_norm": 0.574002206325531,
"learning_rate": 0.00019845000881782432,
"loss": 1.2184,
"step": 350
},
{
"epoch": 0.1924001924001924,
"grad_norm": 0.4179827570915222,
"learning_rate": 0.00019797712717826914,
"loss": 1.2064,
"step": 400
},
{
"epoch": 0.21645021645021645,
"grad_norm": 0.33033809065818787,
"learning_rate": 0.00019744210285408488,
"loss": 1.2055,
"step": 450
},
{
"epoch": 0.2405002405002405,
"grad_norm": 0.2719138562679291,
"learning_rate": 0.0001968452751882264,
"loss": 1.2077,
"step": 500
},
{
"epoch": 0.26455026455026454,
"grad_norm": 0.29797521233558655,
"learning_rate": 0.00019618702272285434,
"loss": 1.2096,
"step": 550
},
{
"epoch": 0.2886002886002886,
"grad_norm": 0.3336372673511505,
"learning_rate": 0.00019546776295924212,
"loss": 1.2072,
"step": 600
},
{
"epoch": 0.3126503126503126,
"grad_norm": 0.26755037903785706,
"learning_rate": 0.0001946879520929728,
"loss": 1.1974,
"step": 650
},
{
"epoch": 0.3367003367003367,
"grad_norm": 0.36268576979637146,
"learning_rate": 0.00019384808472459368,
"loss": 1.2045,
"step": 700
},
{
"epoch": 0.36075036075036077,
"grad_norm": 0.3121575713157654,
"learning_rate": 0.0001929486935459127,
"loss": 1.1889,
"step": 750
},
{
"epoch": 0.3848003848003848,
"grad_norm": 0.3159404993057251,
"learning_rate": 0.00019199034900213452,
"loss": 1.1921,
"step": 800
},
{
"epoch": 0.40885040885040885,
"grad_norm": 0.7236579060554504,
"learning_rate": 0.000190973658930052,
"loss": 1.194,
"step": 850
},
{
"epoch": 0.4329004329004329,
"grad_norm": 0.24907168745994568,
"learning_rate": 0.00018989926817252113,
"loss": 1.191,
"step": 900
},
{
"epoch": 0.45695045695045694,
"grad_norm": 0.24481187760829926,
"learning_rate": 0.00018876785816946505,
"loss": 1.1857,
"step": 950
},
{
"epoch": 0.481000481000481,
"grad_norm": 0.2668200731277466,
"learning_rate": 0.00018758014652566597,
"loss": 1.1957,
"step": 1000
},
{
"epoch": 0.5050505050505051,
"grad_norm": 0.2687171399593353,
"learning_rate": 0.0001863368865556191,
"loss": 1.1864,
"step": 1050
},
{
"epoch": 0.5291005291005291,
"grad_norm": 0.23915782570838928,
"learning_rate": 0.0001850388668057379,
"loss": 1.184,
"step": 1100
},
{
"epoch": 0.5531505531505532,
"grad_norm": 0.37159469723701477,
"learning_rate": 0.0001836869105542127,
"loss": 1.1849,
"step": 1150
},
{
"epoch": 0.5772005772005772,
"grad_norm": 0.2752649784088135,
"learning_rate": 0.0001822818752888408,
"loss": 1.1843,
"step": 1200
},
{
"epoch": 0.6012506012506013,
"grad_norm": 0.19733025133609772,
"learning_rate": 0.00018082465216315882,
"loss": 1.1766,
"step": 1250
},
{
"epoch": 0.6253006253006252,
"grad_norm": 0.2180165797472,
"learning_rate": 0.00017931616543122214,
"loss": 1.1865,
"step": 1300
},
{
"epoch": 0.6493506493506493,
"grad_norm": 0.25025510787963867,
"learning_rate": 0.00017775737186139038,
"loss": 1.1723,
"step": 1350
},
{
"epoch": 0.6734006734006734,
"grad_norm": 0.2865007817745209,
"learning_rate": 0.00017614926012949028,
"loss": 1.172,
"step": 1400
},
{
"epoch": 0.6974506974506974,
"grad_norm": 0.3406023681163788,
"learning_rate": 0.00017449285019174098,
"loss": 1.1795,
"step": 1450
},
{
"epoch": 0.7215007215007215,
"grad_norm": 0.19766800105571747,
"learning_rate": 0.00017278919263783978,
"loss": 1.1784,
"step": 1500
},
{
"epoch": 0.7455507455507455,
"grad_norm": 0.1965962052345276,
"learning_rate": 0.00017103936802461797,
"loss": 1.1754,
"step": 1550
},
{
"epoch": 0.7696007696007696,
"grad_norm": 0.2381555736064911,
"learning_rate": 0.00016924448619069023,
"loss": 1.1671,
"step": 1600
},
{
"epoch": 0.7936507936507936,
"grad_norm": 0.20156389474868774,
"learning_rate": 0.00016740568555253155,
"loss": 1.1738,
"step": 1650
},
{
"epoch": 0.8177008177008177,
"grad_norm": 0.18294361233711243,
"learning_rate": 0.00016552413238242857,
"loss": 1.1727,
"step": 1700
},
{
"epoch": 0.8417508417508418,
"grad_norm": 0.2975623309612274,
"learning_rate": 0.00016360102006876317,
"loss": 1.1677,
"step": 1750
},
{
"epoch": 0.8658008658008658,
"grad_norm": 0.1871371865272522,
"learning_rate": 0.0001616375683590974,
"loss": 1.1689,
"step": 1800
},
{
"epoch": 0.8898508898508899,
"grad_norm": 0.21457934379577637,
"learning_rate": 0.00015963502258654005,
"loss": 1.1605,
"step": 1850
},
{
"epoch": 0.9139009139009139,
"grad_norm": 0.20261706411838531,
"learning_rate": 0.0001575946528798853,
"loss": 1.1627,
"step": 1900
},
{
"epoch": 0.937950937950938,
"grad_norm": 0.17685186862945557,
"learning_rate": 0.0001555177533580245,
"loss": 1.1627,
"step": 1950
},
{
"epoch": 0.962000962000962,
"grad_norm": 0.212468221783638,
"learning_rate": 0.00015340564130914233,
"loss": 1.161,
"step": 2000
},
{
"epoch": 0.9860509860509861,
"grad_norm": 0.175174742937088,
"learning_rate": 0.00015125965635521724,
"loss": 1.1688,
"step": 2050
},
{
"epoch": 1.0101010101010102,
"grad_norm": 0.19970253109931946,
"learning_rate": 0.00014908115960235682,
"loss": 1.142,
"step": 2100
},
{
"epoch": 1.034151034151034,
"grad_norm": 0.21254608035087585,
"learning_rate": 0.00014687153277750676,
"loss": 1.1271,
"step": 2150
},
{
"epoch": 1.0582010582010581,
"grad_norm": 0.1651500016450882,
"learning_rate": 0.00014463217735208062,
"loss": 1.121,
"step": 2200
},
{
"epoch": 1.0822510822510822,
"grad_norm": 0.2405405044555664,
"learning_rate": 0.00014236451365306674,
"loss": 1.1313,
"step": 2250
},
{
"epoch": 1.1063011063011063,
"grad_norm": 0.17223596572875977,
"learning_rate": 0.00014006997996217593,
"loss": 1.1344,
"step": 2300
},
{
"epoch": 1.1303511303511304,
"grad_norm": 0.1969347894191742,
"learning_rate": 0.00013775003160360096,
"loss": 1.1176,
"step": 2350
},
{
"epoch": 1.1544011544011543,
"grad_norm": 0.187143936753273,
"learning_rate": 0.00013540614002096701,
"loss": 1.1322,
"step": 2400
},
{
"epoch": 1.1784511784511784,
"grad_norm": 0.1838238537311554,
"learning_rate": 0.00013303979184405826,
"loss": 1.1293,
"step": 2450
},
{
"epoch": 1.2025012025012025,
"grad_norm": 0.17928341031074524,
"learning_rate": 0.00013065248794591223,
"loss": 1.1268,
"step": 2500
},
{
"epoch": 1.2265512265512266,
"grad_norm": 0.2683047950267792,
"learning_rate": 0.00012824574249088063,
"loss": 1.1234,
"step": 2550
},
{
"epoch": 1.2506012506012505,
"grad_norm": 0.18034860491752625,
"learning_rate": 0.0001258210819742599,
"loss": 1.125,
"step": 2600
},
{
"epoch": 1.2746512746512746,
"grad_norm": 0.26357391476631165,
"learning_rate": 0.00012338004425410074,
"loss": 1.1217,
"step": 2650
},
{
"epoch": 1.2987012987012987,
"grad_norm": 0.17828579246997833,
"learning_rate": 0.00012092417757581085,
"loss": 1.1262,
"step": 2700
},
{
"epoch": 1.3227513227513228,
"grad_norm": 0.20247310400009155,
"learning_rate": 0.00011845503959016928,
"loss": 1.1246,
"step": 2750
},
{
"epoch": 1.3468013468013469,
"grad_norm": 0.17381271719932556,
"learning_rate": 0.0001159741963653755,
"loss": 1.1181,
"step": 2800
},
{
"epoch": 1.370851370851371,
"grad_norm": 0.19958114624023438,
"learning_rate": 0.00011348322139375948,
"loss": 1.1307,
"step": 2850
},
{
"epoch": 1.3949013949013949,
"grad_norm": 0.21912401914596558,
"learning_rate": 0.00011098369459378328,
"loss": 1.1264,
"step": 2900
},
{
"epoch": 1.418951418951419,
"grad_norm": 0.1694297194480896,
"learning_rate": 0.00010847720130796631,
"loss": 1.1256,
"step": 2950
},
{
"epoch": 1.443001443001443,
"grad_norm": 0.13446395099163055,
"learning_rate": 0.00010596533129737092,
"loss": 1.1258,
"step": 3000
},
{
"epoch": 1.467051467051467,
"grad_norm": 0.140371173620224,
"learning_rate": 0.00010344967773328507,
"loss": 1.1191,
"step": 3050
},
{
"epoch": 1.491101491101491,
"grad_norm": 0.18016813695430756,
"learning_rate": 0.00010093183618674224,
"loss": 1.114,
"step": 3100
},
{
"epoch": 1.5151515151515151,
"grad_norm": 0.17306862771511078,
"learning_rate": 9.84134036165192e-05,
"loss": 1.1149,
"step": 3150
},
{
"epoch": 1.5392015392015392,
"grad_norm": 0.14116255939006805,
"learning_rate": 9.589597735625377e-05,
"loss": 1.123,
"step": 3200
},
{
"epoch": 1.5632515632515633,
"grad_norm": 0.16819800436496735,
"learning_rate": 9.338115410132441e-05,
"loss": 1.1203,
"step": 3250
},
{
"epoch": 1.5873015873015874,
"grad_norm": 0.21958529949188232,
"learning_rate": 9.087052889613518e-05,
"loss": 1.1226,
"step": 3300
},
{
"epoch": 1.6113516113516113,
"grad_norm": 0.15786272287368774,
"learning_rate": 8.836569412244745e-05,
"loss": 1.1212,
"step": 3350
},
{
"epoch": 1.6354016354016354,
"grad_norm": 0.17366796731948853,
"learning_rate": 8.586823848940047e-05,
"loss": 1.1129,
"step": 3400
},
{
"epoch": 1.6594516594516593,
"grad_norm": 0.21448016166687012,
"learning_rate": 8.337974602586152e-05,
"loss": 1.1216,
"step": 3450
},
{
"epoch": 1.6835016835016834,
"grad_norm": 0.17243099212646484,
"learning_rate": 8.090179507574427e-05,
"loss": 1.1096,
"step": 3500
},
{
"epoch": 1.7075517075517075,
"grad_norm": 0.1429734081029892,
"learning_rate": 7.843595729693316e-05,
"loss": 1.1071,
"step": 3550
},
{
"epoch": 1.7316017316017316,
"grad_norm": 0.15200386941432953,
"learning_rate": 7.598379666444808e-05,
"loss": 1.1158,
"step": 3600
},
{
"epoch": 1.7556517556517557,
"grad_norm": 0.1442406326532364,
"learning_rate": 7.354686847848242e-05,
"loss": 1.112,
"step": 3650
},
{
"epoch": 1.7797017797017798,
"grad_norm": 0.17678239941596985,
"learning_rate": 7.11267183779428e-05,
"loss": 1.1118,
"step": 3700
},
{
"epoch": 1.8037518037518039,
"grad_norm": 0.147593155503273,
"learning_rate": 6.872488136011667e-05,
"loss": 1.1165,
"step": 3750
},
{
"epoch": 1.8278018278018278,
"grad_norm": 0.1334652155637741,
"learning_rate": 6.634288080708952e-05,
"loss": 1.1135,
"step": 3800
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.14890378713607788,
"learning_rate": 6.398222751952899e-05,
"loss": 1.1086,
"step": 3850
},
{
"epoch": 1.8759018759018757,
"grad_norm": 0.1334807574748993,
"learning_rate": 6.164441875844882e-05,
"loss": 1.1144,
"step": 3900
},
{
"epoch": 1.8999518999518998,
"grad_norm": 0.12897680699825287,
"learning_rate": 5.933093729556062e-05,
"loss": 1.1116,
"step": 3950
},
{
"epoch": 1.924001924001924,
"grad_norm": 0.17530564963817596,
"learning_rate": 5.7043250472815356e-05,
"loss": 1.1039,
"step": 4000
},
{
"epoch": 1.948051948051948,
"grad_norm": 0.15966495871543884,
"learning_rate": 5.478280927173145e-05,
"loss": 1.101,
"step": 4050
},
{
"epoch": 1.9721019721019721,
"grad_norm": 0.18890446424484253,
"learning_rate": 5.255104739309924e-05,
"loss": 1.1077,
"step": 4100
},
{
"epoch": 1.9961519961519962,
"grad_norm": 0.1547369807958603,
"learning_rate": 5.0349380347646494e-05,
"loss": 1.103,
"step": 4150
},
{
"epoch": 2.0202020202020203,
"grad_norm": 0.13888758420944214,
"learning_rate": 4.8179204558240444e-05,
"loss": 1.0826,
"step": 4200
},
{
"epoch": 2.0442520442520444,
"grad_norm": 0.11266086250543594,
"learning_rate": 4.6041896474197e-05,
"loss": 1.071,
"step": 4250
},
{
"epoch": 2.068302068302068,
"grad_norm": 0.14245671033859253,
"learning_rate": 4.393881169825779e-05,
"loss": 1.0759,
"step": 4300
},
{
"epoch": 2.092352092352092,
"grad_norm": 0.1226249411702156,
"learning_rate": 4.187128412678969e-05,
"loss": 1.0742,
"step": 4350
},
{
"epoch": 2.1164021164021163,
"grad_norm": 0.12307476997375488,
"learning_rate": 3.984062510375155e-05,
"loss": 1.0721,
"step": 4400
},
{
"epoch": 2.1404521404521404,
"grad_norm": 0.12813834846019745,
"learning_rate": 3.7848122588965144e-05,
"loss": 1.0726,
"step": 4450
},
{
"epoch": 2.1645021645021645,
"grad_norm": 0.13432885706424713,
"learning_rate": 3.5895040341217543e-05,
"loss": 1.0745,
"step": 4500
},
{
"epoch": 2.1885521885521886,
"grad_norm": 0.11649097502231598,
"learning_rate": 3.398261711671309e-05,
"loss": 1.079,
"step": 4550
},
{
"epoch": 2.2126022126022127,
"grad_norm": 0.11140163242816925,
"learning_rate": 3.211206588338358e-05,
"loss": 1.0748,
"step": 4600
},
{
"epoch": 2.236652236652237,
"grad_norm": 0.10978424549102783,
"learning_rate": 3.028457305155483e-05,
"loss": 1.0726,
"step": 4650
},
{
"epoch": 2.260702260702261,
"grad_norm": 0.11395589262247086,
"learning_rate": 2.8501297721457422e-05,
"loss": 1.0656,
"step": 4700
},
{
"epoch": 2.284752284752285,
"grad_norm": 0.10599405318498611,
"learning_rate": 2.6763370948059353e-05,
"loss": 1.0765,
"step": 4750
},
{
"epoch": 2.3088023088023086,
"grad_norm": 0.11157254874706268,
"learning_rate": 2.5071895023686442e-05,
"loss": 1.0726,
"step": 4800
},
{
"epoch": 2.3328523328523327,
"grad_norm": 0.1390163153409958,
"learning_rate": 2.342794277888547e-05,
"loss": 1.0731,
"step": 4850
},
{
"epoch": 2.356902356902357,
"grad_norm": 0.1519329994916916,
"learning_rate": 2.1832556901973965e-05,
"loss": 1.0704,
"step": 4900
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.1278182566165924,
"learning_rate": 2.0286749277707782e-05,
"loss": 1.0661,
"step": 4950
},
{
"epoch": 2.405002405002405,
"grad_norm": 0.10508263111114502,
"learning_rate": 1.879150034548588e-05,
"loss": 1.0758,
"step": 5000
},
{
"epoch": 2.429052429052429,
"grad_norm": 0.09690719097852707,
"learning_rate": 1.7347758477500044e-05,
"loss": 1.0644,
"step": 5050
},
{
"epoch": 2.4531024531024532,
"grad_norm": 0.10174595564603806,
"learning_rate": 1.5956439377222798e-05,
"loss": 1.0726,
"step": 5100
},
{
"epoch": 2.4771524771524773,
"grad_norm": 0.10294167697429657,
"learning_rate": 1.4618425498616162e-05,
"loss": 1.0655,
"step": 5150
},
{
"epoch": 2.501202501202501,
"grad_norm": 0.11103129386901855,
"learning_rate": 1.3334565486428996e-05,
"loss": 1.0651,
"step": 5200
},
{
"epoch": 2.525252525252525,
"grad_norm": 0.10614852607250214,
"learning_rate": 1.2105673637938053e-05,
"loss": 1.0701,
"step": 5250
},
{
"epoch": 2.549302549302549,
"grad_norm": 0.09437720477581024,
"learning_rate": 1.0932529386474188e-05,
"loss": 1.0673,
"step": 5300
},
{
"epoch": 2.5733525733525733,
"grad_norm": 0.0965106412768364,
"learning_rate": 9.815876807061264e-06,
"loss": 1.0769,
"step": 5350
},
{
"epoch": 2.5974025974025974,
"grad_norm": 0.09335634112358093,
"learning_rate": 8.756424144481312e-06,
"loss": 1.0646,
"step": 5400
},
{
"epoch": 2.6214526214526215,
"grad_norm": 0.09890544414520264,
"learning_rate": 7.75484336406529e-06,
"loss": 1.0757,
"step": 5450
},
{
"epoch": 2.6455026455026456,
"grad_norm": 0.09670912474393845,
"learning_rate": 6.8117697254943106e-06,
"loss": 1.0668,
"step": 5500
},
{
"epoch": 2.6695526695526697,
"grad_norm": 0.09898468106985092,
"learning_rate": 5.927801379881714e-06,
"loss": 1.0745,
"step": 5550
},
{
"epoch": 2.6936026936026938,
"grad_norm": 0.08697386831045151,
"learning_rate": 5.103498990391509e-06,
"loss": 1.0653,
"step": 5600
},
{
"epoch": 2.717652717652718,
"grad_norm": 0.09457134455442429,
"learning_rate": 4.339385376633775e-06,
"loss": 1.0678,
"step": 5650
},
{
"epoch": 2.741702741702742,
"grad_norm": 0.09092475473880768,
"learning_rate": 3.6359451830626723e-06,
"loss": 1.0635,
"step": 5700
},
{
"epoch": 2.7657527657527656,
"grad_norm": 0.08736653625965118,
"learning_rate": 2.993624571587239e-06,
"loss": 1.0639,
"step": 5750
},
{
"epoch": 2.7898027898027897,
"grad_norm": 0.09138292819261551,
"learning_rate": 2.4128309385900717e-06,
"loss": 1.065,
"step": 5800
},
{
"epoch": 2.813852813852814,
"grad_norm": 0.08842656016349792,
"learning_rate": 1.8939326565333037e-06,
"loss": 1.0636,
"step": 5850
},
{
"epoch": 2.837902837902838,
"grad_norm": 0.08870802819728851,
"learning_rate": 1.437258840315714e-06,
"loss": 1.0706,
"step": 5900
},
{
"epoch": 2.861952861952862,
"grad_norm": 0.08659425377845764,
"learning_rate": 1.0430991385293575e-06,
"loss": 1.0673,
"step": 5950
},
{
"epoch": 2.886002886002886,
"grad_norm": 0.08142086863517761,
"learning_rate": 7.117035497478553e-07,
"loss": 1.0697,
"step": 6000
},
{
"epoch": 2.91005291005291,
"grad_norm": 0.080448217689991,
"learning_rate": 4.432822639630407e-07,
"loss": 1.0655,
"step": 6050
},
{
"epoch": 2.934102934102934,
"grad_norm": 0.08980288356542587,
"learning_rate": 2.380055292704575e-07,
"loss": 1.0701,
"step": 6100
},
{
"epoch": 2.958152958152958,
"grad_norm": 0.08309097588062286,
"learning_rate": 9.600354388833443e-08,
"loss": 1.0684,
"step": 6150
},
{
"epoch": 2.982202982202982,
"grad_norm": 0.08456841111183167,
"learning_rate": 1.7366373578442397e-08,
"loss": 1.0684,
"step": 6200
}
],
"logging_steps": 50,
"max_steps": 6237,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.056700790948663e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
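
A minimal sketch of how this state file can be consumed, assuming it has been saved locally as "trainer_state.json" and that matplotlib is available (both are assumptions, not part of the file itself): it reads the JSON, pulls the per-step training loss out of "log_history", and plots it against "step".

import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each entry in log_history holds the metrics logged every `logging_steps` (50) steps.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title(f"{state['num_train_epochs']} epochs, {state['global_step']} steps")
plt.show()

The same pattern extracts "learning_rate" or "grad_norm" from the log entries if those curves are of interest instead.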