{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3412726626377533,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011375755421258443,
      "grad_norm": 7.252706903111892,
      "learning_rate": 7.407407407407407e-07,
      "loss": 1.412,
      "step": 1
    },
    {
      "epoch": 0.0022751510842516885,
      "grad_norm": 8.622248143674112,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 1.486,
      "step": 2
    },
    {
      "epoch": 0.0034127266263775328,
      "grad_norm": 7.023012815723608,
      "learning_rate": 2.222222222222222e-06,
      "loss": 1.4828,
      "step": 3
    },
    {
      "epoch": 0.004550302168503377,
      "grad_norm": 6.665729995787034,
      "learning_rate": 2.962962962962963e-06,
      "loss": 1.4857,
      "step": 4
    },
    {
      "epoch": 0.005687877710629221,
      "grad_norm": 6.544551660595376,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.2733,
      "step": 5
    },
    {
      "epoch": 0.0068254532527550656,
      "grad_norm": 5.76674887523466,
      "learning_rate": 4.444444444444444e-06,
      "loss": 1.3368,
      "step": 6
    },
    {
      "epoch": 0.00796302879488091,
      "grad_norm": 3.496668631685638,
      "learning_rate": 5.185185185185185e-06,
      "loss": 1.2026,
      "step": 7
    },
    {
      "epoch": 0.009100604337006754,
      "grad_norm": 2.871434183971456,
      "learning_rate": 5.925925925925926e-06,
      "loss": 1.1447,
      "step": 8
    },
    {
      "epoch": 0.010238179879132598,
      "grad_norm": 4.375669967214979,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.1398,
      "step": 9
    },
    {
      "epoch": 0.011375755421258443,
      "grad_norm": 2.6750161678621494,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 1.1125,
      "step": 10
    },
    {
      "epoch": 0.012513330963384287,
      "grad_norm": 2.5960245365420795,
      "learning_rate": 8.148148148148148e-06,
      "loss": 1.0485,
      "step": 11
    },
    {
      "epoch": 0.013650906505510131,
      "grad_norm": 2.2437210938587584,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.9705,
      "step": 12
    },
    {
      "epoch": 0.014788482047635975,
      "grad_norm": 1.7076930327589486,
      "learning_rate": 9.62962962962963e-06,
      "loss": 0.9693,
      "step": 13
    },
    {
      "epoch": 0.01592605758976182,
      "grad_norm": 1.5720016563835741,
      "learning_rate": 1.037037037037037e-05,
      "loss": 0.9268,
      "step": 14
    },
    {
      "epoch": 0.017063633131887666,
      "grad_norm": 1.5049616059812596,
      "learning_rate": 1.1111111111111113e-05,
      "loss": 1.0427,
      "step": 15
    },
    {
      "epoch": 0.018201208674013508,
      "grad_norm": 1.670625778019347,
      "learning_rate": 1.1851851851851852e-05,
      "loss": 0.7984,
      "step": 16
    },
    {
      "epoch": 0.019338784216139354,
      "grad_norm": 1.5156920640819538,
      "learning_rate": 1.2592592592592593e-05,
      "loss": 0.8934,
      "step": 17
    },
    {
      "epoch": 0.020476359758265197,
      "grad_norm": 1.3859154498920394,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.9602,
      "step": 18
    },
    {
      "epoch": 0.021613935300391043,
      "grad_norm": 1.3086540529666517,
      "learning_rate": 1.4074074074074075e-05,
      "loss": 0.8253,
      "step": 19
    },
    {
      "epoch": 0.022751510842516885,
      "grad_norm": 1.3062556535833154,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.8987,
      "step": 20
    },
    {
      "epoch": 0.02388908638464273,
      "grad_norm": 1.3475867604944103,
      "learning_rate": 1.555555555555556e-05,
      "loss": 0.8483,
      "step": 21
    },
    {
      "epoch": 0.025026661926768574,
      "grad_norm": 1.4066019321159149,
      "learning_rate": 1.6296296296296297e-05,
      "loss": 0.8765,
      "step": 22
    },
    {
      "epoch": 0.02616423746889442,
      "grad_norm": 1.356056753489974,
      "learning_rate": 1.7037037037037038e-05,
      "loss": 0.7376,
      "step": 23
    },
    {
      "epoch": 0.027301813011020262,
      "grad_norm": 1.2273100266100156,
      "learning_rate": 1.7777777777777777e-05,
      "loss": 0.7735,
      "step": 24
    },
    {
      "epoch": 0.028439388553146108,
      "grad_norm": 1.3325688789427055,
      "learning_rate": 1.851851851851852e-05,
      "loss": 0.9376,
      "step": 25
    },
    {
      "epoch": 0.02957696409527195,
      "grad_norm": 1.2459636758267958,
      "learning_rate": 1.925925925925926e-05,
      "loss": 0.7574,
      "step": 26
    },
    {
      "epoch": 0.030714539637397797,
      "grad_norm": 1.2914174266728426,
      "learning_rate": 2e-05,
      "loss": 0.8459,
      "step": 27
    },
    {
      "epoch": 0.03185211517952364,
      "grad_norm": 1.1150255281605015,
      "learning_rate": 1.999993201860564e-05,
      "loss": 0.9413,
      "step": 28
    },
    {
      "epoch": 0.032989690721649485,
      "grad_norm": 1.1925573330854713,
      "learning_rate": 1.9999728075346848e-05,
      "loss": 0.7652,
      "step": 29
    },
    {
      "epoch": 0.03412726626377533,
      "grad_norm": 1.1659470654782442,
      "learning_rate": 1.9999388172996495e-05,
      "loss": 0.9115,
      "step": 30
    },
    {
      "epoch": 0.03526484180590117,
      "grad_norm": 1.1687557894773881,
      "learning_rate": 1.999891231617599e-05,
      "loss": 0.8744,
      "step": 31
    },
    {
      "epoch": 0.036402417348027016,
      "grad_norm": 1.1588348230291718,
      "learning_rate": 1.999830051135521e-05,
      "loss": 0.8677,
      "step": 32
    },
    {
      "epoch": 0.03753999289015286,
      "grad_norm": 1.132149944460972,
      "learning_rate": 1.9997552766852434e-05,
      "loss": 0.9,
      "step": 33
    },
    {
      "epoch": 0.03867756843227871,
      "grad_norm": 1.1863064139153197,
      "learning_rate": 1.9996669092834194e-05,
      "loss": 0.8074,
      "step": 34
    },
    {
      "epoch": 0.03981514397440455,
      "grad_norm": 1.220841646039248,
      "learning_rate": 1.9995649501315172e-05,
      "loss": 0.8674,
      "step": 35
    },
    {
      "epoch": 0.04095271951653039,
      "grad_norm": 1.23854307526539,
      "learning_rate": 1.9994494006158018e-05,
      "loss": 0.9159,
      "step": 36
    },
    {
      "epoch": 0.04209029505865624,
      "grad_norm": 1.090587831686784,
      "learning_rate": 1.9993202623073173e-05,
      "loss": 0.8779,
      "step": 37
    },
    {
      "epoch": 0.043227870600782085,
      "grad_norm": 1.0923548595966273,
      "learning_rate": 1.999177536961863e-05,
      "loss": 0.8266,
      "step": 38
    },
    {
      "epoch": 0.044365446142907924,
      "grad_norm": 1.1486258645801408,
      "learning_rate": 1.9990212265199738e-05,
      "loss": 0.8896,
      "step": 39
    },
    {
      "epoch": 0.04550302168503377,
      "grad_norm": 1.1394380414139478,
      "learning_rate": 1.998851333106889e-05,
      "loss": 0.8282,
      "step": 40
    },
    {
      "epoch": 0.046640597227159616,
      "grad_norm": 1.0624229488933081,
      "learning_rate": 1.9986678590325273e-05,
      "loss": 0.731,
      "step": 41
    },
    {
      "epoch": 0.04777817276928546,
      "grad_norm": 1.0270694900173014,
      "learning_rate": 1.9984708067914533e-05,
      "loss": 0.8598,
      "step": 42
    },
    {
      "epoch": 0.0489157483114113,
      "grad_norm": 1.053755980685397,
      "learning_rate": 1.998260179062844e-05,
      "loss": 0.7883,
      "step": 43
    },
    {
      "epoch": 0.05005332385353715,
      "grad_norm": 1.1027980315848285,
      "learning_rate": 1.9980359787104533e-05,
      "loss": 0.8248,
      "step": 44
    },
    {
      "epoch": 0.05119089939566299,
      "grad_norm": 1.054761323115589,
      "learning_rate": 1.9977982087825714e-05,
      "loss": 0.8647,
      "step": 45
    },
    {
      "epoch": 0.05232847493778884,
      "grad_norm": 1.0147641458879657,
      "learning_rate": 1.9975468725119842e-05,
      "loss": 0.8707,
      "step": 46
    },
    {
      "epoch": 0.05346605047991468,
      "grad_norm": 1.0320732494589604,
      "learning_rate": 1.99728197331593e-05,
      "loss": 0.7407,
      "step": 47
    },
    {
      "epoch": 0.054603626022040525,
      "grad_norm": 1.1478872729853458,
      "learning_rate": 1.9970035147960524e-05,
      "loss": 0.7818,
      "step": 48
    },
    {
      "epoch": 0.05574120156416637,
      "grad_norm": 1.1399868087609295,
      "learning_rate": 1.996711500738351e-05,
      "loss": 0.8373,
      "step": 49
    },
    {
      "epoch": 0.056878777106292217,
      "grad_norm": 1.0526771432986117,
      "learning_rate": 1.99640593511313e-05,
      "loss": 0.8203,
      "step": 50
    },
    {
      "epoch": 0.058016352648418056,
      "grad_norm": 1.0653301252742073,
      "learning_rate": 1.996086822074945e-05,
      "loss": 0.8281,
      "step": 51
    },
    {
      "epoch": 0.0591539281905439,
      "grad_norm": 1.0561063282716783,
      "learning_rate": 1.995754165962546e-05,
      "loss": 0.72,
      "step": 52
    },
    {
      "epoch": 0.06029150373266975,
      "grad_norm": 1.0839260823800712,
      "learning_rate": 1.9954079712988183e-05,
      "loss": 0.791,
      "step": 53
    },
    {
      "epoch": 0.061429079274795594,
      "grad_norm": 1.0146390764424258,
      "learning_rate": 1.995048242790721e-05,
      "loss": 0.7455,
      "step": 54
    },
    {
      "epoch": 0.06256665481692143,
      "grad_norm": 1.0962320578557068,
      "learning_rate": 1.9946749853292233e-05,
      "loss": 0.8347,
      "step": 55
    },
    {
      "epoch": 0.06370423035904728,
      "grad_norm": 1.1182191442676654,
      "learning_rate": 1.9942882039892378e-05,
      "loss": 0.8298,
      "step": 56
    },
    {
      "epoch": 0.06484180590117312,
      "grad_norm": 0.985261942271752,
      "learning_rate": 1.9938879040295508e-05,
      "loss": 0.7321,
      "step": 57
    },
    {
      "epoch": 0.06597938144329897,
      "grad_norm": 1.0605922824848177,
      "learning_rate": 1.993474090892753e-05,
      "loss": 0.864,
      "step": 58
    },
    {
      "epoch": 0.06711695698542482,
      "grad_norm": 1.1264056870803152,
      "learning_rate": 1.9930467702051632e-05,
      "loss": 0.8403,
      "step": 59
    },
    {
      "epoch": 0.06825453252755066,
      "grad_norm": 1.0395490010303334,
      "learning_rate": 1.992605947776752e-05,
      "loss": 0.8243,
      "step": 60
    },
    {
      "epoch": 0.06939210806967651,
      "grad_norm": 1.0708140874889909,
      "learning_rate": 1.9921516296010645e-05,
      "loss": 0.7053,
      "step": 61
    },
    {
      "epoch": 0.07052968361180234,
      "grad_norm": 1.1040066296881672,
      "learning_rate": 1.991683821855137e-05,
      "loss": 0.8089,
      "step": 62
    },
    {
      "epoch": 0.07166725915392819,
      "grad_norm": 1.0964641303072902,
      "learning_rate": 1.9912025308994146e-05,
      "loss": 0.9017,
      "step": 63
    },
    {
      "epoch": 0.07280483469605403,
      "grad_norm": 1.0643750074791034,
      "learning_rate": 1.9907077632776632e-05,
      "loss": 0.8678,
      "step": 64
    },
    {
      "epoch": 0.07394241023817988,
      "grad_norm": 1.0767194772302087,
      "learning_rate": 1.9901995257168808e-05,
      "loss": 0.8098,
      "step": 65
    },
    {
      "epoch": 0.07507998578030572,
      "grad_norm": 0.9814291648591228,
      "learning_rate": 1.989677825127208e-05,
      "loss": 0.9009,
      "step": 66
    },
    {
      "epoch": 0.07621756132243157,
      "grad_norm": 1.0424860021982505,
      "learning_rate": 1.9891426686018308e-05,
      "loss": 0.7733,
      "step": 67
    },
    {
      "epoch": 0.07735513686455742,
      "grad_norm": 1.0702692819494843,
      "learning_rate": 1.9885940634168863e-05,
      "loss": 0.8821,
      "step": 68
    },
    {
      "epoch": 0.07849271240668326,
      "grad_norm": 1.1408260155810739,
      "learning_rate": 1.988032017031364e-05,
      "loss": 0.8307,
      "step": 69
    },
    {
      "epoch": 0.0796302879488091,
      "grad_norm": 1.0327202463855523,
      "learning_rate": 1.9874565370870037e-05,
      "loss": 0.8242,
      "step": 70
    },
    {
      "epoch": 0.08076786349093494,
      "grad_norm": 1.0467929360052353,
      "learning_rate": 1.9868676314081907e-05,
      "loss": 0.8197,
      "step": 71
    },
    {
      "epoch": 0.08190543903306079,
      "grad_norm": 1.0401393378664532,
      "learning_rate": 1.9862653080018508e-05,
      "loss": 0.7963,
      "step": 72
    },
    {
      "epoch": 0.08304301457518663,
      "grad_norm": 1.0193314873859287,
      "learning_rate": 1.985649575057341e-05,
      "loss": 0.7183,
      "step": 73
    },
    {
      "epoch": 0.08418059011731248,
      "grad_norm": 1.1038745628343372,
      "learning_rate": 1.9850204409463387e-05,
      "loss": 0.8222,
      "step": 74
    },
    {
      "epoch": 0.08531816565943832,
      "grad_norm": 1.2332738343888963,
      "learning_rate": 1.9843779142227258e-05,
      "loss": 0.9721,
      "step": 75
    },
    {
      "epoch": 0.08645574120156417,
      "grad_norm": 1.091825396970584,
      "learning_rate": 1.9837220036224755e-05,
      "loss": 0.8735,
      "step": 76
    },
    {
      "epoch": 0.08759331674369002,
      "grad_norm": 1.0836987966746667,
      "learning_rate": 1.983052718063531e-05,
      "loss": 0.8209,
      "step": 77
    },
    {
      "epoch": 0.08873089228581585,
      "grad_norm": 1.1355629551230897,
      "learning_rate": 1.9823700666456854e-05,
      "loss": 0.8062,
      "step": 78
    },
    {
      "epoch": 0.0898684678279417,
      "grad_norm": 1.0972904393016396,
      "learning_rate": 1.981674058650458e-05,
      "loss": 0.79,
      "step": 79
    },
    {
      "epoch": 0.09100604337006754,
      "grad_norm": 1.0822786867657195,
      "learning_rate": 1.9809647035409673e-05,
      "loss": 0.8685,
      "step": 80
    },
    {
      "epoch": 0.09214361891219339,
      "grad_norm": 1.0499861096027099,
      "learning_rate": 1.980242010961803e-05,
      "loss": 0.8869,
      "step": 81
    },
    {
      "epoch": 0.09328119445431923,
      "grad_norm": 1.1565333695689672,
      "learning_rate": 1.9795059907388955e-05,
      "loss": 0.7683,
      "step": 82
    },
    {
      "epoch": 0.09441876999644508,
      "grad_norm": 1.1404697343493142,
      "learning_rate": 1.9787566528793806e-05,
      "loss": 0.8392,
      "step": 83
    },
    {
      "epoch": 0.09555634553857092,
      "grad_norm": 1.0072701040551242,
      "learning_rate": 1.977994007571465e-05,
      "loss": 0.7045,
      "step": 84
    },
    {
      "epoch": 0.09669392108069677,
      "grad_norm": 1.019073621193679,
      "learning_rate": 1.977218065184287e-05,
      "loss": 0.7104,
      "step": 85
    },
    {
      "epoch": 0.0978314966228226,
      "grad_norm": 1.0557815862870046,
      "learning_rate": 1.9764288362677757e-05,
      "loss": 0.7672,
      "step": 86
    },
    {
      "epoch": 0.09896907216494845,
      "grad_norm": 1.1380481668336246,
      "learning_rate": 1.975626331552507e-05,
      "loss": 0.9107,
      "step": 87
    },
    {
      "epoch": 0.1001066477070743,
      "grad_norm": 1.093169407323706,
      "learning_rate": 1.9748105619495593e-05,
      "loss": 0.7914,
      "step": 88
    },
    {
      "epoch": 0.10124422324920014,
      "grad_norm": 1.087351595099475,
      "learning_rate": 1.973981538550364e-05,
      "loss": 0.7814,
      "step": 89
    },
    {
      "epoch": 0.10238179879132599,
      "grad_norm": 1.0728410820828704,
      "learning_rate": 1.9731392726265538e-05,
      "loss": 0.7555,
      "step": 90
    },
    {
      "epoch": 0.10351937433345183,
      "grad_norm": 1.2233426634088327,
      "learning_rate": 1.9722837756298112e-05,
      "loss": 0.818,
      "step": 91
    },
    {
      "epoch": 0.10465694987557768,
      "grad_norm": 1.0575926671469376,
      "learning_rate": 1.971415059191712e-05,
      "loss": 0.7764,
      "step": 92
    },
    {
      "epoch": 0.10579452541770352,
      "grad_norm": 1.088708118649049,
      "learning_rate": 1.9705331351235673e-05,
      "loss": 0.7902,
      "step": 93
    },
    {
      "epoch": 0.10693210095982936,
      "grad_norm": 1.128811241106672,
      "learning_rate": 1.9696380154162628e-05,
      "loss": 0.7237,
      "step": 94
    },
    {
      "epoch": 0.1080696765019552,
      "grad_norm": 1.193909132920539,
      "learning_rate": 1.9687297122400952e-05,
      "loss": 0.8515,
      "step": 95
    },
    {
      "epoch": 0.10920725204408105,
      "grad_norm": 1.0847877557011065,
      "learning_rate": 1.967808237944608e-05,
      "loss": 0.7833,
      "step": 96
    },
    {
      "epoch": 0.1103448275862069,
      "grad_norm": 1.0860109471141504,
      "learning_rate": 1.9668736050584224e-05,
      "loss": 0.87,
      "step": 97
    },
    {
      "epoch": 0.11148240312833274,
      "grad_norm": 1.0232679660737842,
      "learning_rate": 1.9659258262890683e-05,
      "loss": 0.864,
      "step": 98
    },
    {
      "epoch": 0.11261997867045859,
      "grad_norm": 1.0289877842833908,
      "learning_rate": 1.96496491452281e-05,
      "loss": 0.7928,
      "step": 99
    },
    {
      "epoch": 0.11375755421258443,
      "grad_norm": 1.1247393636130563,
      "learning_rate": 1.963990882824472e-05,
      "loss": 0.7534,
      "step": 100
    },
    {
      "epoch": 0.11489512975471028,
      "grad_norm": 1.0570565178567373,
      "learning_rate": 1.96300374443726e-05,
      "loss": 0.8005,
      "step": 101
    },
    {
      "epoch": 0.11603270529683611,
      "grad_norm": 0.9867357643569166,
      "learning_rate": 1.962003512782584e-05,
      "loss": 0.7223,
      "step": 102
    },
    {
      "epoch": 0.11717028083896196,
      "grad_norm": 1.0928156643350972,
      "learning_rate": 1.960990201459872e-05,
      "loss": 0.7677,
      "step": 103
    },
    {
      "epoch": 0.1183078563810878,
      "grad_norm": 1.021316668974268,
      "learning_rate": 1.959963824246387e-05,
      "loss": 0.7598,
      "step": 104
    },
    {
      "epoch": 0.11944543192321365,
      "grad_norm": 1.003907848763008,
      "learning_rate": 1.95892439509704e-05,
      "loss": 0.7464,
      "step": 105
    },
    {
      "epoch": 0.1205830074653395,
      "grad_norm": 1.0107143173657256,
      "learning_rate": 1.9578719281442002e-05,
      "loss": 0.8703,
      "step": 106
    },
    {
      "epoch": 0.12172058300746534,
      "grad_norm": 1.0427993123321293,
      "learning_rate": 1.9568064376975013e-05,
      "loss": 0.8458,
      "step": 107
    },
    {
      "epoch": 0.12285815854959119,
      "grad_norm": 0.9471895655219162,
      "learning_rate": 1.9557279382436483e-05,
      "loss": 0.8744,
      "step": 108
    },
    {
      "epoch": 0.12399573409171703,
      "grad_norm": 1.0474061833411126,
      "learning_rate": 1.9546364444462207e-05,
      "loss": 0.7278,
      "step": 109
    },
    {
      "epoch": 0.12513330963384287,
      "grad_norm": 0.9851209978746495,
      "learning_rate": 1.953531971145473e-05,
      "loss": 0.8167,
      "step": 110
    },
    {
      "epoch": 0.1262708851759687,
      "grad_norm": 1.0280704738378832,
      "learning_rate": 1.9524145333581315e-05,
      "loss": 0.7167,
      "step": 111
    },
    {
      "epoch": 0.12740846071809456,
      "grad_norm": 0.9443890480132111,
      "learning_rate": 1.9512841462771927e-05,
      "loss": 0.8822,
      "step": 112
    },
    {
      "epoch": 0.1285460362602204,
      "grad_norm": 0.9717455193006768,
      "learning_rate": 1.9501408252717136e-05,
      "loss": 0.8707,
      "step": 113
    },
    {
      "epoch": 0.12968361180234625,
      "grad_norm": 1.092473392232703,
      "learning_rate": 1.9489845858866066e-05,
      "loss": 0.7959,
      "step": 114
    },
    {
      "epoch": 0.1308211873444721,
      "grad_norm": 1.1056495100925654,
      "learning_rate": 1.947815443842424e-05,
      "loss": 0.7132,
      "step": 115
    },
    {
      "epoch": 0.13195876288659794,
      "grad_norm": 1.0058537198089237,
      "learning_rate": 1.9466334150351475e-05,
      "loss": 0.8592,
      "step": 116
    },
    {
      "epoch": 0.1330963384287238,
      "grad_norm": 1.0234629362307024,
      "learning_rate": 1.9454385155359704e-05,
      "loss": 0.7982,
      "step": 117
    },
    {
      "epoch": 0.13423391397084963,
      "grad_norm": 0.9907144940751352,
      "learning_rate": 1.9442307615910793e-05,
      "loss": 0.7626,
      "step": 118
    },
    {
      "epoch": 0.13537148951297548,
      "grad_norm": 1.0610884551525839,
      "learning_rate": 1.9430101696214335e-05,
      "loss": 0.7694,
      "step": 119
    },
    {
      "epoch": 0.13650906505510133,
      "grad_norm": 1.136255401471595,
      "learning_rate": 1.9417767562225422e-05,
      "loss": 0.8427,
      "step": 120
    },
    {
      "epoch": 0.13764664059722717,
      "grad_norm": 0.983990428831828,
      "learning_rate": 1.9405305381642376e-05,
      "loss": 0.8446,
      "step": 121
    },
    {
      "epoch": 0.13878421613935302,
      "grad_norm": 1.1301380622416057,
      "learning_rate": 1.9392715323904483e-05,
      "loss": 0.7658,
      "step": 122
    },
    {
      "epoch": 0.13992179168147884,
      "grad_norm": 1.0031223316843825,
      "learning_rate": 1.9379997560189677e-05,
      "loss": 0.7684,
      "step": 123
    },
    {
      "epoch": 0.14105936722360468,
      "grad_norm": 1.0654113241033707,
      "learning_rate": 1.936715226341222e-05,
      "loss": 0.781,
      "step": 124
    },
    {
      "epoch": 0.14219694276573053,
      "grad_norm": 0.9271001409058074,
      "learning_rate": 1.9354179608220347e-05,
      "loss": 0.7363,
      "step": 125
    },
    {
      "epoch": 0.14333451830785637,
      "grad_norm": 1.0866976883120223,
      "learning_rate": 1.93410797709939e-05,
      "loss": 0.8719,
      "step": 126
    },
    {
      "epoch": 0.14447209384998222,
      "grad_norm": 1.0143165560250647,
      "learning_rate": 1.9327852929841918e-05,
      "loss": 0.7418,
      "step": 127
    },
    {
      "epoch": 0.14560966939210807,
      "grad_norm": 1.0794908238570715,
      "learning_rate": 1.9314499264600218e-05,
      "loss": 0.9511,
      "step": 128
    },
    {
      "epoch": 0.1467472449342339,
      "grad_norm": 1.0543849611899816,
      "learning_rate": 1.9301018956828966e-05,
      "loss": 0.7261,
      "step": 129
    },
    {
      "epoch": 0.14788482047635976,
      "grad_norm": 0.9853708931945785,
      "learning_rate": 1.9287412189810174e-05,
      "loss": 0.7949,
      "step": 130
    },
    {
      "epoch": 0.1490223960184856,
      "grad_norm": 0.9782889131523694,
      "learning_rate": 1.9273679148545246e-05,
      "loss": 0.7894,
      "step": 131
    },
    {
      "epoch": 0.15015997156061145,
      "grad_norm": 1.0694826655849514,
      "learning_rate": 1.9259820019752445e-05,
      "loss": 0.8288,
      "step": 132
    },
    {
      "epoch": 0.1512975471027373,
      "grad_norm": 0.9707247690290827,
      "learning_rate": 1.9245834991864344e-05,
      "loss": 0.7973,
      "step": 133
    },
    {
      "epoch": 0.15243512264486314,
      "grad_norm": 0.97334767003227,
      "learning_rate": 1.9231724255025286e-05,
      "loss": 0.8146,
      "step": 134
    },
    {
      "epoch": 0.153572698186989,
      "grad_norm": 0.9354396394568825,
      "learning_rate": 1.9217488001088784e-05,
      "loss": 0.7446,
      "step": 135
    },
    {
      "epoch": 0.15471027372911483,
      "grad_norm": 0.9395461051827337,
      "learning_rate": 1.9203126423614916e-05,
      "loss": 0.8077,
      "step": 136
    },
    {
      "epoch": 0.15584784927124068,
      "grad_norm": 0.9750295198016627,
      "learning_rate": 1.9188639717867695e-05,
      "loss": 0.7134,
      "step": 137
    },
    {
      "epoch": 0.15698542481336653,
      "grad_norm": 1.004306726572393,
      "learning_rate": 1.9174028080812415e-05,
      "loss": 0.7295,
      "step": 138
    },
    {
      "epoch": 0.15812300035549234,
      "grad_norm": 1.0670488030887912,
      "learning_rate": 1.9159291711112962e-05,
      "loss": 0.9652,
      "step": 139
    },
    {
      "epoch": 0.1592605758976182,
      "grad_norm": 0.959021129913344,
      "learning_rate": 1.914443080912913e-05,
      "loss": 0.7656,
      "step": 140
    },
    {
      "epoch": 0.16039815143974404,
      "grad_norm": 1.0733667782620868,
      "learning_rate": 1.9129445576913886e-05,
      "loss": 0.7651,
      "step": 141
    },
    {
      "epoch": 0.16153572698186988,
      "grad_norm": 0.9954726556545452,
      "learning_rate": 1.9114336218210635e-05,
      "loss": 0.715,
      "step": 142
    },
    {
      "epoch": 0.16267330252399573,
      "grad_norm": 0.9972323730973797,
      "learning_rate": 1.909910293845042e-05,
      "loss": 0.6975,
      "step": 143
    },
    {
      "epoch": 0.16381087806612157,
      "grad_norm": 0.9913964052548584,
      "learning_rate": 1.9083745944749163e-05,
      "loss": 0.7253,
      "step": 144
    },
    {
      "epoch": 0.16494845360824742,
      "grad_norm": 0.9819193678083898,
      "learning_rate": 1.9068265445904838e-05,
      "loss": 0.7467,
      "step": 145
    },
    {
      "epoch": 0.16608602915037327,
      "grad_norm": 1.0151841007728057,
      "learning_rate": 1.905266165239462e-05,
      "loss": 0.8109,
      "step": 146
    },
    {
      "epoch": 0.1672236046924991,
      "grad_norm": 0.9838304949608468,
      "learning_rate": 1.903693477637204e-05,
      "loss": 0.7765,
      "step": 147
    },
    {
      "epoch": 0.16836118023462496,
      "grad_norm": 0.942271667758485,
      "learning_rate": 1.902108503166409e-05,
      "loss": 0.7697,
      "step": 148
    },
    {
      "epoch": 0.1694987557767508,
      "grad_norm": 0.9992910291972015,
      "learning_rate": 1.9005112633768315e-05,
      "loss": 0.758,
      "step": 149
    },
    {
      "epoch": 0.17063633131887665,
      "grad_norm": 1.034597439114661,
      "learning_rate": 1.8989017799849896e-05,
      "loss": 0.7424,
      "step": 150
    },
    {
      "epoch": 0.1717739068610025,
      "grad_norm": 0.9794734575939594,
      "learning_rate": 1.897280074873868e-05,
      "loss": 0.6354,
      "step": 151
    },
    {
      "epoch": 0.17291148240312834,
      "grad_norm": 0.9930145480636745,
      "learning_rate": 1.8956461700926215e-05,
      "loss": 0.8608,
      "step": 152
    },
    {
      "epoch": 0.1740490579452542,
      "grad_norm": 1.028876703411256,
      "learning_rate": 1.8940000878562758e-05,
      "loss": 0.717,
      "step": 153
    },
    {
      "epoch": 0.17518663348738003,
      "grad_norm": 0.9475138167941232,
      "learning_rate": 1.8923418505454238e-05,
      "loss": 0.7061,
      "step": 154
    },
    {
      "epoch": 0.17632420902950588,
      "grad_norm": 1.0267930452512193,
      "learning_rate": 1.8906714807059218e-05,
      "loss": 0.8186,
      "step": 155
    },
    {
      "epoch": 0.1774617845716317,
      "grad_norm": 1.0643772128505595,
      "learning_rate": 1.8889890010485847e-05,
      "loss": 0.6661,
      "step": 156
    },
    {
      "epoch": 0.17859936011375754,
      "grad_norm": 1.066349966150257,
      "learning_rate": 1.8872944344488747e-05,
      "loss": 0.7617,
      "step": 157
    },
    {
      "epoch": 0.1797369356558834,
      "grad_norm": 0.9619934443330037,
      "learning_rate": 1.885587803946592e-05,
      "loss": 0.7127,
      "step": 158
    },
    {
      "epoch": 0.18087451119800924,
      "grad_norm": 1.004371603307917,
      "learning_rate": 1.883869132745561e-05,
      "loss": 0.7539,
      "step": 159
    },
    {
      "epoch": 0.18201208674013508,
      "grad_norm": 1.0106558200612026,
      "learning_rate": 1.8821384442133145e-05,
      "loss": 0.8286,
      "step": 160
    },
    {
      "epoch": 0.18314966228226093,
      "grad_norm": 1.0380867326359198,
      "learning_rate": 1.8803957618807764e-05,
      "loss": 0.826,
      "step": 161
    },
    {
      "epoch": 0.18428723782438677,
      "grad_norm": 1.0155843819783557,
      "learning_rate": 1.878641109441942e-05,
      "loss": 0.739,
      "step": 162
    },
    {
      "epoch": 0.18542481336651262,
      "grad_norm": 0.9916382609123128,
      "learning_rate": 1.876874510753554e-05,
      "loss": 0.9736,
      "step": 163
    },
    {
      "epoch": 0.18656238890863847,
      "grad_norm": 0.9382307275493679,
      "learning_rate": 1.8750959898347828e-05,
      "loss": 0.8839,
      "step": 164
    },
    {
      "epoch": 0.1876999644507643,
      "grad_norm": 1.038587474509486,
      "learning_rate": 1.8733055708668928e-05,
      "loss": 0.8258,
      "step": 165
    },
    {
      "epoch": 0.18883753999289016,
      "grad_norm": 1.133352025192882,
      "learning_rate": 1.871503278192921e-05,
      "loss": 0.7396,
      "step": 166
    },
    {
      "epoch": 0.189975115535016,
      "grad_norm": 1.0035615462799168,
      "learning_rate": 1.8696891363173405e-05,
      "loss": 0.736,
      "step": 167
    },
    {
      "epoch": 0.19111269107714185,
      "grad_norm": 0.9854553689824346,
      "learning_rate": 1.86786316990573e-05,
      "loss": 0.7483,
      "step": 168
    },
    {
      "epoch": 0.1922502666192677,
      "grad_norm": 1.0713592511805936,
      "learning_rate": 1.866025403784439e-05,
      "loss": 0.7571,
      "step": 169
    },
    {
      "epoch": 0.19338784216139354,
      "grad_norm": 1.0505443529749097,
      "learning_rate": 1.8641758629402468e-05,
      "loss": 0.8198,
      "step": 170
    },
    {
      "epoch": 0.1945254177035194,
      "grad_norm": 0.999013320748331,
      "learning_rate": 1.862314572520028e-05,
      "loss": 0.8081,
      "step": 171
    },
    {
      "epoch": 0.1956629932456452,
      "grad_norm": 1.0043036302243753,
      "learning_rate": 1.8604415578304052e-05,
      "loss": 0.8008,
      "step": 172
    },
    {
      "epoch": 0.19680056878777105,
      "grad_norm": 0.9628077082215138,
      "learning_rate": 1.8585568443374087e-05,
      "loss": 0.7795,
      "step": 173
    },
    {
      "epoch": 0.1979381443298969,
      "grad_norm": 0.9838858995474212,
      "learning_rate": 1.8566604576661288e-05,
      "loss": 0.7328,
      "step": 174
    },
    {
      "epoch": 0.19907571987202274,
      "grad_norm": 0.9813195544795628,
      "learning_rate": 1.8547524236003675e-05,
      "loss": 0.8816,
      "step": 175
    },
    {
      "epoch": 0.2002132954141486,
      "grad_norm": 0.9499983935703465,
      "learning_rate": 1.852832768082288e-05,
      "loss": 0.8321,
      "step": 176
    },
    {
      "epoch": 0.20135087095627444,
      "grad_norm": 1.0242912390679038,
      "learning_rate": 1.850901517212062e-05,
      "loss": 0.715,
      "step": 177
    },
    {
      "epoch": 0.20248844649840028,
      "grad_norm": 1.0605111053806962,
      "learning_rate": 1.8489586972475154e-05,
      "loss": 0.7838,
      "step": 178
    },
    {
      "epoch": 0.20362602204052613,
      "grad_norm": 0.9853924717398025,
      "learning_rate": 1.8470043346037698e-05,
      "loss": 0.8206,
      "step": 179
    },
    {
      "epoch": 0.20476359758265197,
      "grad_norm": 0.9475510993055148,
      "learning_rate": 1.8450384558528848e-05,
      "loss": 0.8305,
      "step": 180
    },
    {
      "epoch": 0.20590117312477782,
      "grad_norm": 0.9554542547364111,
      "learning_rate": 1.843061087723496e-05,
      "loss": 0.7735,
      "step": 181
    },
    {
      "epoch": 0.20703874866690367,
      "grad_norm": 0.9392624041198877,
      "learning_rate": 1.8410722571004524e-05,
      "loss": 0.7179,
      "step": 182
    },
    {
      "epoch": 0.2081763242090295,
      "grad_norm": 0.9205638857801508,
      "learning_rate": 1.8390719910244487e-05,
      "loss": 0.7482,
      "step": 183
    },
    {
      "epoch": 0.20931389975115536,
      "grad_norm": 1.0155955750650456,
      "learning_rate": 1.8370603166916616e-05,
      "loss": 0.8632,
      "step": 184
    },
    {
      "epoch": 0.2104514752932812,
      "grad_norm": 0.9256662805897953,
      "learning_rate": 1.8350372614533753e-05,
      "loss": 0.7618,
      "step": 185
    },
    {
      "epoch": 0.21158905083540705,
      "grad_norm": 0.9614934193864958,
      "learning_rate": 1.8330028528156138e-05,
      "loss": 0.7873,
      "step": 186
    },
    {
      "epoch": 0.2127266263775329,
      "grad_norm": 0.9409854803070374,
      "learning_rate": 1.830957118438764e-05,
      "loss": 0.7676,
      "step": 187
    },
    {
      "epoch": 0.21386420191965871,
      "grad_norm": 1.0117832834333376,
      "learning_rate": 1.8289000861372006e-05,
      "loss": 0.6853,
      "step": 188
    },
    {
      "epoch": 0.21500177746178456,
      "grad_norm": 0.9436331107279641,
      "learning_rate": 1.826831783878909e-05,
      "loss": 0.7664,
      "step": 189
    },
    {
      "epoch": 0.2161393530039104,
      "grad_norm": 0.9518709375119045,
      "learning_rate": 1.8247522397851028e-05,
      "loss": 0.7979,
      "step": 190
    },
    {
      "epoch": 0.21727692854603625,
      "grad_norm": 0.950510190670522,
      "learning_rate": 1.8226614821298444e-05,
      "loss": 0.9146,
      "step": 191
    },
    {
      "epoch": 0.2184145040881621,
      "grad_norm": 1.0001640495640078,
      "learning_rate": 1.820559539339657e-05,
      "loss": 0.8066,
      "step": 192
    },
    {
      "epoch": 0.21955207963028794,
      "grad_norm": 1.0515205356048116,
      "learning_rate": 1.8184464399931414e-05,
      "loss": 0.7605,
      "step": 193
    },
    {
      "epoch": 0.2206896551724138,
      "grad_norm": 0.9518221695117551,
      "learning_rate": 1.8163222128205853e-05,
      "loss": 0.77,
      "step": 194
    },
    {
      "epoch": 0.22182723071453964,
      "grad_norm": 0.9658190130336755,
      "learning_rate": 1.8141868867035745e-05,
      "loss": 0.7553,
      "step": 195
    },
    {
      "epoch": 0.22296480625666548,
      "grad_norm": 0.944099728059088,
      "learning_rate": 1.8120404906745973e-05,
      "loss": 0.7851,
      "step": 196
    },
    {
      "epoch": 0.22410238179879133,
      "grad_norm": 0.9818110526183947,
      "learning_rate": 1.8098830539166536e-05,
      "loss": 0.7935,
      "step": 197
    },
    {
      "epoch": 0.22523995734091717,
      "grad_norm": 1.0679918663170498,
      "learning_rate": 1.8077146057628547e-05,
      "loss": 0.7782,
      "step": 198
    },
    {
      "epoch": 0.22637753288304302,
      "grad_norm": 0.9636654090223082,
      "learning_rate": 1.8055351756960262e-05,
      "loss": 0.7111,
      "step": 199
    },
    {
      "epoch": 0.22751510842516887,
      "grad_norm": 1.0273549839107226,
      "learning_rate": 1.8033447933483076e-05,
      "loss": 0.7937,
      "step": 200
    },
    {
      "epoch": 0.2286526839672947,
      "grad_norm": 0.9768073027947863,
      "learning_rate": 1.8011434885007482e-05,
      "loss": 0.6585,
      "step": 201
    },
    {
      "epoch": 0.22979025950942056,
      "grad_norm": 1.0757919457820178,
      "learning_rate": 1.7989312910829023e-05,
      "loss": 0.8112,
      "step": 202
    },
    {
      "epoch": 0.2309278350515464,
      "grad_norm": 1.07129922697845,
      "learning_rate": 1.796708231172423e-05,
      "loss": 0.7197,
      "step": 203
    },
    {
      "epoch": 0.23206541059367222,
      "grad_norm": 0.9064139244148179,
      "learning_rate": 1.7944743389946524e-05,
      "loss": 0.8407,
      "step": 204
    },
    {
      "epoch": 0.23320298613579807,
      "grad_norm": 1.0566497130819101,
      "learning_rate": 1.792229644922212e-05,
      "loss": 0.6641,
      "step": 205
    },
    {
      "epoch": 0.23434056167792391,
      "grad_norm": 1.1821144067979144,
      "learning_rate": 1.789974179474588e-05,
      "loss": 0.8014,
      "step": 206
    },
    {
      "epoch": 0.23547813722004976,
      "grad_norm": 1.004667649949057,
      "learning_rate": 1.7877079733177185e-05,
      "loss": 0.7442,
      "step": 207
    },
    {
      "epoch": 0.2366157127621756,
      "grad_norm": 1.0680994058446738,
      "learning_rate": 1.7854310572635733e-05,
      "loss": 0.8034,
      "step": 208
    },
    {
      "epoch": 0.23775328830430145,
      "grad_norm": 1.0075812686052639,
      "learning_rate": 1.7831434622697386e-05,
      "loss": 0.6922,
      "step": 209
    },
    {
      "epoch": 0.2388908638464273,
      "grad_norm": 1.0078124911781743,
      "learning_rate": 1.780845219438994e-05,
      "loss": 0.8086,
      "step": 210
    },
    {
      "epoch": 0.24002843938855314,
      "grad_norm": 1.0012945867152467,
      "learning_rate": 1.7785363600188894e-05,
      "loss": 0.7705,
      "step": 211
    },
    {
      "epoch": 0.241166014930679,
      "grad_norm": 1.1225575837862554,
      "learning_rate": 1.776216915401322e-05,
      "loss": 0.7568,
      "step": 212
    },
    {
      "epoch": 0.24230359047280484,
      "grad_norm": 0.9417229426258661,
      "learning_rate": 1.773886917122107e-05,
      "loss": 0.7634,
      "step": 213
    },
    {
      "epoch": 0.24344116601493068,
      "grad_norm": 1.0736164163302175,
      "learning_rate": 1.771546396860551e-05,
      "loss": 0.7334,
      "step": 214
    },
    {
      "epoch": 0.24457874155705653,
      "grad_norm": 1.0980435254209089,
      "learning_rate": 1.7691953864390208e-05,
      "loss": 0.8121,
      "step": 215
    },
    {
      "epoch": 0.24571631709918237,
      "grad_norm": 0.9581368725405702,
      "learning_rate": 1.766833917822509e-05,
      "loss": 0.7622,
      "step": 216
    },
    {
      "epoch": 0.24685389264130822,
      "grad_norm": 0.94688646103546,
      "learning_rate": 1.7644620231182015e-05,
      "loss": 0.8606,
      "step": 217
    },
    {
      "epoch": 0.24799146818343407,
      "grad_norm": 1.000027690442481,
      "learning_rate": 1.7620797345750403e-05,
      "loss": 0.6355,
      "step": 218
    },
    {
      "epoch": 0.2491290437255599,
      "grad_norm": 1.0030472762272575,
      "learning_rate": 1.759687084583285e-05,
      "loss": 0.7331,
      "step": 219
    },
    {
      "epoch": 0.25026661926768573,
      "grad_norm": 0.933093406691675,
      "learning_rate": 1.7572841056740722e-05,
      "loss": 0.8011,
      "step": 220
    },
    {
      "epoch": 0.2514041948098116,
      "grad_norm": 0.9421445936097415,
      "learning_rate": 1.7548708305189724e-05,
      "loss": 0.7891,
      "step": 221
    },
    {
      "epoch": 0.2525417703519374,
      "grad_norm": 0.9681375218542562,
      "learning_rate": 1.7524472919295488e-05,
      "loss": 0.8035,
      "step": 222
    },
    {
      "epoch": 0.25367934589406327,
      "grad_norm": 0.9614473856459215,
      "learning_rate": 1.7500135228569067e-05,
      "loss": 0.7541,
      "step": 223
    },
    {
      "epoch": 0.2548169214361891,
      "grad_norm": 1.0407809945974127,
      "learning_rate": 1.7475695563912506e-05,
      "loss": 0.7616,
      "step": 224
    },
    {
      "epoch": 0.25595449697831496,
      "grad_norm": 0.9557104322965116,
      "learning_rate": 1.7451154257614287e-05,
      "loss": 0.7465,
      "step": 225
    },
    {
      "epoch": 0.2570920725204408,
      "grad_norm": 1.0178852691492593,
      "learning_rate": 1.742651164334486e-05,
      "loss": 0.8887,
      "step": 226
    },
    {
      "epoch": 0.25822964806256665,
      "grad_norm": 1.0183556677477257,
      "learning_rate": 1.7401768056152083e-05,
      "loss": 0.7316,
      "step": 227
    },
    {
      "epoch": 0.2593672236046925,
      "grad_norm": 0.9967963519853643,
      "learning_rate": 1.7376923832456665e-05,
      "loss": 0.7435,
      "step": 228
    },
    {
      "epoch": 0.26050479914681834,
      "grad_norm": 0.9398084773931321,
      "learning_rate": 1.7351979310047603e-05,
      "loss": 0.8236,
      "step": 229
    },
    {
      "epoch": 0.2616423746889442,
      "grad_norm": 0.9702704929040705,
      "learning_rate": 1.7326934828077574e-05,
      "loss": 0.7236,
      "step": 230
    },
    {
      "epoch": 0.26277995023107004,
      "grad_norm": 0.9607473172752993,
      "learning_rate": 1.7301790727058344e-05,
      "loss": 0.8514,
      "step": 231
    },
    {
      "epoch": 0.2639175257731959,
      "grad_norm": 0.99121388037334,
      "learning_rate": 1.727654734885612e-05,
      "loss": 0.7258,
      "step": 232
    },
    {
      "epoch": 0.26505510131532173,
      "grad_norm": 0.9178575823375061,
      "learning_rate": 1.7251205036686913e-05,
      "loss": 0.7234,
      "step": 233
    },
    {
      "epoch": 0.2661926768574476,
      "grad_norm": 0.9301556936555667,
      "learning_rate": 1.7225764135111867e-05,
      "loss": 0.7478,
      "step": 234
    },
    {
      "epoch": 0.2673302523995734,
      "grad_norm": 0.951109081614099,
      "learning_rate": 1.7200224990032577e-05,
      "loss": 0.7419,
      "step": 235
    },
    {
      "epoch": 0.26846782794169927,
      "grad_norm": 0.9160961470408167,
      "learning_rate": 1.7174587948686375e-05,
      "loss": 0.826,
      "step": 236
    },
    {
      "epoch": 0.2696054034838251,
      "grad_norm": 1.057499137690602,
      "learning_rate": 1.7148853359641627e-05,
      "loss": 0.7286,
      "step": 237
    },
    {
      "epoch": 0.27074297902595096,
      "grad_norm": 0.9295330730635258,
      "learning_rate": 1.7123021572792982e-05,
      "loss": 0.7782,
      "step": 238
    },
    {
      "epoch": 0.2718805545680768,
      "grad_norm": 0.9975452115416533,
      "learning_rate": 1.7097092939356622e-05,
      "loss": 0.7519,
      "step": 239
    },
    {
      "epoch": 0.27301813011020265,
      "grad_norm": 0.9230592215185579,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.7086,
      "step": 240
    },
    {
      "epoch": 0.2741557056523285,
      "grad_norm": 0.9191258196691511,
      "learning_rate": 1.7044946544164434e-05,
      "loss": 0.7791,
      "step": 241
    },
    {
      "epoch": 0.27529328119445434,
      "grad_norm": 0.9937503180909131,
      "learning_rate": 1.7018729491405537e-05,
      "loss": 0.7141,
      "step": 242
    },
    {
      "epoch": 0.2764308567365802,
      "grad_norm": 0.9546990458403812,
      "learning_rate": 1.6992417010043144e-05,
      "loss": 0.9387,
      "step": 243
    },
    {
      "epoch": 0.27756843227870603,
      "grad_norm": 0.9694759038261794,
      "learning_rate": 1.6966009457829088e-05,
      "loss": 0.7102,
      "step": 244
    },
    {
      "epoch": 0.2787060078208318,
      "grad_norm": 1.010514886312666,
      "learning_rate": 1.6939507193807818e-05,
      "loss": 0.7188,
      "step": 245
    },
    {
      "epoch": 0.27984358336295767,
      "grad_norm": 0.9552012342525721,
      "learning_rate": 1.6912910578311503e-05,
      "loss": 0.7223,
      "step": 246
    },
    {
      "epoch": 0.2809811589050835,
      "grad_norm": 0.9577918842168219,
      "learning_rate": 1.688621997295515e-05,
      "loss": 0.8556,
      "step": 247
    },
    {
      "epoch": 0.28211873444720936,
      "grad_norm": 0.9347260963706611,
      "learning_rate": 1.685943574063166e-05,
      "loss": 0.6894,
      "step": 248
    },
    {
      "epoch": 0.2832563099893352,
      "grad_norm": 0.9796117501965532,
      "learning_rate": 1.6832558245506937e-05,
      "loss": 0.8972,
      "step": 249
    },
    {
      "epoch": 0.28439388553146105,
      "grad_norm": 0.9620774960278952,
      "learning_rate": 1.6805587853014895e-05,
      "loss": 0.8795,
      "step": 250
    },
    {
      "epoch": 0.2855314610735869,
      "grad_norm": 0.9779292355070162,
      "learning_rate": 1.6778524929852513e-05,
      "loss": 0.7849,
      "step": 251
    },
    {
      "epoch": 0.28666903661571275,
      "grad_norm": 1.1391374042408926,
      "learning_rate": 1.6751369843974842e-05,
      "loss": 0.8935,
      "step": 252
    },
    {
      "epoch": 0.2878066121578386,
      "grad_norm": 0.9763354939311976,
      "learning_rate": 1.6724122964590002e-05,
      "loss": 0.6899,
      "step": 253
    },
    {
      "epoch": 0.28894418769996444,
      "grad_norm": 1.0505980346872885,
      "learning_rate": 1.6696784662154165e-05,
      "loss": 0.7091,
      "step": 254
    },
    {
      "epoch": 0.2900817632420903,
      "grad_norm": 0.961207072486454,
      "learning_rate": 1.666935530836651e-05,
      "loss": 0.77,
      "step": 255
    },
    {
      "epoch": 0.29121933878421613,
      "grad_norm": 1.022189843385111,
      "learning_rate": 1.6641835276164182e-05,
      "loss": 0.8021,
      "step": 256
    },
    {
      "epoch": 0.292356914326342,
      "grad_norm": 0.9676687321518108,
      "learning_rate": 1.6614224939717217e-05,
      "loss": 0.7338,
      "step": 257
    },
    {
      "epoch": 0.2934944898684678,
      "grad_norm": 1.031020389826052,
      "learning_rate": 1.658652467442345e-05,
      "loss": 0.8083,
      "step": 258
    },
    {
      "epoch": 0.29463206541059367,
      "grad_norm": 0.9420183191034406,
      "learning_rate": 1.6558734856903406e-05,
      "loss": 0.651,
      "step": 259
    },
    {
      "epoch": 0.2957696409527195,
      "grad_norm": 0.9746154980433869,
      "learning_rate": 1.6530855864995194e-05,
      "loss": 0.8235,
      "step": 260
    },
    {
      "epoch": 0.29690721649484536,
      "grad_norm": 0.9637613954906972,
      "learning_rate": 1.650288807774937e-05,
      "loss": 0.7806,
      "step": 261
    },
    {
      "epoch": 0.2980447920369712,
      "grad_norm": 0.9521280823765289,
      "learning_rate": 1.6474831875423765e-05,
      "loss": 0.7848,
      "step": 262
    },
    {
      "epoch": 0.29918236757909705,
      "grad_norm": 0.9106705929775119,
      "learning_rate": 1.644668763947833e-05,
      "loss": 0.7158,
      "step": 263
    },
    {
      "epoch": 0.3003199431212229,
      "grad_norm": 1.1111068932353543,
      "learning_rate": 1.6418455752569945e-05,
      "loss": 0.7949,
      "step": 264
    },
    {
      "epoch": 0.30145751866334874,
      "grad_norm": 0.993566678238848,
      "learning_rate": 1.639013659854722e-05,
      "loss": 0.7514,
      "step": 265
    },
    {
      "epoch": 0.3025950942054746,
      "grad_norm": 0.9530703838625129,
      "learning_rate": 1.6361730562445264e-05,
      "loss": 0.7807,
      "step": 266
    },
    {
      "epoch": 0.30373266974760044,
      "grad_norm": 0.8949330809420218,
      "learning_rate": 1.6333238030480473e-05,
      "loss": 0.6888,
      "step": 267
    },
    {
      "epoch": 0.3048702452897263,
      "grad_norm": 0.8988538789326558,
      "learning_rate": 1.6304659390045253e-05,
      "loss": 0.7264,
      "step": 268
    },
    {
      "epoch": 0.30600782083185213,
      "grad_norm": 1.0074831146025507,
      "learning_rate": 1.627599502970277e-05,
      "loss": 0.7832,
      "step": 269
    },
    {
      "epoch": 0.307145396373978,
      "grad_norm": 0.9898618247508415,
      "learning_rate": 1.624724533918166e-05,
      "loss": 0.7412,
      "step": 270
    },
    {
      "epoch": 0.3082829719161038,
      "grad_norm": 0.9571059882247707,
      "learning_rate": 1.6218410709370735e-05,
      "loss": 0.7214,
      "step": 271
    },
    {
      "epoch": 0.30942054745822967,
      "grad_norm": 1.0048100811814527,
      "learning_rate": 1.6189491532313665e-05,
      "loss": 0.8121,
      "step": 272
    },
    {
      "epoch": 0.3105581230003555,
      "grad_norm": 0.887082066422311,
      "learning_rate": 1.6160488201203643e-05,
      "loss": 0.8081,
      "step": 273
    },
    {
      "epoch": 0.31169569854248136,
      "grad_norm": 1.003918167712565,
      "learning_rate": 1.6131401110378045e-05,
      "loss": 0.7442,
      "step": 274
    },
    {
      "epoch": 0.3128332740846072,
      "grad_norm": 0.9479739767796763,
      "learning_rate": 1.6102230655313076e-05,
      "loss": 0.7854,
      "step": 275
    },
    {
      "epoch": 0.31397084962673305,
      "grad_norm": 0.9644306636148626,
      "learning_rate": 1.607297723261837e-05,
      "loss": 0.852,
      "step": 276
    },
    {
      "epoch": 0.3151084251688589,
      "grad_norm": 0.9590037676007116,
      "learning_rate": 1.6043641240031623e-05,
      "loss": 0.7551,
      "step": 277
    },
    {
      "epoch": 0.3162460007109847,
      "grad_norm": 0.8703921927089772,
      "learning_rate": 1.6014223076413173e-05,
      "loss": 0.7418,
      "step": 278
    },
    {
      "epoch": 0.31738357625311053,
      "grad_norm": 0.9581454527420551,
      "learning_rate": 1.5984723141740578e-05,
      "loss": 0.7117,
      "step": 279
    },
    {
      "epoch": 0.3185211517952364,
      "grad_norm": 0.9806012064667501,
      "learning_rate": 1.595514183710317e-05,
      "loss": 0.8572,
      "step": 280
    },
    {
      "epoch": 0.3196587273373622,
      "grad_norm": 0.8953483309121879,
      "learning_rate": 1.592547956469662e-05,
      "loss": 0.8641,
      "step": 281
    },
    {
      "epoch": 0.32079630287948807,
      "grad_norm": 0.8922059600985867,
      "learning_rate": 1.5895736727817457e-05,
      "loss": 0.7629,
      "step": 282
    },
    {
      "epoch": 0.3219338784216139,
      "grad_norm": 0.924903003131493,
      "learning_rate": 1.5865913730857583e-05,
      "loss": 0.6772,
      "step": 283
    },
    {
      "epoch": 0.32307145396373976,
      "grad_norm": 0.9253210263661452,
      "learning_rate": 1.5836010979298785e-05,
      "loss": 0.7024,
      "step": 284
    },
    {
      "epoch": 0.3242090295058656,
      "grad_norm": 0.9598832346761834,
      "learning_rate": 1.580602887970721e-05,
      "loss": 0.6624,
      "step": 285
    },
    {
      "epoch": 0.32534660504799146,
      "grad_norm": 0.9502477784841414,
      "learning_rate": 1.5775967839727843e-05,
      "loss": 0.7923,
      "step": 286
    },
    {
      "epoch": 0.3264841805901173,
      "grad_norm": 0.9306848601779047,
      "learning_rate": 1.574582826807897e-05,
      "loss": 0.6541,
      "step": 287
    },
    {
      "epoch": 0.32762175613224315,
      "grad_norm": 0.9355305194278158,
      "learning_rate": 1.5715610574546612e-05,
      "loss": 0.6918,
      "step": 288
    },
    {
      "epoch": 0.328759331674369,
      "grad_norm": 1.0272254475487725,
      "learning_rate": 1.5685315169978955e-05,
      "loss": 0.8111,
      "step": 289
    },
    {
      "epoch": 0.32989690721649484,
      "grad_norm": 1.0334588348504568,
      "learning_rate": 1.565494246628077e-05,
      "loss": 0.7289,
      "step": 290
    },
    {
      "epoch": 0.3310344827586207,
      "grad_norm": 0.9170599752827013,
      "learning_rate": 1.562449287640781e-05,
      "loss": 0.7852,
      "step": 291
    },
    {
      "epoch": 0.33217205830074653,
      "grad_norm": 0.9222463038652008,
      "learning_rate": 1.5593966814361183e-05,
      "loss": 0.7118,
      "step": 292
    },
    {
      "epoch": 0.3333096338428724,
      "grad_norm": 1.0228401075311697,
      "learning_rate": 1.556336469518174e-05,
      "loss": 0.7166,
      "step": 293
    },
    {
      "epoch": 0.3344472093849982,
      "grad_norm": 0.9867811982354191,
      "learning_rate": 1.553268693494444e-05,
      "loss": 0.7237,
      "step": 294
    },
    {
      "epoch": 0.33558478492712407,
      "grad_norm": 0.929004653861396,
      "learning_rate": 1.5501933950752655e-05,
      "loss": 0.7315,
      "step": 295
    },
    {
      "epoch": 0.3367223604692499,
      "grad_norm": 0.9711989755609595,
      "learning_rate": 1.5471106160732543e-05,
      "loss": 0.6733,
      "step": 296
    },
    {
      "epoch": 0.33785993601137576,
      "grad_norm": 1.0543195742090818,
      "learning_rate": 1.5440203984027323e-05,
      "loss": 0.7116,
      "step": 297
    },
    {
      "epoch": 0.3389975115535016,
      "grad_norm": 1.0061655744197207,
      "learning_rate": 1.5409227840791617e-05,
      "loss": 0.7517,
      "step": 298
    },
    {
      "epoch": 0.34013508709562745,
      "grad_norm": 0.9690488111835885,
      "learning_rate": 1.5378178152185703e-05,
      "loss": 0.8389,
      "step": 299
    },
    {
      "epoch": 0.3412726626377533,
      "grad_norm": 0.9582246999248494,
      "learning_rate": 1.5347055340369806e-05,
      "loss": 0.798,
      "step": 300
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 879,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 435534387740672.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}