|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 677, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014771048744460858, |
|
"grad_norm": 2.1287364959716797, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4334, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0029542097488921715, |
|
"grad_norm": 1.5033221244812012, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1642, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004431314623338257, |
|
"grad_norm": 1.5286452770233154, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9292, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005908419497784343, |
|
"grad_norm": 1.2362889051437378, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7608, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.007385524372230428, |
|
"grad_norm": 1.0593241453170776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6653, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008862629246676515, |
|
"grad_norm": 1.0034171342849731, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5738, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0103397341211226, |
|
"grad_norm": 0.723822832107544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5328, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.011816838995568686, |
|
"grad_norm": 0.7309075593948364, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5088, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.013293943870014771, |
|
"grad_norm": 0.6442256569862366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4999, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.014771048744460856, |
|
"grad_norm": 0.6145352721214294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5046, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01624815361890694, |
|
"grad_norm": 0.5789129734039307, |
|
"learning_rate": 0.0002, |
|
"loss": 0.489, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01772525849335303, |
|
"grad_norm": 0.5824376940727234, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5328, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.019202363367799114, |
|
"grad_norm": 0.5699394941329956, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4755, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0206794682422452, |
|
"grad_norm": 0.5292893052101135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4108, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.022156573116691284, |
|
"grad_norm": 0.5537489056587219, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4807, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.023633677991137372, |
|
"grad_norm": 0.546784520149231, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4427, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.025110782865583457, |
|
"grad_norm": 0.5094020962715149, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4617, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.026587887740029542, |
|
"grad_norm": 0.549403190612793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.452, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.028064992614475627, |
|
"grad_norm": 0.47281214594841003, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3916, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.029542097488921712, |
|
"grad_norm": 0.4933842122554779, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4344, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0310192023633678, |
|
"grad_norm": 0.5650342106819153, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5192, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03249630723781388, |
|
"grad_norm": 0.5102580189704895, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4521, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.033973412112259974, |
|
"grad_norm": 0.47124335169792175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3719, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03545051698670606, |
|
"grad_norm": 0.4769236445426941, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4359, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03692762186115214, |
|
"grad_norm": 0.49603205919265747, |
|
"learning_rate": 0.0002, |
|
"loss": 0.438, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03840472673559823, |
|
"grad_norm": 0.42155203223228455, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3311, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03988183161004431, |
|
"grad_norm": 0.4394625723361969, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4033, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0413589364844904, |
|
"grad_norm": 0.4578387141227722, |
|
"learning_rate": 0.0002, |
|
"loss": 0.399, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04283604135893648, |
|
"grad_norm": 0.4147898256778717, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3599, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04431314623338257, |
|
"grad_norm": 0.47084635496139526, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4668, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04579025110782865, |
|
"grad_norm": 0.399994820356369, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3108, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.047267355982274745, |
|
"grad_norm": 0.4256761074066162, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3928, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04874446085672083, |
|
"grad_norm": 0.4237106442451477, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4036, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.050221565731166914, |
|
"grad_norm": 0.4622955024242401, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4394, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.051698670605613, |
|
"grad_norm": 0.8845525979995728, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3714, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.053175775480059084, |
|
"grad_norm": 0.3846614360809326, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3625, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05465288035450517, |
|
"grad_norm": 0.41804981231689453, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4027, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.056129985228951254, |
|
"grad_norm": 0.3947773575782776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3523, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05760709010339734, |
|
"grad_norm": 0.3716173470020294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3333, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.059084194977843424, |
|
"grad_norm": 0.4511498808860779, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4104, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.060561299852289516, |
|
"grad_norm": 0.4428117573261261, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4217, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0620384047267356, |
|
"grad_norm": 0.4312277138233185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4458, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06351550960118169, |
|
"grad_norm": 0.4207220673561096, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4206, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06499261447562776, |
|
"grad_norm": 0.4463505744934082, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3911, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06646971935007386, |
|
"grad_norm": 0.4605293869972229, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4154, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06794682422451995, |
|
"grad_norm": 0.380751371383667, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3556, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06942392909896603, |
|
"grad_norm": 0.3776094615459442, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3275, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07090103397341212, |
|
"grad_norm": 0.39152535796165466, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3749, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0723781388478582, |
|
"grad_norm": 0.4888671338558197, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4408, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07385524372230429, |
|
"grad_norm": 0.38958850502967834, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3551, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07533234859675036, |
|
"grad_norm": 0.39890560507774353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.387, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07680945347119646, |
|
"grad_norm": 0.4128841757774353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3945, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07828655834564253, |
|
"grad_norm": 0.45516759157180786, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4049, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07976366322008863, |
|
"grad_norm": 0.4038144648075104, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3789, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08124076809453472, |
|
"grad_norm": 0.37849175930023193, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3955, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0827178729689808, |
|
"grad_norm": 0.4295189082622528, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4112, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.08419497784342689, |
|
"grad_norm": 0.4347020387649536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4542, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08567208271787297, |
|
"grad_norm": 0.41407692432403564, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4035, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08714918759231906, |
|
"grad_norm": 0.33283814787864685, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2851, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08862629246676514, |
|
"grad_norm": 0.39427581429481506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4494, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09010339734121123, |
|
"grad_norm": 1.3357727527618408, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3335, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0915805022156573, |
|
"grad_norm": 0.37050360441207886, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3224, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0930576070901034, |
|
"grad_norm": 0.36000698804855347, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3679, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.09453471196454949, |
|
"grad_norm": 0.3739371597766876, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4041, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09601181683899557, |
|
"grad_norm": 0.3365491032600403, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3462, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09748892171344166, |
|
"grad_norm": 0.3357471823692322, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3416, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09896602658788774, |
|
"grad_norm": 0.38020288944244385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3521, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.10044313146233383, |
|
"grad_norm": 0.37143656611442566, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3873, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1019202363367799, |
|
"grad_norm": 0.3613298535346985, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3695, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.103397341211226, |
|
"grad_norm": 0.3881225287914276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3686, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.10487444608567208, |
|
"grad_norm": 0.35213181376457214, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3276, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.10635155096011817, |
|
"grad_norm": 0.3477317988872528, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3261, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.10782865583456426, |
|
"grad_norm": 0.326730340719223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2784, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.10930576070901034, |
|
"grad_norm": 0.3316071629524231, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3317, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.11078286558345643, |
|
"grad_norm": 0.37388283014297485, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3845, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.11225997045790251, |
|
"grad_norm": 0.39761313796043396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4043, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1137370753323486, |
|
"grad_norm": 0.35033172369003296, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3212, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.11521418020679468, |
|
"grad_norm": 0.7551948428153992, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3387, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.11669128508124077, |
|
"grad_norm": 0.2940291166305542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.11816838995568685, |
|
"grad_norm": 0.4048764407634735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4176, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11964549483013294, |
|
"grad_norm": 0.36520177125930786, |
|
"learning_rate": 0.0002, |
|
"loss": 0.317, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.12112259970457903, |
|
"grad_norm": 0.3602144718170166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3648, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.12259970457902511, |
|
"grad_norm": 0.34669214487075806, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3389, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1240768094534712, |
|
"grad_norm": 0.34198257327079773, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3174, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1255539143279173, |
|
"grad_norm": 0.3409755825996399, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3376, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.12703101920236337, |
|
"grad_norm": 0.38363194465637207, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4002, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.12850812407680945, |
|
"grad_norm": 0.35614731907844543, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3581, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.12998522895125553, |
|
"grad_norm": 0.3808327615261078, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3966, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.13146233382570163, |
|
"grad_norm": 0.3924517035484314, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4161, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1329394387001477, |
|
"grad_norm": 0.3589531183242798, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3233, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1344165435745938, |
|
"grad_norm": 0.37429341673851013, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3778, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1358936484490399, |
|
"grad_norm": 0.3594294488430023, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3472, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.13737075332348597, |
|
"grad_norm": 0.3481505215167999, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2961, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.13884785819793205, |
|
"grad_norm": 0.3697575330734253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3954, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.14032496307237813, |
|
"grad_norm": 0.3154103457927704, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3148, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.14180206794682423, |
|
"grad_norm": 0.32966312766075134, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3211, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1432791728212703, |
|
"grad_norm": 0.3409123718738556, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3318, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1447562776957164, |
|
"grad_norm": 0.346122682094574, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3296, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.14623338257016247, |
|
"grad_norm": 0.35875195264816284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3884, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.14771048744460857, |
|
"grad_norm": 0.3223486542701721, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3282, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14918759231905465, |
|
"grad_norm": 0.34657180309295654, |
|
"learning_rate": 0.0002, |
|
"loss": 0.364, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.15066469719350073, |
|
"grad_norm": 0.34456005692481995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3541, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.15214180206794684, |
|
"grad_norm": 0.3482792377471924, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3435, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.1536189069423929, |
|
"grad_norm": 0.37781214714050293, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3716, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.155096011816839, |
|
"grad_norm": 0.46567779779434204, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3143, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.15657311669128507, |
|
"grad_norm": 0.32534581422805786, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3561, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.15805022156573117, |
|
"grad_norm": 0.3262612521648407, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3396, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.15952732644017725, |
|
"grad_norm": 0.3691346049308777, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4014, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.16100443131462333, |
|
"grad_norm": 0.36267197132110596, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3768, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.16248153618906944, |
|
"grad_norm": 0.3206377923488617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.326, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16395864106351551, |
|
"grad_norm": 0.32631710171699524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3438, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1654357459379616, |
|
"grad_norm": 0.33969393372535706, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3602, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.16691285081240767, |
|
"grad_norm": 0.361987829208374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3378, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.16838995568685378, |
|
"grad_norm": 0.33116045594215393, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3444, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.16986706056129985, |
|
"grad_norm": 0.3474065363407135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3717, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17134416543574593, |
|
"grad_norm": 0.3335750699043274, |
|
"learning_rate": 0.0002, |
|
"loss": 0.351, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.172821270310192, |
|
"grad_norm": 0.34676527976989746, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3536, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.17429837518463812, |
|
"grad_norm": 0.36145490407943726, |
|
"learning_rate": 0.0002, |
|
"loss": 0.407, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1757754800590842, |
|
"grad_norm": 0.3694964647293091, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4143, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.17725258493353027, |
|
"grad_norm": 0.31005293130874634, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3306, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17872968980797638, |
|
"grad_norm": 0.32366085052490234, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3342, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.18020679468242246, |
|
"grad_norm": 0.3252504765987396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3503, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.18168389955686853, |
|
"grad_norm": 0.32292550802230835, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3694, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.1831610044313146, |
|
"grad_norm": 0.32740291953086853, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3296, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.18463810930576072, |
|
"grad_norm": 0.3438139855861664, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3107, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1861152141802068, |
|
"grad_norm": 0.33904099464416504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.39, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.18759231905465287, |
|
"grad_norm": 0.3464205265045166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3679, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.18906942392909898, |
|
"grad_norm": 0.3387203514575958, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3375, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.19054652880354506, |
|
"grad_norm": 0.40050801634788513, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3965, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.19202363367799113, |
|
"grad_norm": 0.31067872047424316, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3108, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1935007385524372, |
|
"grad_norm": 0.35977062582969666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4023, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.19497784342688332, |
|
"grad_norm": 0.3153740167617798, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3317, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1964549483013294, |
|
"grad_norm": 0.3306857645511627, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3408, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.19793205317577547, |
|
"grad_norm": 0.32012930512428284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3218, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.19940915805022155, |
|
"grad_norm": 0.3159703314304352, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3481, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.20088626292466766, |
|
"grad_norm": 0.3230080306529999, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3779, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.20236336779911374, |
|
"grad_norm": 0.34753701090812683, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3775, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2038404726735598, |
|
"grad_norm": 0.3315640687942505, |
|
"learning_rate": 0.0002, |
|
"loss": 0.339, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.20531757754800592, |
|
"grad_norm": 0.33685439825057983, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3575, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.206794682422452, |
|
"grad_norm": 0.3179871439933777, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3338, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.20827178729689808, |
|
"grad_norm": 0.32391220331192017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3665, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.20974889217134415, |
|
"grad_norm": 0.3102681338787079, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2948, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.21122599704579026, |
|
"grad_norm": 0.33224979043006897, |
|
"learning_rate": 0.0002, |
|
"loss": 0.392, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.21270310192023634, |
|
"grad_norm": 0.30173906683921814, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2775, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.21418020679468242, |
|
"grad_norm": 0.3212149739265442, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3408, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.21565731166912852, |
|
"grad_norm": 0.3113839328289032, |
|
"learning_rate": 0.0002, |
|
"loss": 0.314, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2171344165435746, |
|
"grad_norm": 0.3435472548007965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3617, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.21861152141802068, |
|
"grad_norm": 0.3423033058643341, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3523, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.22008862629246675, |
|
"grad_norm": 0.3202575445175171, |
|
"learning_rate": 0.0002, |
|
"loss": 0.349, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.22156573116691286, |
|
"grad_norm": 0.2999582886695862, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2906, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22304283604135894, |
|
"grad_norm": 0.33576205372810364, |
|
"learning_rate": 0.0002, |
|
"loss": 0.329, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.22451994091580502, |
|
"grad_norm": 0.31811273097991943, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3151, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2259970457902511, |
|
"grad_norm": 0.34126049280166626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.335, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2274741506646972, |
|
"grad_norm": 0.29068347811698914, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2996, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.22895125553914328, |
|
"grad_norm": 0.3677709698677063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.357, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.23042836041358936, |
|
"grad_norm": 0.319380521774292, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3283, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.23190546528803546, |
|
"grad_norm": 0.2935948669910431, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2755, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.23338257016248154, |
|
"grad_norm": 0.30784815549850464, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3171, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.23485967503692762, |
|
"grad_norm": 0.3345930874347687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3526, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.2363367799113737, |
|
"grad_norm": 0.3269497752189636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3492, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2378138847858198, |
|
"grad_norm": 0.32217973470687866, |
|
"learning_rate": 0.0002, |
|
"loss": 0.36, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.23929098966026588, |
|
"grad_norm": 0.3381323516368866, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3534, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.24076809453471196, |
|
"grad_norm": 0.3131888210773468, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3224, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.24224519940915806, |
|
"grad_norm": 0.30917319655418396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3132, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.24372230428360414, |
|
"grad_norm": 0.31469786167144775, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3218, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.24519940915805022, |
|
"grad_norm": 0.31420794129371643, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3471, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.2466765140324963, |
|
"grad_norm": 0.31471043825149536, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3056, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.2481536189069424, |
|
"grad_norm": 0.30315864086151123, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3355, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.24963072378138848, |
|
"grad_norm": 0.29710718989372253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3077, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.2511078286558346, |
|
"grad_norm": 0.30408531427383423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3087, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.25258493353028066, |
|
"grad_norm": 0.29702916741371155, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2993, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.25406203840472674, |
|
"grad_norm": 0.2939663827419281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2996, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2555391432791728, |
|
"grad_norm": 0.36591342091560364, |
|
"learning_rate": 0.0002, |
|
"loss": 0.356, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.2570162481536189, |
|
"grad_norm": 0.30867043137550354, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2961, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.258493353028065, |
|
"grad_norm": 0.34252026677131653, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3849, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.25997045790251105, |
|
"grad_norm": 0.34753838181495667, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3838, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.2614475627769572, |
|
"grad_norm": 0.31399980187416077, |
|
"learning_rate": 0.0002, |
|
"loss": 0.33, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.26292466765140327, |
|
"grad_norm": 0.32648637890815735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3678, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.26440177252584934, |
|
"grad_norm": 0.2866675853729248, |
|
"learning_rate": 0.0002, |
|
"loss": 0.295, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.2658788774002954, |
|
"grad_norm": 0.32054954767227173, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3342, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2673559822747415, |
|
"grad_norm": 0.30476486682891846, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3381, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.2688330871491876, |
|
"grad_norm": 0.2891450524330139, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2984, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.27031019202363366, |
|
"grad_norm": 0.3023356795310974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2991, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.2717872968980798, |
|
"grad_norm": 0.31025779247283936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3198, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.27326440177252587, |
|
"grad_norm": 0.27903226017951965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.274, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.27474150664697194, |
|
"grad_norm": 0.2925949692726135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3051, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.276218611521418, |
|
"grad_norm": 0.3387667238712311, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3677, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.2776957163958641, |
|
"grad_norm": 0.316540390253067, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3196, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.2791728212703102, |
|
"grad_norm": 0.3089348375797272, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3338, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.28064992614475626, |
|
"grad_norm": 0.313431054353714, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3178, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2821270310192024, |
|
"grad_norm": 0.30025985836982727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3086, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.28360413589364847, |
|
"grad_norm": 0.3058534860610962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3128, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.28508124076809455, |
|
"grad_norm": 0.334710031747818, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3418, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.2865583456425406, |
|
"grad_norm": 0.3021548092365265, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2995, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.2880354505169867, |
|
"grad_norm": 0.27398747205734253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2743, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.2895125553914328, |
|
"grad_norm": 0.33194372057914734, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2824, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.29098966026587886, |
|
"grad_norm": 0.3193664848804474, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3361, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.29246676514032494, |
|
"grad_norm": 0.3320102393627167, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3154, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.29394387001477107, |
|
"grad_norm": 0.2951314449310303, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2699, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.29542097488921715, |
|
"grad_norm": 0.3117165267467499, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3359, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2968980797636632, |
|
"grad_norm": 0.30885782837867737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3181, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.2983751846381093, |
|
"grad_norm": 0.3114778399467468, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3409, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.2998522895125554, |
|
"grad_norm": 0.32142388820648193, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3491, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.30132939438700146, |
|
"grad_norm": 0.3159630000591278, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3176, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.30280649926144754, |
|
"grad_norm": 0.2813749313354492, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2745, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.30428360413589367, |
|
"grad_norm": 0.3174036145210266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3527, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.30576070901033975, |
|
"grad_norm": 0.311678409576416, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3075, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.3072378138847858, |
|
"grad_norm": 0.2867993712425232, |
|
"learning_rate": 0.0002, |
|
"loss": 0.32, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3087149187592319, |
|
"grad_norm": 0.29298824071884155, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3226, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.310192023633678, |
|
"grad_norm": 0.3173938989639282, |
|
"learning_rate": 0.0002, |
|
"loss": 0.32, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31166912850812406, |
|
"grad_norm": 0.27944210171699524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2825, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.31314623338257014, |
|
"grad_norm": 0.3196215331554413, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3321, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.31462333825701627, |
|
"grad_norm": 0.3193184733390808, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3394, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.31610044313146235, |
|
"grad_norm": 0.2783777713775635, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3134, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.3175775480059084, |
|
"grad_norm": 0.35627251863479614, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3973, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3190546528803545, |
|
"grad_norm": 0.32312896847724915, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3388, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3205317577548006, |
|
"grad_norm": 0.2931472659111023, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3134, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.32200886262924666, |
|
"grad_norm": 0.3059196174144745, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3249, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.32348596750369274, |
|
"grad_norm": 0.3171478807926178, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3459, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.3249630723781389, |
|
"grad_norm": 0.31810346245765686, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3455, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.32644017725258495, |
|
"grad_norm": 0.30696892738342285, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3037, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.32791728212703103, |
|
"grad_norm": 0.3519222140312195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3585, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3293943870014771, |
|
"grad_norm": 0.2762470245361328, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2615, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.3308714918759232, |
|
"grad_norm": 0.2909640967845917, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2994, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.33234859675036926, |
|
"grad_norm": 0.3310638964176178, |
|
"learning_rate": 0.0002, |
|
"loss": 0.368, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.33382570162481534, |
|
"grad_norm": 0.337228387594223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.358, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3353028064992615, |
|
"grad_norm": 0.3182266652584076, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3425, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.33677991137370755, |
|
"grad_norm": 0.32053616642951965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3604, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.33825701624815363, |
|
"grad_norm": 0.3377324342727661, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3783, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3397341211225997, |
|
"grad_norm": 0.28743067383766174, |
|
"learning_rate": 0.0002, |
|
"loss": 0.304, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3412112259970458, |
|
"grad_norm": 0.30108213424682617, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3129, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.34268833087149186, |
|
"grad_norm": 0.3191213607788086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3331, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.34416543574593794, |
|
"grad_norm": 0.2999110519886017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3074, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.345642540620384, |
|
"grad_norm": 0.2682500183582306, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2635, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.34711964549483015, |
|
"grad_norm": 0.2817941904067993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3048, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.34859675036927623, |
|
"grad_norm": 0.3110464811325073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3228, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.3500738552437223, |
|
"grad_norm": 0.3088606297969818, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3161, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.3515509601181684, |
|
"grad_norm": 0.2990322411060333, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3085, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.35302806499261447, |
|
"grad_norm": 0.33097386360168457, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3615, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.35450516986706054, |
|
"grad_norm": 0.3397606313228607, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3957, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3559822747415066, |
|
"grad_norm": 0.2756197452545166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2731, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.35745937961595275, |
|
"grad_norm": 0.3435852825641632, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3855, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.35893648449039883, |
|
"grad_norm": 0.33727383613586426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3101, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.3604135893648449, |
|
"grad_norm": 0.3684369921684265, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3378, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.361890694239291, |
|
"grad_norm": 0.3006575107574463, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3295, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.36336779911373707, |
|
"grad_norm": 0.31223273277282715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2977, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.36484490398818314, |
|
"grad_norm": 0.3001905381679535, |
|
"learning_rate": 0.0002, |
|
"loss": 0.294, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.3663220088626292, |
|
"grad_norm": 0.2907404899597168, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2839, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.36779911373707536, |
|
"grad_norm": 0.31060346961021423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3333, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.36927621861152143, |
|
"grad_norm": 0.3394862413406372, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3217, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3707533234859675, |
|
"grad_norm": 0.2912856340408325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3072, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.3722304283604136, |
|
"grad_norm": 0.2991478741168976, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3349, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.37370753323485967, |
|
"grad_norm": 0.304868221282959, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3142, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.37518463810930575, |
|
"grad_norm": 0.3008173704147339, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3166, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.3766617429837518, |
|
"grad_norm": 0.290526807308197, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3228, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.37813884785819796, |
|
"grad_norm": 0.2846904695034027, |
|
"learning_rate": 0.0002, |
|
"loss": 0.31, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.37961595273264404, |
|
"grad_norm": 0.306904137134552, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3238, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.3810930576070901, |
|
"grad_norm": 0.30683666467666626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3327, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.3825701624815362, |
|
"grad_norm": 0.2824447751045227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2962, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.38404726735598227, |
|
"grad_norm": 0.29804757237434387, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3025, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.38552437223042835, |
|
"grad_norm": 0.3133246600627899, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3095, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.3870014771048744, |
|
"grad_norm": 0.3098774254322052, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3031, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.38847858197932056, |
|
"grad_norm": 0.3248344361782074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3402, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.38995568685376664, |
|
"grad_norm": 0.30645236372947693, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3277, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3914327917282127, |
|
"grad_norm": 0.29753726720809937, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3322, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.3929098966026588, |
|
"grad_norm": 0.33593639731407166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.337, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.39438700147710487, |
|
"grad_norm": 0.3059685528278351, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2896, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.39586410635155095, |
|
"grad_norm": 0.30055829882621765, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3385, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.397341211225997, |
|
"grad_norm": 0.27567949891090393, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2799, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.3988183161004431, |
|
"grad_norm": 0.33319681882858276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3735, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.40029542097488924, |
|
"grad_norm": 0.28851690888404846, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2934, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.4017725258493353, |
|
"grad_norm": 0.3188093900680542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3276, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.4032496307237814, |
|
"grad_norm": 0.29944342374801636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3351, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.40472673559822747, |
|
"grad_norm": 0.31611138582229614, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3616, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.40620384047267355, |
|
"grad_norm": 0.3243541419506073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3394, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4076809453471196, |
|
"grad_norm": 0.31130653619766235, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3182, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4091580502215657, |
|
"grad_norm": 0.2761830687522888, |
|
"learning_rate": 0.0002, |
|
"loss": 0.23, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.41063515509601184, |
|
"grad_norm": 0.3256094455718994, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3921, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.4121122599704579, |
|
"grad_norm": 0.30812302231788635, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3559, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.413589364844904, |
|
"grad_norm": 0.28198400139808655, |
|
"learning_rate": 0.0002, |
|
"loss": 0.285, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4150664697193501, |
|
"grad_norm": 0.2873023450374603, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2963, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.41654357459379615, |
|
"grad_norm": 0.29413530230522156, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2885, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.41802067946824223, |
|
"grad_norm": 0.2963588237762451, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2996, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4194977843426883, |
|
"grad_norm": 0.2581465542316437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.255, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.42097488921713444, |
|
"grad_norm": 0.3365771472454071, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3473, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4224519940915805, |
|
"grad_norm": 0.3092253804206848, |
|
"learning_rate": 0.0002, |
|
"loss": 0.29, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.4239290989660266, |
|
"grad_norm": 0.300626277923584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3183, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.4254062038404727, |
|
"grad_norm": 0.3320425748825073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3322, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.42688330871491875, |
|
"grad_norm": 0.2749597728252411, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2995, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.42836041358936483, |
|
"grad_norm": 0.280134916305542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2729, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4298375184638109, |
|
"grad_norm": 0.27060407400131226, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2694, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.43131462333825704, |
|
"grad_norm": 0.28500011563301086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2852, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.4327917282127031, |
|
"grad_norm": 0.2733040452003479, |
|
"learning_rate": 0.0002, |
|
"loss": 0.276, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4342688330871492, |
|
"grad_norm": 0.30365538597106934, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2806, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4357459379615953, |
|
"grad_norm": 0.3079434335231781, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3014, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.43722304283604135, |
|
"grad_norm": 0.2746562659740448, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2601, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.43870014771048743, |
|
"grad_norm": 0.3027852475643158, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2978, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.4401772525849335, |
|
"grad_norm": 0.2862493395805359, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2712, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.44165435745937964, |
|
"grad_norm": 0.30820953845977783, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3198, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.4431314623338257, |
|
"grad_norm": 0.2891389727592468, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4446085672082718, |
|
"grad_norm": 0.29976293444633484, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3155, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4460856720827179, |
|
"grad_norm": 0.26029616594314575, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2585, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.44756277695716395, |
|
"grad_norm": 0.2925141751766205, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2799, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.44903988183161003, |
|
"grad_norm": 0.3378995358943939, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3397, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4505169867060561, |
|
"grad_norm": 0.3140377104282379, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3065, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4519940915805022, |
|
"grad_norm": 0.30882659554481506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2976, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4534711964549483, |
|
"grad_norm": 0.2986995577812195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3086, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4549483013293944, |
|
"grad_norm": 0.37128734588623047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3018, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4564254062038405, |
|
"grad_norm": 0.2967352271080017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2855, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.45790251107828656, |
|
"grad_norm": 0.3116573691368103, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3248, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.45937961595273263, |
|
"grad_norm": 0.27394649386405945, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2894, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.4608567208271787, |
|
"grad_norm": 0.31190183758735657, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3311, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.4623338257016248, |
|
"grad_norm": 0.28978461027145386, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2896, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.4638109305760709, |
|
"grad_norm": 0.29586443305015564, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3062, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.465288035450517, |
|
"grad_norm": 0.3034004271030426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2797, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.4667651403249631, |
|
"grad_norm": 0.3083277940750122, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2912, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.46824224519940916, |
|
"grad_norm": 0.31153154373168945, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3403, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.46971935007385524, |
|
"grad_norm": 0.26065292954444885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2289, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.4711964549483013, |
|
"grad_norm": 0.29736757278442383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3202, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.4726735598227474, |
|
"grad_norm": 0.362541526556015, |
|
"learning_rate": 0.0002, |
|
"loss": 0.393, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4741506646971935, |
|
"grad_norm": 0.3045463263988495, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2843, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4756277695716396, |
|
"grad_norm": 0.33905521035194397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.368, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.4771048744460857, |
|
"grad_norm": 0.3574953079223633, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2273, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.47858197932053176, |
|
"grad_norm": 0.3134016990661621, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3134, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.48005908419497784, |
|
"grad_norm": 0.32262158393859863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3028, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4815361890694239, |
|
"grad_norm": 0.26441511511802673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2447, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.48301329394387, |
|
"grad_norm": 0.3419596552848816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3258, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.4844903988183161, |
|
"grad_norm": 0.3031555116176605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.322, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.4859675036927622, |
|
"grad_norm": 0.29226183891296387, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2803, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.4874446085672083, |
|
"grad_norm": 0.2874895930290222, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2824, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.48892171344165436, |
|
"grad_norm": 0.31009188294410706, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3218, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.49039881831610044, |
|
"grad_norm": 0.31250134110450745, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2972, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.4918759231905465, |
|
"grad_norm": 0.30231741070747375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3255, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.4933530280649926, |
|
"grad_norm": 0.32139065861701965, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3712, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.4948301329394387, |
|
"grad_norm": 0.2788805365562439, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3069, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.4963072378138848, |
|
"grad_norm": 0.3206048011779785, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3519, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.4977843426883309, |
|
"grad_norm": 0.316514253616333, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3554, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.49926144756277696, |
|
"grad_norm": 0.3080296516418457, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3366, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5007385524372231, |
|
"grad_norm": 0.3183678090572357, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2967, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.5022156573116692, |
|
"grad_norm": 0.31313014030456543, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3188, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5036927621861153, |
|
"grad_norm": 0.2989446520805359, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3235, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5051698670605613, |
|
"grad_norm": 0.2817307412624359, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2959, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.5066469719350074, |
|
"grad_norm": 0.487758606672287, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3308, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.5081240768094535, |
|
"grad_norm": 0.26448920369148254, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2942, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5096011816838996, |
|
"grad_norm": 0.3182467818260193, |
|
"learning_rate": 0.0002, |
|
"loss": 0.291, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5110782865583456, |
|
"grad_norm": 0.2950560450553894, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3014, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5125553914327917, |
|
"grad_norm": 0.3176344633102417, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3425, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5140324963072378, |
|
"grad_norm": 0.30496424436569214, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3447, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5155096011816839, |
|
"grad_norm": 0.28272292017936707, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2645, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.51698670605613, |
|
"grad_norm": 0.2600267231464386, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2525, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.518463810930576, |
|
"grad_norm": 0.2765870988368988, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2907, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5199409158050221, |
|
"grad_norm": 0.30320316553115845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3404, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.5214180206794683, |
|
"grad_norm": 0.33050844073295593, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3436, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.5228951255539144, |
|
"grad_norm": 0.2716812193393707, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2912, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.5243722304283605, |
|
"grad_norm": 0.2944520115852356, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3212, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5258493353028065, |
|
"grad_norm": 0.334228515625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3675, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.5273264401772526, |
|
"grad_norm": 0.27948203682899475, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2648, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.5288035450516987, |
|
"grad_norm": 0.32159537076950073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3659, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.5302806499261448, |
|
"grad_norm": 0.29499179124832153, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2718, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.5317577548005908, |
|
"grad_norm": 0.3503305912017822, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2972, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5332348596750369, |
|
"grad_norm": 0.29388928413391113, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3063, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.534711964549483, |
|
"grad_norm": 0.2753749191761017, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.5361890694239291, |
|
"grad_norm": 0.2902815341949463, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2918, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.5376661742983752, |
|
"grad_norm": 0.2991829216480255, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3148, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.5391432791728212, |
|
"grad_norm": 0.3151837885379791, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3187, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.5406203840472673, |
|
"grad_norm": 0.2935662865638733, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3065, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.5420974889217134, |
|
"grad_norm": 0.2787752151489258, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2677, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5435745937961596, |
|
"grad_norm": 0.2826704680919647, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2673, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5450516986706057, |
|
"grad_norm": 0.3015994429588318, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3377, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5465288035450517, |
|
"grad_norm": 0.27995777130126953, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2672, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5480059084194978, |
|
"grad_norm": 0.2902574837207794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2684, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5494830132939439, |
|
"grad_norm": 0.2957216501235962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3061, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.55096011816839, |
|
"grad_norm": 0.2945306599140167, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3248, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.552437223042836, |
|
"grad_norm": 0.2922048568725586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2987, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5539143279172821, |
|
"grad_norm": 0.30333656072616577, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3072, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5553914327917282, |
|
"grad_norm": 0.2855093479156494, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2758, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5568685376661743, |
|
"grad_norm": 0.2911272943019867, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2722, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5583456425406204, |
|
"grad_norm": 0.289193332195282, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3035, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5598227474150664, |
|
"grad_norm": 0.2716032564640045, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2692, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5612998522895125, |
|
"grad_norm": 0.33022886514663696, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3139, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5627769571639586, |
|
"grad_norm": 0.27433738112449646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.262, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5642540620384048, |
|
"grad_norm": 0.27598345279693604, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2657, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5657311669128509, |
|
"grad_norm": 0.28790509700775146, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3024, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5672082717872969, |
|
"grad_norm": 0.2914026379585266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2972, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.568685376661743, |
|
"grad_norm": 0.3148682117462158, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2982, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5701624815361891, |
|
"grad_norm": 0.29025575518608093, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2821, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5716395864106352, |
|
"grad_norm": 0.267362117767334, |
|
"learning_rate": 0.0002, |
|
"loss": 0.244, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5731166912850812, |
|
"grad_norm": 0.32638978958129883, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3058, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.5745937961595273, |
|
"grad_norm": 0.31582197546958923, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3285, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5760709010339734, |
|
"grad_norm": 0.2933168113231659, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2794, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5775480059084195, |
|
"grad_norm": 0.29435229301452637, |
|
"learning_rate": 0.0002, |
|
"loss": 0.287, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5790251107828656, |
|
"grad_norm": 0.29208388924598694, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2786, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5805022156573116, |
|
"grad_norm": 0.2712183892726898, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2708, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5819793205317577, |
|
"grad_norm": 0.27574923634529114, |
|
"learning_rate": 0.0002, |
|
"loss": 0.269, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.5834564254062038, |
|
"grad_norm": 0.30967944860458374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2827, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.5849335302806499, |
|
"grad_norm": 0.29655173420906067, |
|
"learning_rate": 0.0002, |
|
"loss": 0.306, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.5864106351550961, |
|
"grad_norm": 1.6516242027282715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3294, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5878877400295421, |
|
"grad_norm": 0.2701549828052521, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2451, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.5893648449039882, |
|
"grad_norm": 0.2530956268310547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2341, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.5908419497784343, |
|
"grad_norm": 0.3096421957015991, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5923190546528804, |
|
"grad_norm": 0.3079342842102051, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3291, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.5937961595273265, |
|
"grad_norm": 0.29586726427078247, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3094, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.5952732644017725, |
|
"grad_norm": 0.28764981031417847, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2961, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.5967503692762186, |
|
"grad_norm": 0.30434954166412354, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2936, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5982274741506647, |
|
"grad_norm": 0.2840517461299896, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2964, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5997045790251108, |
|
"grad_norm": 0.2927243113517761, |
|
"learning_rate": 0.0002, |
|
"loss": 0.313, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6011816838995568, |
|
"grad_norm": 0.26455628871917725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.244, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6026587887740029, |
|
"grad_norm": 0.327934592962265, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3271, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.604135893648449, |
|
"grad_norm": 0.28486961126327515, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6056129985228951, |
|
"grad_norm": 0.3310534656047821, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2888, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6070901033973413, |
|
"grad_norm": 0.32391390204429626, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3123, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6085672082717873, |
|
"grad_norm": 0.5019936561584473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3494, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.6100443131462334, |
|
"grad_norm": 0.2915607988834381, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2845, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6115214180206795, |
|
"grad_norm": 0.34125831723213196, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2985, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6129985228951256, |
|
"grad_norm": 0.28235796093940735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.29, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6144756277695717, |
|
"grad_norm": 0.30712956190109253, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2863, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6159527326440177, |
|
"grad_norm": 0.3005330562591553, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3186, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6174298375184638, |
|
"grad_norm": 0.4083673655986786, |
|
"learning_rate": 0.0002, |
|
"loss": 0.31, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6189069423929099, |
|
"grad_norm": 0.2704838812351227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2649, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.620384047267356, |
|
"grad_norm": 0.29053810238838196, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2789, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.621861152141802, |
|
"grad_norm": 0.329973429441452, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3313, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6233382570162481, |
|
"grad_norm": 0.31070685386657715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3045, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.6248153618906942, |
|
"grad_norm": 0.3487679958343506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3286, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.6262924667651403, |
|
"grad_norm": 0.3269588351249695, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3326, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.6277695716395865, |
|
"grad_norm": 0.26015186309814453, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2457, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6292466765140325, |
|
"grad_norm": 0.2547609508037567, |
|
"learning_rate": 0.0002, |
|
"loss": 0.262, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.6307237813884786, |
|
"grad_norm": 0.2524930238723755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.23, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.6322008862629247, |
|
"grad_norm": 0.3031904101371765, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3427, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.6336779911373708, |
|
"grad_norm": 0.3007690906524658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2974, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.6351550960118169, |
|
"grad_norm": 0.28696200251579285, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2911, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6366322008862629, |
|
"grad_norm": 0.2805304229259491, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2745, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.638109305760709, |
|
"grad_norm": 0.2757206857204437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2517, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.6395864106351551, |
|
"grad_norm": 0.26851919293403625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2537, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.6410635155096012, |
|
"grad_norm": 0.28059712052345276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2616, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6425406203840472, |
|
"grad_norm": 0.2718868553638458, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2652, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6440177252584933, |
|
"grad_norm": 0.28253173828125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2866, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6454948301329394, |
|
"grad_norm": 0.3183034658432007, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3485, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.6469719350073855, |
|
"grad_norm": 0.2451733946800232, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2312, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.6484490398818316, |
|
"grad_norm": 0.3208939731121063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3245, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.6499261447562777, |
|
"grad_norm": 0.26186874508857727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2485, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6514032496307238, |
|
"grad_norm": 0.27923303842544556, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3221, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6528803545051699, |
|
"grad_norm": 0.28155946731567383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2843, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.654357459379616, |
|
"grad_norm": 0.28456977009773254, |
|
"learning_rate": 0.0002, |
|
"loss": 0.296, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6558345642540621, |
|
"grad_norm": 0.27252209186553955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2765, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6573116691285081, |
|
"grad_norm": 0.30992233753204346, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3055, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6587887740029542, |
|
"grad_norm": 0.30148544907569885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3059, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6602658788774003, |
|
"grad_norm": 0.29087716341018677, |
|
"learning_rate": 0.0002, |
|
"loss": 0.257, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6617429837518464, |
|
"grad_norm": 0.30917656421661377, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3096, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6632200886262924, |
|
"grad_norm": 0.311759352684021, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2842, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6646971935007385, |
|
"grad_norm": 0.2612153887748718, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2659, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6661742983751846, |
|
"grad_norm": 0.2954850196838379, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2755, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6676514032496307, |
|
"grad_norm": 0.3181207776069641, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3163, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6691285081240768, |
|
"grad_norm": 0.2802172899246216, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3007, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.670605612998523, |
|
"grad_norm": 0.2662009298801422, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2571, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.672082717872969, |
|
"grad_norm": 0.2844826579093933, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3074, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6735598227474151, |
|
"grad_norm": 0.2758782207965851, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2773, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6750369276218612, |
|
"grad_norm": 0.2567600607872009, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6765140324963073, |
|
"grad_norm": 0.34004896879196167, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2288, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6779911373707533, |
|
"grad_norm": 0.2983347475528717, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2812, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6794682422451994, |
|
"grad_norm": 0.29728880524635315, |
|
"learning_rate": 0.0002, |
|
"loss": 0.307, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6809453471196455, |
|
"grad_norm": 0.31359198689460754, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3101, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6824224519940916, |
|
"grad_norm": 0.27619168162345886, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2779, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6838995568685377, |
|
"grad_norm": 0.3086981773376465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3097, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6853766617429837, |
|
"grad_norm": 0.25216472148895264, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2468, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6868537666174298, |
|
"grad_norm": 0.26497989892959595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2528, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.6883308714918759, |
|
"grad_norm": 0.27617159485816956, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2749, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.689807976366322, |
|
"grad_norm": 0.30501970648765564, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3091, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.691285081240768, |
|
"grad_norm": 0.3360370099544525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3565, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.6927621861152142, |
|
"grad_norm": 0.27070116996765137, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2705, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.6942392909896603, |
|
"grad_norm": 0.29874977469444275, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2996, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6957163958641064, |
|
"grad_norm": 0.294386088848114, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2894, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.6971935007385525, |
|
"grad_norm": 0.3233067989349365, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3024, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.6986706056129985, |
|
"grad_norm": 0.31051644682884216, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3339, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.7001477104874446, |
|
"grad_norm": 0.28541213274002075, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3097, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.7016248153618907, |
|
"grad_norm": 0.30758950114250183, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3221, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7031019202363368, |
|
"grad_norm": 0.37882164120674133, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2922, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.7045790251107829, |
|
"grad_norm": 0.2521478533744812, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2535, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.7060561299852289, |
|
"grad_norm": 0.36088013648986816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2863, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.707533234859675, |
|
"grad_norm": 0.31090793013572693, |
|
"learning_rate": 0.0002, |
|
"loss": 0.311, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.7090103397341211, |
|
"grad_norm": 0.2360762357711792, |
|
"learning_rate": 0.0002, |
|
"loss": 0.216, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7104874446085672, |
|
"grad_norm": 0.34354060888290405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2838, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.7119645494830132, |
|
"grad_norm": 0.2607513666152954, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2527, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.7134416543574594, |
|
"grad_norm": 0.3016189634799957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2782, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.7149187592319055, |
|
"grad_norm": 3.6188247203826904, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2884, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.7163958641063516, |
|
"grad_norm": 0.3072677552700043, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3263, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7178729689807977, |
|
"grad_norm": 0.28074517846107483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.298, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7193500738552437, |
|
"grad_norm": 0.3235277831554413, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2615, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.7208271787296898, |
|
"grad_norm": 2.001945734024048, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3925, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7223042836041359, |
|
"grad_norm": 0.29725533723831177, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2874, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.723781388478582, |
|
"grad_norm": 0.28706061840057373, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2898, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.725258493353028, |
|
"grad_norm": 0.2864967882633209, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2894, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7267355982274741, |
|
"grad_norm": 0.2824801802635193, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.7282127031019202, |
|
"grad_norm": 0.27492067217826843, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2516, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.7296898079763663, |
|
"grad_norm": 0.2876488268375397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2862, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.7311669128508124, |
|
"grad_norm": 0.30311787128448486, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2953, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.7326440177252584, |
|
"grad_norm": 0.277235209941864, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2981, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.7341211225997046, |
|
"grad_norm": 0.30590546131134033, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3557, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.7355982274741507, |
|
"grad_norm": 0.3205493986606598, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3304, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.7370753323485968, |
|
"grad_norm": 0.2640839219093323, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2807, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.7385524372230429, |
|
"grad_norm": 0.27507102489471436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.740029542097489, |
|
"grad_norm": 0.2716003954410553, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2684, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.741506646971935, |
|
"grad_norm": 0.2893518805503845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2708, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.7429837518463811, |
|
"grad_norm": 0.2790103256702423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2809, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.7444608567208272, |
|
"grad_norm": 0.29344794154167175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2961, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.7459379615952733, |
|
"grad_norm": 0.3118347227573395, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3184, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.7474150664697193, |
|
"grad_norm": 0.29491183161735535, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3194, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7488921713441654, |
|
"grad_norm": 0.3007814586162567, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.7503692762186115, |
|
"grad_norm": 0.3303704261779785, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2544, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.7518463810930576, |
|
"grad_norm": 0.28095510601997375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2774, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7533234859675036, |
|
"grad_norm": 0.2669844329357147, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2575, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7548005908419497, |
|
"grad_norm": 0.29896053671836853, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2823, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7562776957163959, |
|
"grad_norm": 0.27470019459724426, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2843, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.757754800590842, |
|
"grad_norm": 0.27731189131736755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2504, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.7592319054652881, |
|
"grad_norm": 0.2816368639469147, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2877, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7607090103397341, |
|
"grad_norm": 0.2858635485172272, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3348, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7621861152141802, |
|
"grad_norm": 0.2964169979095459, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2775, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7636632200886263, |
|
"grad_norm": 0.2534787058830261, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2274, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.7651403249630724, |
|
"grad_norm": 0.28982672095298767, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2939, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7666174298375185, |
|
"grad_norm": 0.27323317527770996, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2842, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7680945347119645, |
|
"grad_norm": 0.27642300724983215, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2966, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7695716395864106, |
|
"grad_norm": 0.26599329710006714, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2326, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.7710487444608567, |
|
"grad_norm": 0.2631528079509735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2771, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.7725258493353028, |
|
"grad_norm": 0.2790911793708801, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2898, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7740029542097489, |
|
"grad_norm": 0.266379714012146, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2685, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7754800590841949, |
|
"grad_norm": 0.30508288741111755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2909, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7769571639586411, |
|
"grad_norm": 0.2602393329143524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2305, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7784342688330872, |
|
"grad_norm": 0.3033619523048401, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2689, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7799113737075333, |
|
"grad_norm": 0.2758871614933014, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7813884785819794, |
|
"grad_norm": 0.2910580039024353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2844, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7828655834564254, |
|
"grad_norm": 0.33454883098602295, |
|
"learning_rate": 0.0002, |
|
"loss": 0.301, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7843426883308715, |
|
"grad_norm": 0.31416234374046326, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2948, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.7858197932053176, |
|
"grad_norm": 0.3144732117652893, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2649, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.7872968980797637, |
|
"grad_norm": 0.2666049599647522, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2602, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.7887740029542097, |
|
"grad_norm": 0.26852795481681824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2761, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.7902511078286558, |
|
"grad_norm": 0.2828836143016815, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2643, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.7917282127031019, |
|
"grad_norm": 0.24941638112068176, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2715, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.793205317577548, |
|
"grad_norm": 0.28167465329170227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2886, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.794682422451994, |
|
"grad_norm": 0.27295514941215515, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2838, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.7961595273264401, |
|
"grad_norm": 0.28401198983192444, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3027, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.7976366322008862, |
|
"grad_norm": 0.36002475023269653, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2743, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7991137370753324, |
|
"grad_norm": 0.24884235858917236, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2236, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.8005908419497785, |
|
"grad_norm": 0.29792970418930054, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2685, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.8020679468242246, |
|
"grad_norm": 0.293630450963974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3121, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.8035450516986706, |
|
"grad_norm": 0.30826666951179504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2886, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.8050221565731167, |
|
"grad_norm": 0.2855941355228424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2947, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.8064992614475628, |
|
"grad_norm": 0.2649870812892914, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2655, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.8079763663220089, |
|
"grad_norm": 0.27176880836486816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2715, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.8094534711964549, |
|
"grad_norm": 0.3225911557674408, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3404, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.810930576070901, |
|
"grad_norm": 0.30113476514816284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3347, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.8124076809453471, |
|
"grad_norm": 0.2784980535507202, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2599, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8138847858197932, |
|
"grad_norm": 0.2825387716293335, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2759, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.8153618906942393, |
|
"grad_norm": 0.26612088084220886, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2464, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.8168389955686853, |
|
"grad_norm": 0.2672181725502014, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2182, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.8183161004431314, |
|
"grad_norm": 0.28279784321784973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2612, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.8197932053175776, |
|
"grad_norm": 0.277281790971756, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2647, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8212703101920237, |
|
"grad_norm": 0.2784774899482727, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2738, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.8227474150664698, |
|
"grad_norm": 0.2438610941171646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2274, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.8242245199409158, |
|
"grad_norm": 0.28168389201164246, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2604, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.8257016248153619, |
|
"grad_norm": 0.26112061738967896, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2215, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.827178729689808, |
|
"grad_norm": 0.25962984561920166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2834, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8286558345642541, |
|
"grad_norm": 0.27150726318359375, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2877, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8301329394387001, |
|
"grad_norm": 0.2753923237323761, |
|
"learning_rate": 0.0002, |
|
"loss": 0.298, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.8316100443131462, |
|
"grad_norm": 0.37228959798812866, |
|
"learning_rate": 0.0002, |
|
"loss": 0.333, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.8330871491875923, |
|
"grad_norm": 0.27188584208488464, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2578, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.8345642540620384, |
|
"grad_norm": 0.2894970178604126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3051, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.8360413589364845, |
|
"grad_norm": 0.2769443690776825, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2833, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.8375184638109305, |
|
"grad_norm": 0.25693845748901367, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2571, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.8389955686853766, |
|
"grad_norm": 0.27856937050819397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2942, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.8404726735598228, |
|
"grad_norm": 0.2575175166130066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2733, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.8419497784342689, |
|
"grad_norm": 0.27574828267097473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2642, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.843426883308715, |
|
"grad_norm": 0.2522878646850586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2377, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.844903988183161, |
|
"grad_norm": 0.26878973841667175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2635, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.8463810930576071, |
|
"grad_norm": 0.25874340534210205, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2622, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.8478581979320532, |
|
"grad_norm": 0.2808675765991211, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2679, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.8493353028064993, |
|
"grad_norm": 0.3035877048969269, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3097, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.8508124076809453, |
|
"grad_norm": 0.2748059928417206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2986, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.8522895125553914, |
|
"grad_norm": 0.2966136932373047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2799, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.8537666174298375, |
|
"grad_norm": 1.3606016635894775, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2808, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.8552437223042836, |
|
"grad_norm": 0.2695050835609436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2417, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8567208271787297, |
|
"grad_norm": 0.26403385400772095, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8581979320531757, |
|
"grad_norm": 0.2719348669052124, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2558, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.8596750369276218, |
|
"grad_norm": 0.2620692253112793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2704, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.8611521418020679, |
|
"grad_norm": 0.3160097897052765, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2967, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.8626292466765141, |
|
"grad_norm": 0.27527111768722534, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2508, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.8641063515509602, |
|
"grad_norm": 0.27846094965934753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2766, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.8655834564254062, |
|
"grad_norm": 0.2789734899997711, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2857, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.8670605612998523, |
|
"grad_norm": 0.30942806601524353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3216, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.8685376661742984, |
|
"grad_norm": 0.27556589245796204, |
|
"learning_rate": 0.0002, |
|
"loss": 0.257, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8700147710487445, |
|
"grad_norm": 0.2209852784872055, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1862, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.8714918759231906, |
|
"grad_norm": 0.29638856649398804, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3049, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8729689807976366, |
|
"grad_norm": 0.309600830078125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2675, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8744460856720827, |
|
"grad_norm": 0.28644561767578125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2859, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8759231905465288, |
|
"grad_norm": 0.25827983021736145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2543, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8774002954209749, |
|
"grad_norm": 0.2538520395755768, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2523, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8788774002954209, |
|
"grad_norm": 0.26979878544807434, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2379, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.880354505169867, |
|
"grad_norm": 0.2815455496311188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2793, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.8818316100443131, |
|
"grad_norm": 0.2549828588962555, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2743, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.8833087149187593, |
|
"grad_norm": 0.24497728049755096, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2234, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8847858197932054, |
|
"grad_norm": 0.2854422628879547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2858, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.8862629246676514, |
|
"grad_norm": 0.2807024121284485, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2608, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8877400295420975, |
|
"grad_norm": 0.2663458585739136, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.8892171344165436, |
|
"grad_norm": 0.2760714888572693, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2934, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.8906942392909897, |
|
"grad_norm": 0.3003925383090973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2765, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.8921713441654358, |
|
"grad_norm": 0.32742151618003845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3116, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.8936484490398818, |
|
"grad_norm": 0.29396241903305054, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2548, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.8951255539143279, |
|
"grad_norm": 0.28835952281951904, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2665, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.896602658788774, |
|
"grad_norm": 0.2689400017261505, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2554, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.8980797636632201, |
|
"grad_norm": 0.27350932359695435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.8995568685376661, |
|
"grad_norm": 0.26769059896469116, |
|
"learning_rate": 0.0002, |
|
"loss": 0.286, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.9010339734121122, |
|
"grad_norm": 0.25921839475631714, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2438, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9025110782865583, |
|
"grad_norm": 0.26628950238227844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2713, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.9039881831610044, |
|
"grad_norm": 0.26283326745033264, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2696, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.9054652880354506, |
|
"grad_norm": 0.29980388283729553, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2581, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.9069423929098966, |
|
"grad_norm": 0.2768777310848236, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2853, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.9084194977843427, |
|
"grad_norm": 0.27376455068588257, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2836, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.9098966026587888, |
|
"grad_norm": 0.28933191299438477, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3071, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.9113737075332349, |
|
"grad_norm": 0.3081536293029785, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2461, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.912850812407681, |
|
"grad_norm": 0.2886345386505127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.914327917282127, |
|
"grad_norm": 0.2829267680644989, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2746, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.9158050221565731, |
|
"grad_norm": 0.2512478232383728, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2435, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9172821270310192, |
|
"grad_norm": 0.4229198694229126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2847, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.9187592319054653, |
|
"grad_norm": 0.2993115186691284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2869, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.9202363367799113, |
|
"grad_norm": 0.2935909330844879, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2691, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.9217134416543574, |
|
"grad_norm": 0.3156206011772156, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2936, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.9231905465288035, |
|
"grad_norm": 0.2829430401325226, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2782, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9246676514032496, |
|
"grad_norm": 0.2769679129123688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2751, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.9261447562776958, |
|
"grad_norm": 0.2695547044277191, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2768, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9276218611521418, |
|
"grad_norm": 0.2564750909805298, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2691, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9290989660265879, |
|
"grad_norm": 0.3216243386268616, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2895, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.930576070901034, |
|
"grad_norm": 0.26920050382614136, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2943, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9320531757754801, |
|
"grad_norm": 0.23926717042922974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2114, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.9335302806499262, |
|
"grad_norm": 0.3015134036540985, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2889, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.9350073855243722, |
|
"grad_norm": 0.29262953996658325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2977, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.9364844903988183, |
|
"grad_norm": 0.27330338954925537, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2706, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.9379615952732644, |
|
"grad_norm": 0.2691650092601776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2471, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.9394387001477105, |
|
"grad_norm": 0.30574268102645874, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2977, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.9409158050221565, |
|
"grad_norm": 0.2739352881908417, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2825, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.9423929098966026, |
|
"grad_norm": 0.3041648268699646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3344, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.9438700147710487, |
|
"grad_norm": 0.2827674150466919, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2618, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.9453471196454948, |
|
"grad_norm": 0.25939705967903137, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2279, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.946824224519941, |
|
"grad_norm": 0.3013932406902313, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3114, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.948301329394387, |
|
"grad_norm": 4.885525703430176, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2818, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.9497784342688331, |
|
"grad_norm": 0.25590044260025024, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2427, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.9512555391432792, |
|
"grad_norm": 0.2372172772884369, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2397, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.9527326440177253, |
|
"grad_norm": 0.26376283168792725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2624, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.9542097488921714, |
|
"grad_norm": 0.27342459559440613, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2652, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.9556868537666174, |
|
"grad_norm": 0.260745644569397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2523, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.9571639586410635, |
|
"grad_norm": 0.24873754382133484, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2238, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.9586410635155096, |
|
"grad_norm": 0.281990110874176, |
|
"learning_rate": 0.0002, |
|
"loss": 0.235, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.9601181683899557, |
|
"grad_norm": 0.25676026940345764, |
|
"learning_rate": 0.0002, |
|
"loss": 0.215, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9615952732644018, |
|
"grad_norm": 0.2927687466144562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2764, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.9630723781388478, |
|
"grad_norm": 0.26322099566459656, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2511, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.9645494830132939, |
|
"grad_norm": 0.2764233350753784, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2439, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.96602658788774, |
|
"grad_norm": 0.29849788546562195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2689, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.9675036927621861, |
|
"grad_norm": 0.2834247648715973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3398, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.9689807976366323, |
|
"grad_norm": 0.28436174988746643, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3086, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.9704579025110783, |
|
"grad_norm": 0.24340803921222687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2288, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.9719350073855244, |
|
"grad_norm": 0.2577742338180542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2598, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.9734121122599705, |
|
"grad_norm": 0.28326281905174255, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2862, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.9748892171344166, |
|
"grad_norm": 0.27066269516944885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2585, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9763663220088626, |
|
"grad_norm": 0.26694634556770325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2788, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.9778434268833087, |
|
"grad_norm": 0.2890130281448364, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3073, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9793205317577548, |
|
"grad_norm": 0.26095882058143616, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2184, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.9807976366322009, |
|
"grad_norm": 0.2648635506629944, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2465, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.982274741506647, |
|
"grad_norm": 0.2354656457901001, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2457, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.983751846381093, |
|
"grad_norm": 0.2767215669155121, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2735, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.9852289512555391, |
|
"grad_norm": 0.27141231298446655, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2589, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.9867060561299852, |
|
"grad_norm": 0.254549115896225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2785, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.9881831610044313, |
|
"grad_norm": 0.2712014317512512, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2579, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.9896602658788775, |
|
"grad_norm": 0.26712852716445923, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3165, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9911373707533235, |
|
"grad_norm": 0.2829815447330475, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2438, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.9926144756277696, |
|
"grad_norm": 0.27326712012290955, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2622, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.9940915805022157, |
|
"grad_norm": 0.2569233179092407, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2353, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.9955686853766618, |
|
"grad_norm": 0.28441140055656433, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2732, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.9970457902511078, |
|
"grad_norm": 0.2831505835056305, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2605, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9985228951255539, |
|
"grad_norm": 0.26520466804504395, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2614, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.7976667881011963, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3423, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 677, |
|
"total_flos": 1.1323313955746611e+17, |
|
"train_loss": 0.3172708253432588, |
|
"train_runtime": 2971.8662, |
|
"train_samples_per_second": 1.82, |
|
"train_steps_per_second": 0.228 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 677, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1323313955746611e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|