jssky committed · verified
Commit f6f154f · 1 Parent(s): 367e19b

Training in progress, step 198, checkpoint

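The diff below touches the last-checkpoint/ directory: what looks like a PEFT adapter (adapter_model.safetensors), the optimizer, scheduler and RNG state, and trainer_state.json. A minimal sketch for pulling the updated trainer state at exactly this revision — the repo id is a placeholder, since only the committer (jssky) and the commit hash (f6f154f) are visible on this page:

# Sketch only: REPO_ID is hypothetical; the actual model repository is not named on this page.
import json
from huggingface_hub import hf_hub_download

REPO_ID = "jssky/your-model-repo"  # placeholder, substitute the real repo id

state_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="last-checkpoint/trainer_state.json",
    revision="f6f154f",  # the commit shown above (use the full SHA if short hashes are rejected)
)

with open(state_path) as f:
    state = json.load(f)

# After this commit the checkpoint reports step 198 at epoch 1.0.
print(state["global_step"], state["epoch"])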
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fb3a1c034c512f3602bb7eb9746a25ce70d3c1c2463811bd29bb2e039a6ac96
+oid sha256:78caa2ecb707469c3b9436373b3cfd3d25d6860d81dfdd1fcff1dd3975613d82
 size 47724600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a4ee21da39425c292a1b2d46b9c4ec0a440be08be6cebb00065c8e33bd4773e
+oid sha256:e1cd66b51dd9ce1e23fb67cd242e2eb29e6b5e6b2e592166f989f3c2e9e97255
 size 25331516
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bbf00cc7d26b5ba1f1bfe59564ee6b340d81d2d6e92ca2595dc7bce3ba71015
+oid sha256:63afe6b53664eea8898016edb2f2324259424202f671640bd9eb38d5de37a2a7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae4f1bd750c09fc9bb727cae976f56e1bbe0dff5c4d4e1a6eec209a810ae59b2
+oid sha256:588b963689e2bc6a644ef6e066b36a07667462b36247fb966e7188944b9c91f2
 size 1064
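
Each pointer file above follows the git-lfs v1 spec: the oid sha256: line is the SHA-256 digest of the real binary and size is its length in bytes. A minimal check of a locally pulled file against its pointer (paths assume the checkpoint has been fetched, e.g. with git lfs pull):

# Verify a git-lfs pointer against the binary it stands for.
import hashlib
import os

def verify_lfs(binary_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's SHA-256 digest and byte size match the LFS pointer."""
    digest = hashlib.sha256()
    with open(binary_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(binary_path) == expected_size

# Values taken from the new adapter_model.safetensors pointer in this commit.
print(verify_lfs(
    "last-checkpoint/adapter_model.safetensors",
    "78caa2ecb707469c3b9436373b3cfd3d25d6860d81dfdd1fcff1dd3975613d82",
    47724600,
))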
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7575757575757576,
+  "epoch": 1.0,
   "eval_steps": 50,
-  "global_step": 150,
+  "global_step": 198,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1081,6 +1081,342 @@
       "eval_samples_per_second": 6.856,
       "eval_steps_per_second": 3.428,
       "step": 150
+    },
+    {
+      "epoch": 0.7626262626262627,
+      "grad_norm": 9.82388687133789,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 0.302,
+      "step": 151
+    },
+    {
+      "epoch": 0.7676767676767676,
+      "grad_norm": 12.946221351623535,
+      "learning_rate": 2.8117631612207084e-05,
+      "loss": 0.4361,
+      "step": 152
+    },
+    {
+      "epoch": 0.7727272727272727,
+      "grad_norm": 9.764445304870605,
+      "learning_rate": 2.6966013605133088e-05,
+      "loss": 0.3317,
+      "step": 153
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 14.5607271194458,
+      "learning_rate": 2.5834789435204243e-05,
+      "loss": 0.3403,
+      "step": 154
+    },
+    {
+      "epoch": 0.7828282828282829,
+      "grad_norm": 11.619050979614258,
+      "learning_rate": 2.4724274982774865e-05,
+      "loss": 0.4053,
+      "step": 155
+    },
+    {
+      "epoch": 0.7878787878787878,
+      "grad_norm": 14.43159008026123,
+      "learning_rate": 2.3634780345266806e-05,
+      "loss": 0.3843,
+      "step": 156
+    },
+    {
+      "epoch": 0.7929292929292929,
+      "grad_norm": 15.145977020263672,
+      "learning_rate": 2.2566609750578673e-05,
+      "loss": 0.4364,
+      "step": 157
+    },
+    {
+      "epoch": 0.797979797979798,
+      "grad_norm": 11.833556175231934,
+      "learning_rate": 2.1520061472133902e-05,
+      "loss": 0.3154,
+      "step": 158
+    },
+    {
+      "epoch": 0.803030303030303,
+      "grad_norm": 12.835911750793457,
+      "learning_rate": 2.04954277455917e-05,
+      "loss": 0.3429,
+      "step": 159
+    },
+    {
+      "epoch": 0.8080808080808081,
+      "grad_norm": 11.492507934570312,
+      "learning_rate": 1.9492994687243714e-05,
+      "loss": 0.3757,
+      "step": 160
+    },
+    {
+      "epoch": 0.8131313131313131,
+      "grad_norm": 13.120503425598145,
+      "learning_rate": 1.851304221411967e-05,
+      "loss": 0.4127,
+      "step": 161
+    },
+    {
+      "epoch": 0.8181818181818182,
+      "grad_norm": 20.865995407104492,
+      "learning_rate": 1.7555843965823992e-05,
+      "loss": 0.4,
+      "step": 162
+    },
+    {
+      "epoch": 0.8232323232323232,
+      "grad_norm": 11.817997932434082,
+      "learning_rate": 1.6621667228125302e-05,
+      "loss": 0.4647,
+      "step": 163
+    },
+    {
+      "epoch": 0.8282828282828283,
+      "grad_norm": 10.320054054260254,
+      "learning_rate": 1.57107728583203e-05,
+      "loss": 0.3413,
+      "step": 164
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 11.233556747436523,
+      "learning_rate": 1.4823415212392377e-05,
+      "loss": 0.2859,
+      "step": 165
+    },
+    {
+      "epoch": 0.8383838383838383,
+      "grad_norm": 10.825108528137207,
+      "learning_rate": 1.3959842073986085e-05,
+      "loss": 0.2771,
+      "step": 166
+    },
+    {
+      "epoch": 0.8434343434343434,
+      "grad_norm": 11.109395027160645,
+      "learning_rate": 1.3120294585216353e-05,
+      "loss": 0.3611,
+      "step": 167
+    },
+    {
+      "epoch": 0.8484848484848485,
+      "grad_norm": 10.571539878845215,
+      "learning_rate": 1.230500717933285e-05,
+      "loss": 0.3888,
+      "step": 168
+    },
+    {
+      "epoch": 0.8535353535353535,
+      "grad_norm": 11.521129608154297,
+      "learning_rate": 1.1514207515257147e-05,
+      "loss": 0.3247,
+      "step": 169
+    },
+    {
+      "epoch": 0.8585858585858586,
+      "grad_norm": 11.530449867248535,
+      "learning_rate": 1.0748116414011888e-05,
+      "loss": 0.2623,
+      "step": 170
+    },
+    {
+      "epoch": 0.8636363636363636,
+      "grad_norm": 10.529777526855469,
+      "learning_rate": 1.0006947797059219e-05,
+      "loss": 0.3313,
+      "step": 171
+    },
+    {
+      "epoch": 0.8686868686868687,
+      "grad_norm": 9.755709648132324,
+      "learning_rate": 9.29090862656593e-06,
+      "loss": 0.3945,
+      "step": 172
+    },
+    {
+      "epoch": 0.8737373737373737,
+      "grad_norm": 10.26276969909668,
+      "learning_rate": 8.600198847611729e-06,
+      "loss": 0.2629,
+      "step": 173
+    },
+    {
+      "epoch": 0.8787878787878788,
+      "grad_norm": 18.38811492919922,
+      "learning_rate": 7.935011332357112e-06,
+      "loss": 0.408,
+      "step": 174
+    },
+    {
+      "epoch": 0.8838383838383839,
+      "grad_norm": 11.992220878601074,
+      "learning_rate": 7.295531826186264e-06,
+      "loss": 0.4615,
+      "step": 175
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 14.000274658203125,
+      "learning_rate": 6.681938895839746e-06,
+      "loss": 0.3583,
+      "step": 176
+    },
+    {
+      "epoch": 0.8939393939393939,
+      "grad_norm": 11.123028755187988,
+      "learning_rate": 6.094403879552213e-06,
+      "loss": 0.2686,
+      "step": 177
+    },
+    {
+      "epoch": 0.898989898989899,
+      "grad_norm": 11.463796615600586,
+      "learning_rate": 5.533090839208133e-06,
+      "loss": 0.3694,
+      "step": 178
+    },
+    {
+      "epoch": 0.9040404040404041,
+      "grad_norm": 14.426941871643066,
+      "learning_rate": 4.998156514529595e-06,
+      "loss": 0.4453,
+      "step": 179
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 11.056812286376953,
+      "learning_rate": 4.489750279308757e-06,
+      "loss": 0.3627,
+      "step": 180
+    },
+    {
+      "epoch": 0.9141414141414141,
+      "grad_norm": 15.307928085327148,
+      "learning_rate": 4.008014099696922e-06,
+      "loss": 0.5187,
+      "step": 181
+    },
+    {
+      "epoch": 0.9191919191919192,
+      "grad_norm": 16.49620246887207,
+      "learning_rate": 3.5530824945623542e-06,
+      "loss": 0.4363,
+      "step": 182
+    },
+    {
+      "epoch": 0.9242424242424242,
+      "grad_norm": 14.4350004196167,
+      "learning_rate": 3.1250824979274675e-06,
+      "loss": 0.422,
+      "step": 183
+    },
+    {
+      "epoch": 0.9292929292929293,
+      "grad_norm": 8.324783325195312,
+      "learning_rate": 2.7241336234962944e-06,
+      "loss": 0.3985,
+      "step": 184
+    },
+    {
+      "epoch": 0.9343434343434344,
+      "grad_norm": 11.766496658325195,
+      "learning_rate": 2.3503478312815298e-06,
+      "loss": 0.3377,
+      "step": 185
+    },
+    {
+      "epoch": 0.9393939393939394,
+      "grad_norm": 15.193984031677246,
+      "learning_rate": 2.003829496341325e-06,
+      "loss": 0.3802,
+      "step": 186
+    },
+    {
+      "epoch": 0.9444444444444444,
+      "grad_norm": 20.59272003173828,
+      "learning_rate": 1.684675379633649e-06,
+      "loss": 0.4727,
+      "step": 187
+    },
+    {
+      "epoch": 0.9494949494949495,
+      "grad_norm": 10.777263641357422,
+      "learning_rate": 1.3929746009971433e-06,
+      "loss": 0.3044,
+      "step": 188
+    },
+    {
+      "epoch": 0.9545454545454546,
+      "grad_norm": 20.804996490478516,
+      "learning_rate": 1.1288086142653864e-06,
+      "loss": 0.3692,
+      "step": 189
+    },
+    {
+      "epoch": 0.9595959595959596,
+      "grad_norm": 19.473365783691406,
+      "learning_rate": 8.922511845219971e-07,
+      "loss": 0.321,
+      "step": 190
+    },
+    {
+      "epoch": 0.9646464646464646,
+      "grad_norm": 10.078756332397461,
+      "learning_rate": 6.833683675025904e-07,
+      "loss": 0.3689,
+      "step": 191
+    },
+    {
+      "epoch": 0.9696969696969697,
+      "grad_norm": 10.66713809967041,
+      "learning_rate": 5.022184911495864e-07,
+      "loss": 0.2831,
+      "step": 192
+    },
+    {
+      "epoch": 0.9747474747474747,
+      "grad_norm": 13.519158363342285,
+      "learning_rate": 3.488521393248401e-07,
+      "loss": 0.3693,
+      "step": 193
+    },
+    {
+      "epoch": 0.9797979797979798,
+      "grad_norm": 18.351518630981445,
+      "learning_rate": 2.2331213768468363e-07,
+      "loss": 0.5692,
+      "step": 194
+    },
+    {
+      "epoch": 0.9848484848484849,
+      "grad_norm": 20.144264221191406,
+      "learning_rate": 1.2563354172142606e-07,
+      "loss": 0.4896,
+      "step": 195
+    },
+    {
+      "epoch": 0.98989898989899,
+      "grad_norm": 21.47068977355957,
+      "learning_rate": 5.584362697453882e-08,
+      "loss": 0.4884,
+      "step": 196
+    },
+    {
+      "epoch": 0.9949494949494949,
+      "grad_norm": 14.894364356994629,
+      "learning_rate": 1.3961881414292778e-08,
+      "loss": 0.4101,
+      "step": 197
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 20.294527053833008,
+      "learning_rate": 0.0,
+      "loss": 0.5037,
+      "step": 198
     }
   ],
   "logging_steps": 1,
@@ -1095,12 +1431,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.13169452613632e+16,
+  "total_flos": 2.812060362394829e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null