End of training

Browse files

Files changed (8) hide show

all_results.json +8 -0
preprocessor_config.json +28 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
train_results.json +8 -0
trainer_state.json +743 -0
vocab.txt +0 -0

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 99.48249027237354,
+    "total_flos": 1.0064407234989773e+18,
+    "train_loss": 2.0492228651046753,
+    "train_runtime": 20851.1432,
+    "train_samples_per_second": 184.834,
+    "train_steps_per_second": 0.921
+}

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": false,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "ChineseCLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 99.48249027237354,
+    "total_flos": 1.0064407234989773e+18,
+    "train_loss": 2.0492228651046753,
+    "train_runtime": 20851.1432,
+    "train_samples_per_second": 184.834,
+    "train_steps_per_second": 0.921
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 99.48249027237354,
+  "eval_steps": 500,
+  "global_step": 19200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9961089494163424,
+      "grad_norm": 0.9873842597007751,
+      "learning_rate": 9.901562500000001e-06,
+      "loss": 2.5953,
+      "step": 192
+    },
+    {
+      "epoch": 1.9909208819714657,
+      "grad_norm": 1.439842700958252,
+      "learning_rate": 9.8015625e-06,
+      "loss": 2.462,
+      "step": 384
+    },
+    {
+      "epoch": 2.985732814526589,
+      "grad_norm": 2.307729482650757,
+      "learning_rate": 9.701562500000001e-06,
+      "loss": 2.3659,
+      "step": 576
+    },
+    {
+      "epoch": 3.980544747081712,
+      "grad_norm": 4.006823539733887,
+      "learning_rate": 9.6015625e-06,
+      "loss": 2.3475,
+      "step": 768
+    },
+    {
+      "epoch": 4.975356679636835,
+      "grad_norm": 2.971815824508667,
+      "learning_rate": 9.501562500000001e-06,
+      "loss": 2.3162,
+      "step": 960
+    },
+    {
+      "epoch": 5.970168612191959,
+      "grad_norm": 3.6012327671051025,
+      "learning_rate": 9.4015625e-06,
+      "loss": 2.3049,
+      "step": 1152
+    },
+    {
+      "epoch": 6.964980544747082,
+      "grad_norm": 3.803527355194092,
+      "learning_rate": 9.301562500000002e-06,
+      "loss": 2.2733,
+      "step": 1344
+    },
+    {
+      "epoch": 7.959792477302205,
+      "grad_norm": 3.0761234760284424,
+      "learning_rate": 9.201562500000001e-06,
+      "loss": 2.2576,
+      "step": 1536
+    },
+    {
+      "epoch": 8.954604409857328,
+      "grad_norm": 2.6708500385284424,
+      "learning_rate": 9.1015625e-06,
+      "loss": 2.2367,
+      "step": 1728
+    },
+    {
+      "epoch": 9.949416342412452,
+      "grad_norm": 3.482462167739868,
+      "learning_rate": 9.001562500000001e-06,
+      "loss": 2.176,
+      "step": 1920
+    },
+    {
+      "epoch": 10.944228274967575,
+      "grad_norm": 2.826169967651367,
+      "learning_rate": 8.902083333333334e-06,
+      "loss": 2.2051,
+      "step": 2112
+    },
+    {
+      "epoch": 11.939040207522698,
+      "grad_norm": 3.164243698120117,
+      "learning_rate": 8.802083333333335e-06,
+      "loss": 2.1874,
+      "step": 2304
+    },
+    {
+      "epoch": 12.933852140077821,
+      "grad_norm": 4.725254058837891,
+      "learning_rate": 8.702083333333334e-06,
+      "loss": 2.1809,
+      "step": 2496
+    },
+    {
+      "epoch": 13.928664072632944,
+      "grad_norm": 3.7806782722473145,
+      "learning_rate": 8.602083333333333e-06,
+      "loss": 2.16,
+      "step": 2688
+    },
+    {
+      "epoch": 14.923476005188068,
+      "grad_norm": 3.735746145248413,
+      "learning_rate": 8.502083333333334e-06,
+      "loss": 2.1663,
+      "step": 2880
+    },
+    {
+      "epoch": 15.918287937743191,
+      "grad_norm": 3.401533365249634,
+      "learning_rate": 8.402083333333334e-06,
+      "loss": 2.1353,
+      "step": 3072
+    },
+    {
+      "epoch": 16.913099870298314,
+      "grad_norm": 3.1076550483703613,
+      "learning_rate": 8.302083333333335e-06,
+      "loss": 2.1277,
+      "step": 3264
+    },
+    {
+      "epoch": 17.907911802853437,
+      "grad_norm": 3.2938179969787598,
+      "learning_rate": 8.202083333333334e-06,
+      "loss": 2.1165,
+      "step": 3456
+    },
+    {
+      "epoch": 18.90272373540856,
+      "grad_norm": 3.4992620944976807,
+      "learning_rate": 8.102083333333333e-06,
+      "loss": 2.1407,
+      "step": 3648
+    },
+    {
+      "epoch": 19.897535667963684,
+      "grad_norm": 5.144476413726807,
+      "learning_rate": 8.002083333333334e-06,
+      "loss": 2.1124,
+      "step": 3840
+    },
+    {
+      "epoch": 20.892347600518807,
+      "grad_norm": 3.369130849838257,
+      "learning_rate": 7.902083333333333e-06,
+      "loss": 2.0994,
+      "step": 4032
+    },
+    {
+      "epoch": 21.88715953307393,
+      "grad_norm": 2.5219101905822754,
+      "learning_rate": 7.802604166666668e-06,
+      "loss": 2.086,
+      "step": 4224
+    },
+    {
+      "epoch": 22.881971465629054,
+      "grad_norm": 2.416583776473999,
+      "learning_rate": 7.702604166666667e-06,
+      "loss": 2.1011,
+      "step": 4416
+    },
+    {
+      "epoch": 23.876783398184177,
+      "grad_norm": 2.880645513534546,
+      "learning_rate": 7.602604166666667e-06,
+      "loss": 2.069,
+      "step": 4608
+    },
+    {
+      "epoch": 24.8715953307393,
+      "grad_norm": 3.6908700466156006,
+      "learning_rate": 7.5026041666666675e-06,
+      "loss": 2.0878,
+      "step": 4800
+    },
+    {
+      "epoch": 25.866407263294423,
+      "grad_norm": 3.9834940433502197,
+      "learning_rate": 7.402604166666667e-06,
+      "loss": 2.0832,
+      "step": 4992
+    },
+    {
+      "epoch": 26.861219195849547,
+      "grad_norm": 4.434403896331787,
+      "learning_rate": 7.302604166666667e-06,
+      "loss": 2.0618,
+      "step": 5184
+    },
+    {
+      "epoch": 27.85603112840467,
+      "grad_norm": 2.6179683208465576,
+      "learning_rate": 7.202604166666667e-06,
+      "loss": 2.0698,
+      "step": 5376
+    },
+    {
+      "epoch": 28.850843060959793,
+      "grad_norm": 3.7101306915283203,
+      "learning_rate": 7.102604166666667e-06,
+      "loss": 2.0561,
+      "step": 5568
+    },
+    {
+      "epoch": 29.845654993514916,
+      "grad_norm": 2.940573215484619,
+      "learning_rate": 7.002604166666668e-06,
+      "loss": 2.0499,
+      "step": 5760
+    },
+    {
+      "epoch": 30.84046692607004,
+      "grad_norm": 3.1798958778381348,
+      "learning_rate": 6.902604166666667e-06,
+      "loss": 2.049,
+      "step": 5952
+    },
+    {
+      "epoch": 31.835278858625163,
+      "grad_norm": 3.288163423538208,
+      "learning_rate": 6.802604166666667e-06,
+      "loss": 2.0326,
+      "step": 6144
+    },
+    {
+      "epoch": 32.83009079118028,
+      "grad_norm": 2.8764078617095947,
+      "learning_rate": 6.703125e-06,
+      "loss": 2.0371,
+      "step": 6336
+    },
+    {
+      "epoch": 33.824902723735406,
+      "grad_norm": 2.9869441986083984,
+      "learning_rate": 6.603125e-06,
+      "loss": 2.0611,
+      "step": 6528
+    },
+    {
+      "epoch": 34.81971465629053,
+      "grad_norm": 3.1720468997955322,
+      "learning_rate": 6.5031250000000005e-06,
+      "loss": 2.0541,
+      "step": 6720
+    },
+    {
+      "epoch": 35.81452658884565,
+      "grad_norm": 3.7449281215667725,
+      "learning_rate": 6.403125e-06,
+      "loss": 2.0177,
+      "step": 6912
+    },
+    {
+      "epoch": 36.809338521400775,
+      "grad_norm": 2.036007881164551,
+      "learning_rate": 6.303125000000001e-06,
+      "loss": 2.006,
+      "step": 7104
+    },
+    {
+      "epoch": 37.8041504539559,
+      "grad_norm": 2.677340507507324,
+      "learning_rate": 6.203125000000001e-06,
+      "loss": 2.0329,
+      "step": 7296
+    },
+    {
+      "epoch": 38.79896238651102,
+      "grad_norm": 2.8975298404693604,
+      "learning_rate": 6.103125000000001e-06,
+      "loss": 2.0347,
+      "step": 7488
+    },
+    {
+      "epoch": 39.793774319066145,
+      "grad_norm": 1.8516769409179688,
+      "learning_rate": 6.003125000000001e-06,
+      "loss": 2.0179,
+      "step": 7680
+    },
+    {
+      "epoch": 40.78858625162127,
+      "grad_norm": 2.984468698501587,
+      "learning_rate": 5.903125e-06,
+      "loss": 2.0002,
+      "step": 7872
+    },
+    {
+      "epoch": 41.78339818417639,
+      "grad_norm": 4.378921031951904,
+      "learning_rate": 5.8031250000000004e-06,
+      "loss": 2.0032,
+      "step": 8064
+    },
+    {
+      "epoch": 42.778210116731515,
+      "grad_norm": 3.600853204727173,
+      "learning_rate": 5.7031250000000006e-06,
+      "loss": 2.0131,
+      "step": 8256
+    },
+    {
+      "epoch": 43.77302204928664,
+      "grad_norm": 3.475604295730591,
+      "learning_rate": 5.603125000000001e-06,
+      "loss": 2.0167,
+      "step": 8448
+    },
+    {
+      "epoch": 44.76783398184176,
+      "grad_norm": 3.7388381958007812,
+      "learning_rate": 5.503125e-06,
+      "loss": 2.0286,
+      "step": 8640
+    },
+    {
+      "epoch": 45.762645914396884,
+      "grad_norm": 3.0160775184631348,
+      "learning_rate": 5.403125e-06,
+      "loss": 2.0327,
+      "step": 8832
+    },
+    {
+      "epoch": 46.75745784695201,
+      "grad_norm": 2.4288928508758545,
+      "learning_rate": 5.303645833333334e-06,
+      "loss": 1.9837,
+      "step": 9024
+    },
+    {
+      "epoch": 47.75226977950713,
+      "grad_norm": 2.6072142124176025,
+      "learning_rate": 5.203645833333334e-06,
+      "loss": 2.022,
+      "step": 9216
+    },
+    {
+      "epoch": 48.747081712062254,
+      "grad_norm": 3.0784308910369873,
+      "learning_rate": 5.103645833333334e-06,
+      "loss": 1.9946,
+      "step": 9408
+    },
+    {
+      "epoch": 49.74189364461738,
+      "grad_norm": 2.552196979522705,
+      "learning_rate": 5.003645833333334e-06,
+      "loss": 1.9857,
+      "step": 9600
+    },
+    {
+      "epoch": 50.7367055771725,
+      "grad_norm": 5.163851261138916,
+      "learning_rate": 4.903645833333333e-06,
+      "loss": 2.0038,
+      "step": 9792
+    },
+    {
+      "epoch": 51.731517509727624,
+      "grad_norm": 3.9303905963897705,
+      "learning_rate": 4.8036458333333335e-06,
+      "loss": 2.0327,
+      "step": 9984
+    },
+    {
+      "epoch": 52.72632944228275,
+      "grad_norm": 4.269293308258057,
+      "learning_rate": 4.703645833333334e-06,
+      "loss": 1.978,
+      "step": 10176
+    },
+    {
+      "epoch": 53.72114137483787,
+      "grad_norm": 3.4755337238311768,
+      "learning_rate": 4.603645833333334e-06,
+      "loss": 2.0154,
+      "step": 10368
+    },
+    {
+      "epoch": 54.715953307392994,
+      "grad_norm": 2.2201080322265625,
+      "learning_rate": 4.503645833333334e-06,
+      "loss": 1.9945,
+      "step": 10560
+    },
+    {
+      "epoch": 55.71076523994812,
+      "grad_norm": 3.3768227100372314,
+      "learning_rate": 4.403645833333334e-06,
+      "loss": 1.9764,
+      "step": 10752
+    },
+    {
+      "epoch": 56.70557717250324,
+      "grad_norm": 2.9863104820251465,
+      "learning_rate": 4.303645833333334e-06,
+      "loss": 2.0031,
+      "step": 10944
+    },
+    {
+      "epoch": 57.70038910505836,
+      "grad_norm": 3.351330280303955,
+      "learning_rate": 4.203645833333333e-06,
+      "loss": 1.9915,
+      "step": 11136
+    },
+    {
+      "epoch": 58.69520103761349,
+      "grad_norm": 2.745793104171753,
+      "learning_rate": 4.103645833333333e-06,
+      "loss": 2.0054,
+      "step": 11328
+    },
+    {
+      "epoch": 59.69001297016861,
+      "grad_norm": 4.333703517913818,
+      "learning_rate": 4.0036458333333335e-06,
+      "loss": 1.9629,
+      "step": 11520
+    },
+    {
+      "epoch": 60.68482490272373,
+      "grad_norm": 3.1686012744903564,
+      "learning_rate": 3.903645833333334e-06,
+      "loss": 2.0011,
+      "step": 11712
+    },
+    {
+      "epoch": 61.679636835278856,
+      "grad_norm": 2.1179568767547607,
+      "learning_rate": 3.804166666666667e-06,
+      "loss": 1.9949,
+      "step": 11904
+    },
+    {
+      "epoch": 62.67444876783398,
+      "grad_norm": 2.4998276233673096,
+      "learning_rate": 3.704166666666667e-06,
+      "loss": 1.9791,
+      "step": 12096
+    },
+    {
+      "epoch": 63.6692607003891,
+      "grad_norm": 3.710357189178467,
+      "learning_rate": 3.6041666666666667e-06,
+      "loss": 1.9757,
+      "step": 12288
+    },
+    {
+      "epoch": 64.66407263294423,
+      "grad_norm": 3.4601991176605225,
+      "learning_rate": 3.504166666666667e-06,
+      "loss": 2.025,
+      "step": 12480
+    },
+    {
+      "epoch": 65.65888456549935,
+      "grad_norm": 2.626007318496704,
+      "learning_rate": 3.4041666666666665e-06,
+      "loss": 1.9755,
+      "step": 12672
+    },
+    {
+      "epoch": 66.65369649805447,
+      "grad_norm": 3.1454813480377197,
+      "learning_rate": 3.304166666666667e-06,
+      "loss": 1.9984,
+      "step": 12864
+    },
+    {
+      "epoch": 67.6485084306096,
+      "grad_norm": 7.283568859100342,
+      "learning_rate": 3.204166666666667e-06,
+      "loss": 1.9776,
+      "step": 13056
+    },
+    {
+      "epoch": 68.64332036316472,
+      "grad_norm": 2.031538724899292,
+      "learning_rate": 3.104166666666667e-06,
+      "loss": 1.9939,
+      "step": 13248
+    },
+    {
+      "epoch": 69.63813229571984,
+      "grad_norm": 2.370424270629883,
+      "learning_rate": 3.004166666666667e-06,
+      "loss": 1.9805,
+      "step": 13440
+    },
+    {
+      "epoch": 70.63294422827497,
+      "grad_norm": 3.6357874870300293,
+      "learning_rate": 2.9041666666666667e-06,
+      "loss": 1.9998,
+      "step": 13632
+    },
+    {
+      "epoch": 71.62775616083009,
+      "grad_norm": 3.61807918548584,
+      "learning_rate": 2.8041666666666668e-06,
+      "loss": 1.9615,
+      "step": 13824
+    },
+    {
+      "epoch": 72.62256809338521,
+      "grad_norm": 2.440490484237671,
+      "learning_rate": 2.7041666666666673e-06,
+      "loss": 1.9715,
+      "step": 14016
+    },
+    {
+      "epoch": 73.61738002594034,
+      "grad_norm": 2.490007162094116,
+      "learning_rate": 2.6046875000000006e-06,
+      "loss": 1.9765,
+      "step": 14208
+    },
+    {
+      "epoch": 74.61219195849546,
+      "grad_norm": 2.368825674057007,
+      "learning_rate": 2.5046875000000003e-06,
+      "loss": 1.9819,
+      "step": 14400
+    },
+    {
+      "epoch": 75.60700389105058,
+      "grad_norm": 2.9740967750549316,
+      "learning_rate": 2.4046875000000004e-06,
+      "loss": 1.9702,
+      "step": 14592
+    },
+    {
+      "epoch": 76.6018158236057,
+      "grad_norm": 3.215397834777832,
+      "learning_rate": 2.3046875e-06,
+      "loss": 1.9913,
+      "step": 14784
+    },
+    {
+      "epoch": 77.59662775616083,
+      "grad_norm": 2.465824604034424,
+      "learning_rate": 2.2046875000000002e-06,
+      "loss": 1.9751,
+      "step": 14976
+    },
+    {
+      "epoch": 78.59143968871595,
+      "grad_norm": 2.2337405681610107,
+      "learning_rate": 2.1046875000000003e-06,
+      "loss": 1.9496,
+      "step": 15168
+    },
+    {
+      "epoch": 79.58625162127107,
+      "grad_norm": 2.875598907470703,
+      "learning_rate": 2.0046875e-06,
+      "loss": 2.0066,
+      "step": 15360
+    },
+    {
+      "epoch": 80.5810635538262,
+      "grad_norm": 2.8990750312805176,
+      "learning_rate": 1.9046875000000001e-06,
+      "loss": 1.9466,
+      "step": 15552
+    },
+    {
+      "epoch": 81.57587548638132,
+      "grad_norm": 4.734499454498291,
+      "learning_rate": 1.8046875000000002e-06,
+      "loss": 1.9636,
+      "step": 15744
+    },
+    {
+      "epoch": 82.57068741893644,
+      "grad_norm": 2.9932515621185303,
+      "learning_rate": 1.7046875000000001e-06,
+      "loss": 1.973,
+      "step": 15936
+    },
+    {
+      "epoch": 83.56549935149157,
+      "grad_norm": 4.040909767150879,
+      "learning_rate": 1.6046875e-06,
+      "loss": 1.9678,
+      "step": 16128
+    },
+    {
+      "epoch": 84.56031128404669,
+      "grad_norm": 3.0035159587860107,
+      "learning_rate": 1.5046875000000002e-06,
+      "loss": 1.9457,
+      "step": 16320
+    },
+    {
+      "epoch": 85.55512321660181,
+      "grad_norm": 2.5469093322753906,
+      "learning_rate": 1.4046875e-06,
+      "loss": 1.9642,
+      "step": 16512
+    },
+    {
+      "epoch": 86.54993514915694,
+      "grad_norm": 2.850858688354492,
+      "learning_rate": 1.3046875e-06,
+      "loss": 1.9638,
+      "step": 16704
+    },
+    {
+      "epoch": 87.54474708171206,
+      "grad_norm": 2.5728836059570312,
+      "learning_rate": 1.2046875e-06,
+      "loss": 1.9681,
+      "step": 16896
+    },
+    {
+      "epoch": 88.53955901426718,
+      "grad_norm": 1.4205690622329712,
+      "learning_rate": 1.1046875000000002e-06,
+      "loss": 1.9585,
+      "step": 17088
+    },
+    {
+      "epoch": 89.5343709468223,
+      "grad_norm": 1.9591172933578491,
+      "learning_rate": 1.0046875e-06,
+      "loss": 1.9894,
+      "step": 17280
+    },
+    {
+      "epoch": 90.52918287937743,
+      "grad_norm": 3.981717348098755,
+      "learning_rate": 9.046875000000001e-07,
+      "loss": 1.9388,
+      "step": 17472
+    },
+    {
+      "epoch": 91.52399481193255,
+      "grad_norm": 2.348127841949463,
+      "learning_rate": 8.046875000000001e-07,
+      "loss": 1.9677,
+      "step": 17664
+    },
+    {
+      "epoch": 92.51880674448768,
+      "grad_norm": 3.0600669384002686,
+      "learning_rate": 7.046875e-07,
+      "loss": 1.9747,
+      "step": 17856
+    },
+    {
+      "epoch": 93.5136186770428,
+      "grad_norm": 3.865741491317749,
+      "learning_rate": 6.046875000000001e-07,
+      "loss": 1.958,
+      "step": 18048
+    },
+    {
+      "epoch": 94.50843060959792,
+      "grad_norm": 2.755720853805542,
+      "learning_rate": 5.052083333333334e-07,
+      "loss": 1.975,
+      "step": 18240
+    },
+    {
+      "epoch": 95.50324254215305,
+      "grad_norm": 3.4946985244750977,
+      "learning_rate": 4.0520833333333335e-07,
+      "loss": 1.9426,
+      "step": 18432
+    },
+    {
+      "epoch": 96.49805447470817,
+      "grad_norm": 2.993678331375122,
+      "learning_rate": 3.0520833333333336e-07,
+      "loss": 1.9497,
+      "step": 18624
+    },
+    {
+      "epoch": 97.49286640726329,
+      "grad_norm": 3.959327459335327,
+      "learning_rate": 2.0520833333333334e-07,
+      "loss": 1.9409,
+      "step": 18816
+    },
+    {
+      "epoch": 98.48767833981842,
+      "grad_norm": 2.147462844848633,
+      "learning_rate": 1.0520833333333334e-07,
+      "loss": 1.9485,
+      "step": 19008
+    },
+    {
+      "epoch": 99.48249027237354,
+      "grad_norm": 2.728996992111206,
+      "learning_rate": 5.208333333333334e-09,
+      "loss": 1.981,
+      "step": 19200
+    },
+    {
+      "epoch": 99.48249027237354,
+      "step": 19200,
+      "total_flos": 1.0064407234989773e+18,
+      "train_loss": 2.0492228651046753,
+      "train_runtime": 20851.1432,
+      "train_samples_per_second": 184.834,
+      "train_steps_per_second": 0.921
+    }
+  ],
+  "logging_steps": 192,
+  "max_steps": 19200,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 6800,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0064407234989773e+18,
+  "train_batch_size": 50,
+  "trial_name": null,
+  "trial_params": null
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff