Thalesian committed on
Commit de81a01 · verified · 1 Parent(s): b9485cb

End of training

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
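This new rule is consistent with the tokenizer rebuild later in this commit: with the vocabulary growing from 32,345 to 256,955 entries, the serialized tokenizer.json becomes a multi-megabyte file (spiece.model alone grows from ~0.8 MB to ~4.5 MB below), so it is now stored as a Git LFS pointer rather than as in-repo text.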
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1641
+- Loss: 0.1053
 
 ## Model description
 
@@ -33,45 +33,40 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0006047816549758072
-- train_batch_size: 8
-- eval_batch_size: 8
+- learning_rate: 0.000302456543043843
+- train_batch_size: 96
+- eval_batch_size: 96
 - seed: 42
-- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.964129172421366,0.8471340191802936) and epsilon=1.51279024695782e-08 and optimizer_args=No additional optimizer arguments
+- optimizer: Use OptimizerNames.ADAMW_APEX_FUSED with betas=(0.826646043090655,0.991636944120939) and epsilon=3.4341677539323e-07 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 2593
-- num_epochs: 500
+- lr_scheduler_warmup_steps: 1537
+- num_epochs: 200
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:------:|:---------------:|
-| 0.2789 | 1.0 | 6351 | 0.1682 |
-| 0.2849 | 2.0 | 12702 | 0.1695 |
-| 0.2948 | 3.0 | 19053 | 0.1728 |
-| 0.2806 | 4.0 | 25404 | 0.1641 |
-| 0.2941 | 5.0 | 31755 | 0.1645 |
-| 0.2926 | 6.0 | 38106 | 0.1533 |
-| 0.2997 | 7.0 | 44457 | 0.1724 |
-| 0.2867 | 8.0 | 50808 | 0.1663 |
-| 0.2739 | 9.0 | 57159 | 0.1562 |
-| 0.2884 | 10.0 | 63510 | 0.1708 |
-| 0.306 | 11.0 | 69861 | 0.1569 |
-| 0.2888 | 12.0 | 76212 | 0.1854 |
-| 0.2933 | 13.0 | 82563 | 0.1756 |
-| 0.2953 | 14.0 | 88914 | 0.1599 |
-| 0.2987 | 15.0 | 95265 | 0.1701 |
-| 0.3047 | 16.0 | 101616 | 0.2214 |
-| 0.3126 | 17.0 | 107967 | 0.1564 |
-| 0.3066 | 18.0 | 114318 | 0.2439 |
-| 0.2861 | 19.0 | 120669 | 0.1590 |
-| 0.3045 | 20.0 | 127020 | 0.3101 |
-| 0.3045 | 21.0 | 133371 | 0.1641 |
+| 0.088 | 1.0 | 43681 | 0.1022 |
+| 0.0839 | 2.0 | 87362 | 0.1026 |
+| 0.0835 | 3.0 | 131043 | 0.1034 |
+| 0.0832 | 4.0 | 174724 | 0.1029 |
+| 0.0864 | 5.0 | 218405 | 0.1039 |
+| 0.0873 | 6.0 | 262086 | 0.1042 |
+| 0.0851 | 7.0 | 305767 | 0.1036 |
+| 0.0851 | 8.0 | 349448 | 0.1043 |
+| 0.0834 | 9.0 | 393129 | 0.1045 |
+| 0.0818 | 10.0 | 436810 | 0.1046 |
+| 0.0816 | 11.0 | 480491 | 0.1056 |
+| 0.0809 | 12.0 | 524172 | 0.1050 |
+| 0.0783 | 13.0 | 567853 | 0.1042 |
+| 0.0789 | 14.0 | 611534 | 0.1053 |
+| 0.0776 | 15.0 | 655215 | 0.1041 |
+| 0.0756 | 16.0 | 698896 | 0.1053 |
 
 
 ### Framework versions
 
-- Transformers 4.49.0
-- Pytorch 2.6.0.dev20241217
-- Datasets 2.20.0
+- Transformers 4.49.0.dev0
+- Pytorch 2.6.0+cu126
+- Datasets 3.3.0
 - Tokenizers 0.21.0
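Since the card above is still the auto-generated stub, a minimal inference sketch may be useful. Everything in it beyond the UMT5 class name is an assumption: the repo id is guessed from the checkpoint path in the config below, and no task prefix is documented.

```python
# Minimal inference sketch for this checkpoint. The repo id and the task
# prefix are hypothetical; substitute the actual model id or a local path.
from transformers import AutoTokenizer, UMT5ForConditionalGeneration

model_id = "Thalesian/AKK-SUX-ELX_GMY_HIT_UMT5"  # hypothetical id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = UMT5ForConditionalGeneration.from_pretrained(model_id)

# The task prefix is an assumption; the card does not document one.
inputs = tokenizer("translate Akkadian to English: 𒀭", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```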
added_tokens.json CHANGED
@@ -1,247 +1,657 @@
 {
- "±": 32314,
- "": 32189,
- "": 32266,
- "": 32231,
- "": 32179,
- "": 32218,
- "": 32200,
- "𐀀": 32299,
- "𐀁": 32169,
- "𐀂": 32110,
- "𐀃": 32301,
- "𐀄": 32261,
- "𐀅": 32257,
- "𐀆": 32102,
- "𐀇": 32341,
- "𐀈": 32136,
- "𐀉": 32187,
- "𐀊": 32286,
- "𐀋": 32127,
- "𐀍": 32108,
- "𐀎": 32315,
- "𐀏": 32162,
- "𐀐": 32154,
- "𐀑": 32306,
- "𐀒": 32152,
- "𐀓": 32156,
- "𐀔": 32157,
- "𐀕": 32239,
- "𐀖": 32295,
- "𐀗": 32210,
- "𐀘": 32226,
- "𐀙": 32340,
- "𐀚": 32288,
- "𐀛": 32304,
- "𐀜": 32188,
- "𐀝": 32272,
- "𐀞": 32171,
- "𐀟": 32268,
- "𐀠": 32124,
- "𐀡": 32175,
- "𐀢": 32219,
- "𐀣": 32293,
- "𐀤": 32300,
- "𐀥": 32287,
- "𐀦": 32172,
- "𐀨": 32106,
- "𐀩": 32214,
- "𐀪": 32115,
- "𐀫": 32225,
- "𐀬": 32309,
- "𐀭": 32159,
- "𐀮": 32290,
- "𐀯": 32237,
- "𐀰": 32313,
- "𐀱": 32194,
- "𐀲": 32281,
- "𐀳": 32107,
- "𐀴": 32265,
- "𐀵": 32325,
- "𐀶": 32126,
- "𐀷": 32164,
- "𐀸": 32130,
- "𐀹": 32198,
- "𐀺": 32318,
- "𐀼": 32263,
- "𐀽": 32321,
- "𐀿": 32271,
- "𐁀": 32216,
- "𐁁": 32160,
- "𐁂": 32144,
- "𐁃": 32135,
- "𐁄": 32331,
- "𐁅": 32167,
- "𐁆": 32208,
- "𐁇": 32204,
- "𐁈": 32282,
- "𐁉": 32118,
- "𐁊": 32213,
- "𐁋": 32183,
- "𐁌": 32311,
- "𐁍": 32227,
- "𐁐": 32298,
- "𐁑": 32344,
- "𐁒": 32134,
- "𐁓": 32149,
- "𐁔": 32228,
- "𐁕": 32264,
- "𐁖": 32180,
- "𐁗": 32165,
- "𐁘": 32101,
- "𐁙": 32147,
- "𐁚": 32173,
- "𐁛": 32117,
- "𐁜": 32100,
- "𐁝": 32270,
- "𐂀": 32267,
- "𐂁": 32201,
- "𐂂": 32291,
- "𐂃": 32128,
- "𐂄": 32285,
- "𐂅": 32137,
- "𐂆": 32235,
- "𐂇": 32255,
- "𐂈": 32199,
- "𐂉": 32195,
- "𐂊": 32191,
- "𐂋": 32249,
- "𐂌": 32177,
- "𐂍": 32123,
- "𐂎": 32302,
- "𐂏": 32248,
- "𐂐": 32192,
- "𐂑": 32310,
- "𐂒": 32109,
- "𐂔": 32337,
- "𐂕": 32276,
- "𐂖": 32143,
- "𐂗": 32166,
- "𐂚": 32184,
- "𐂛": 32338,
- "𐂜": 32292,
- "𐂝": 32215,
- "𐂞": 32247,
- "𐂟": 32240,
- "𐂡": 32111,
- "𐂢": 32168,
- "𐂣": 32233,
- "𐂥": 32125,
- "𐂦": 32206,
- "𐂧": 32141,
- "𐂨": 32297,
- "𐂩": 32241,
- "𐂪": 32185,
- "𐂫": 32343,
- "𐂬": 32251,
- "𐂭": 32284,
- "𐂮": 32202,
- "𐂯": 32140,
- "𐂰": 32246,
- "𐂱": 32336,
- "𐂲": 32148,
- "𐂳": 32256,
- "𐂴": 32317,
- "𐂵": 32104,
- "𐂶": 32190,
- "𐂷": 32329,
- "𐂸": 32289,
- "𐂹": 32323,
- "𐂺": 32320,
- "𐂻": 32280,
- "𐂼": 32324,
- "𐂽": 32150,
- "𐂾": 32230,
- "𐂿": 32296,
- "𐃀": 32232,
- "𐃁": 32129,
- "𐃂": 32155,
- "𐃄": 32163,
- "𐃆": 32220,
- "𐃇": 32133,
- "𐃈": 32333,
- "𐃉": 32203,
- "𐃊": 32121,
- "𐃌": 32120,
- "𐃍": 32131,
- "𐃎": 32196,
- "𐃏": 32158,
- "𐃐": 32113,
- "𐃑": 32316,
- "𐃓": 32258,
- "𐃔": 32274,
- "𐃕": 32217,
- "𐃗": 32322,
- "𐃙": 32223,
- "𐃚": 32260,
- "𐃜": 32342,
- "𐃝": 32283,
- "𐃞": 32305,
- "𐃟": 32119,
- "𐃠": 32193,
- "𐃡": 32103,
- "𐃢": 32253,
- "𐃣": 32234,
- "𐃤": 32312,
- "𐃥": 32153,
- "𐃦": 32146,
- "𐃧": 32114,
- "𐃨": 32209,
- "𐃩": 32139,
- "𐃪": 32335,
- "𐃫": 32151,
- "𐃬": 32332,
- "𐃭": 32244,
- "𐃮": 32279,
- "𐃯": 32116,
- "𐃰": 32197,
- "𐃱": 32205,
- "𐃲": 32207,
- "𐃳": 32330,
- "𐃴": 32181,
- "𐃵": 32211,
- "𐃶": 32236,
- "𐃷": 32254,
- "𐃸": 32308,
- "𐃹": 32339,
- "𐄈": 32334,
- "𐄉": 32328,
- "𐄊": 32262,
- "𐄋": 32138,
- "𐄌": 32178,
- "𐄍": 32319,
- "𐄎": 32269,
- "𐄏": 32122,
- "𐄐": 32170,
- "𐄑": 32221,
- "𐄒": 32275,
- "𐄓": 32245,
- "𐄔": 32174,
- "𐄕": 32224,
- "𐄖": 32182,
- "𐄗": 32142,
- "𐄘": 32250,
- "𐄙": 32212,
- "𐄚": 32294,
- "𐄛": 32277,
- "𐄜": 32242,
- "𐄝": 32186,
- "𐄞": 32132,
- "𐄟": 32105,
- "𐄡": 32112,
- "𐄢": 32243,
- "𐄣": 32273,
- "𐄤": 32238,
- "𐄥": 32145,
- "𐄧": 32259,
- "𐄪": 32327,
- "𐄫": 32222,
- "𐄷": 32229,
- "𐄸": 32326,
- "𐄹": 32176,
- "𐄺": 32303,
- "𐄼": 32307,
- "𐄽": 32161,
- "𐄾": 32252,
- "𐄿": 32278
+ "": 256930,
+ "": 256909,
+ "": 256848,
+ "": 256796,
+ "ₚᵤ": 256858,
+ "": 256789,
+ "": 256350,
+ "": 256874,
+ "𐀀": 256539,
+ "𐀁": 256675,
+ "𐀂": 256450,
+ "𐀃": 256391,
+ "𐀄": 256729,
+ "𐀅": 256558,
+ "𐀆": 256615,
+ "𐀇": 256368,
+ "𐀈": 256328,
+ "𐀉": 256707,
+ "𐀊": 256319,
+ "𐀋": 256308,
+ "𐀍": 256483,
+ "𐀎": 256300,
+ "𐀏": 256559,
+ "𐀐": 256590,
+ "𐀑": 256663,
+ "𐀒": 256766,
+ "𐀓": 256650,
+ "𐀔": 256649,
+ "𐀕": 256347,
+ "𐀖": 256546,
+ "𐀗": 256568,
+ "𐀘": 256537,
+ "𐀙": 256700,
+ "𐀚": 256426,
+ "𐀛": 256562,
+ "𐀜": 256639,
+ "𐀝": 256339,
+ "𐀞": 256613,
+ "𐀟": 256747,
+ "𐀠": 256473,
+ "𐀡": 256722,
+ "𐀢": 256378,
+ "𐀣": 256376,
+ "𐀤": 256447,
+ "𐀥": 256321,
+ "𐀦": 256518,
+ "𐀨": 256553,
+ "𐀩": 256467,
+ "𐀪": 256727,
+ "𐀫": 256494,
+ "𐀬": 256488,
+ "𐀭": 256362,
+ "𐀮": 256317,
+ "𐀯": 256561,
+ "𐀰": 256651,
+ "𐀱": 256674,
+ "𐀲": 256433,
+ "𐀳": 256413,
+ "𐀴": 256664,
+ "𐀵": 256603,
+ "𐀶": 256646,
+ "𐀷": 256733,
+ "𐀸": 256452,
+ "𐀹": 256327,
+ "𐀺": 256463,
+ "𐀼": 256698,
+ "𐀽": 256541,
+ "𐀿": 256571,
+ "𐁀": 256498,
+ "𐁁": 256524,
+ "𐁂": 256658,
+ "𐁃": 256774,
+ "𐁄": 256626,
+ "𐁅": 256353,
+ "𐁆": 256695,
+ "𐁇": 256358,
+ "𐁈": 256806,
+ "𐁉": 256313,
+ "𐁊": 256906,
+ "𐁋": 256623,
+ "𐁌": 256846,
+ "𐁍": 256808,
+ "𐁐": 256889,
+ "𐁑": 256859,
+ "𐁒": 256601,
+ "𐁓": 256898,
+ "𐁔": 256325,
+ "𐁕": 256611,
+ "𐁖": 256501,
+ "𐁗": 256373,
+ "𐁘": 256903,
+ "𐁙": 256882,
+ "𐁚": 256802,
+ "𐁛": 256517,
+ "𐁜": 256856,
+ "𐁝": 256944,
+ "𐂀": 256515,
+ "𐂁": 256768,
+ "𐂂": 256954,
+ "𐂃": 256605,
+ "𐂄": 256932,
+ "𐂅": 256865,
+ "𐂆": 256617,
+ "𐂇": 256451,
+ "𐂈": 256734,
+ "𐂉": 256567,
+ "𐂊": 256385,
+ "𐂋": 256714,
+ "𐂌": 256320,
+ "𐂍": 256717,
+ "𐂎": 256759,
+ "𐂏": 256530,
+ "𐂐": 256637,
+ "𐂑": 256824,
+ "𐂒": 256469,
+ "𐂔": 256904,
+ "𐂕": 256514,
+ "𐂖": 256311,
+ "𐂗": 256829,
+ "𐂚": 256481,
+ "𐂛": 256554,
+ "𐂜": 256943,
+ "𐂝": 256345,
+ "𐂞": 256335,
+ "𐂟": 256324,
+ "𐂡": 256681,
+ "𐂢": 256468,
+ "𐂣": 256917,
+ "𐂥": 256888,
+ "𐂦": 256828,
+ "𐂧": 256625,
+ "𐂨": 256869,
+ "𐂩": 256431,
+ "𐂪": 256737,
+ "𐂫": 256721,
+ "𐂬": 256935,
+ "𐂭": 256946,
+ "𐂮": 256547,
+ "𐂯": 256402,
+ "𐂰": 256813,
+ "𐂱": 256879,
+ "𐂲": 256941,
+ "𐂳": 256525,
+ "𐂴": 256817,
+ "𐂵": 256927,
+ "𐂶": 256486,
+ "𐂷": 256805,
+ "𐂸": 256916,
+ "𐂹": 256792,
+ "𐂺": 256803,
+ "𐂻": 256513,
+ "𐂼": 256833,
+ "𐂽": 256579,
+ "𐂾": 256863,
+ "𐂿": 256837,
+ "𐃀": 256585,
+ "𐃁": 256877,
+ "𐃂": 256499,
+ "𐃄": 256875,
+ "𐃆": 256809,
+ "𐃇": 256691,
+ "𐃈": 256834,
+ "𐃉": 256312,
+ "𐃊": 256914,
+ "𐃌": 256655,
+ "𐃍": 256772,
+ "𐃎": 256566,
+ "𐃏": 256616,
+ "𐃐": 256823,
+ "𐃑": 256839,
+ "𐃓": 256782,
+ "𐃔": 256901,
+ "𐃕": 256854,
+ "𐃗": 256822,
+ "𐃙": 256778,
+ "𐃚": 256948,
+ "𐃜": 256532,
+ "𐃝": 256850,
+ "𐃞": 256836,
+ "𐃟": 256939,
+ "𐃠": 256924,
+ "𐃡": 256332,
+ "𐃢": 256849,
+ "𐃣": 256952,
+ "𐃤": 256853,
+ "𐃥": 256940,
+ "𐃦": 256891,
+ "𐃧": 256892,
+ "𐃨": 256540,
+ "𐃩": 256845,
+ "𐃪": 256804,
+ "𐃫": 256379,
+ "𐃬": 256495,
+ "𐃭": 256897,
+ "𐃮": 256919,
+ "𐃯": 256826,
+ "𐃰": 256884,
+ "𐃱": 256840,
+ "𐃲": 256851,
+ "𐃳": 256838,
+ "𐃴": 256812,
+ "𐃵": 256475,
+ "𐃶": 256825,
+ "𐃷": 256797,
+ "𐃸": 256788,
+ "𐃹": 256937,
+ "𐄈": 256632,
+ "𐄉": 256643,
+ "𐄊": 256574,
+ "𐄋": 256529,
+ "𐄌": 256610,
+ "𐄍": 256423,
+ "𐄎": 256527,
+ "𐄏": 256893,
+ "𐄐": 256708,
+ "𐄑": 256633,
+ "𐄒": 256418,
+ "𐄓": 256536,
+ "𐄔": 256671,
+ "𐄕": 256736,
+ "𐄖": 256408,
+ "𐄗": 256531,
+ "𐄘": 256723,
+ "𐄙": 256668,
+ "𐄚": 256764,
+ "𐄛": 256535,
+ "𐄜": 256847,
+ "𐄝": 256773,
+ "𐄞": 256887,
+ "𐄟": 256894,
+ "𐄡": 256820,
+ "𐄢": 256881,
+ "𐄣": 256918,
+ "𐄤": 256810,
+ "𐄥": 256791,
+ "𐄧": 256880,
+ "𐄪": 256933,
+ "𐄫": 256871,
+ "𐄷": 256572,
+ "𐄸": 256784,
+ "𐄹": 256629,
+ "𐄺": 256591,
+ "𐄼": 256394,
+ "𐄽": 256403,
+ "𐄾": 256592,
+ "𐄿": 256648,
+ "𒀀": 256344,
+ "𒀁": 256857,
+ "𒀉": 256624,
+ "𒀊": 256718,
+ "𒀏": 256711,
+ "𒀕": 256565,
+ "𒀖": 256644,
+ "𒀘": 256915,
+ "𒀚": 256383,
+ "𒀜": 256627,
+ "𒀝": 256584,
+ "𒀞": 256354,
+ "𒀠": 256409,
+ "𒀩": 256598,
+ "𒀪": 256594,
+ "𒀫": 256621,
+ "𒀬": 256564,
+ "𒀭": 256706,
+ "𒀮": 256505,
+ "𒀯": 256622,
+ "𒀲": 256683,
+ "𒀳": 256769,
+ "𒀴": 256334,
+ "𒀸": 256521,
+ "𒀹": 256652,
+ "𒀾": 256744,
+ "𒀿": 256793,
+ "𒁀": 256669,
+ "𒁁": 256462,
+ "𒁃": 256816,
+ "𒁄": 256306,
+ "𒁆": 256938,
+ "𒁇": 256765,
+ "𒁈": 256503,
+ "𒁉": 256359,
+ "𒁍": 256318,
+ "𒁑": 256577,
+ "𒁓": 256720,
+ "𒁔": 256316,
+ "𒁕": 256393,
+ "𒁖": 256405,
+ "𒁜": 256925,
+ "𒁦": 256842,
+ "𒁮": 256500,
+ "𒁯": 256307,
+ "𒁰": 256831,
+ "𒁲": 256682,
+ "𒁳": 256735,
+ "𒁴": 256457,
+ "𒁵": 256667,
+ "𒁶": 256608,
+ "𒁷": 256497,
+ "𒁹": 256420,
+ "𒁺": 256491,
+ "𒁻": 256864,
+ "𒁼": 256370,
+ "𒁽": 256866,
+ "𒁾": 256538,
+ "𒂀": 256818,
+ "𒂁": 256435,
+ "𒂂": 256685,
+ "𒂃": 256861,
+ "𒂄": 256703,
+ "𒂅": 256340,
+ "𒂆": 256815,
+ "𒂇": 256512,
+ "𒂈": 256689,
+ "𒂉": 256767,
+ "𒂊": 256464,
+ "𒂍": 256740,
+ "𒂔": 256743,
+ "𒂕": 256606,
+ "𒂖": 256528,
+ "𒂗": 256330,
+ "𒂙": 256758,
+ "𒂞": 256949,
+ "𒂟": 256631,
+ "𒂠": 256660,
+ "𒂡": 256323,
+ "𒂤": 256821,
+ "𒂦": 256346,
+ "𒂬": 256841,
+ "𒂮": 256883,
+ "𒂵": 256670,
+ "𒂷": 256749,
+ "𒂼": 256544,
+ "𒃌": 256427,
+ "𒃞": 256920,
+ "𒃡": 256910,
+ "𒃢": 256794,
+ "𒃣": 256460,
+ "𒃮": 256363,
+ "𒃰": 256441,
+ "𒃲": 256696,
+ "𒃴": 256786,
+ "𒃵": 256542,
+ "𒃶": 256757,
+ "𒃷": 256563,
+ "𒃸": 256890,
+ "𒃻": 256725,
+ "𒃼": 256753,
+ "𒃽": 256693,
+ "𒃾": 256471,
+ "𒄀": 256752,
+ "𒄃": 256369,
+ "𒄄": 256776,
+ "𒄆": 256912,
+ "𒄇": 256400,
+ "𒄈": 256548,
+ "𒄉": 256798,
+ "𒄊": 256396,
+ "𒄋": 256951,
+ "𒄑": 256395,
+ "𒄒": 256657,
+ "𒄖": 256502,
+ "𒄗": 256692,
+ "𒄘": 256641,
+ "𒄙": 256921,
+ "𒄛": 256945,
+ "𒄝": 256876,
+ "𒄞": 256364,
+ "𒄠": 256604,
+ "𒄢": 256421,
+ "𒄣": 256618,
+ "𒄤": 256355,
+ "𒄥": 256742,
+ "𒄦": 256781,
+ "𒄧": 256676,
+ "𒄨": 256465,
+ "𒄩": 256745,
+ "𒄫": 256443,
+ "𒄬": 256688,
+ "𒄭": 256386,
+ "𒄯": 256470,
+ "𒄰": 256701,
+ "𒄲": 256896,
+ "𒄴": 256406,
+ "𒄵": 256741,
+ "𒄷": 256309,
+ "𒄸": 256929,
+ "𒄽": 256456,
+ "𒄾": 256384,
+ "𒄿": 256647,
+ "𒅀": 256440,
+ "𒅁": 256533,
+ "𒅂": 256551,
+ "𒅅": 256557,
+ "𒅆": 256461,
+ "𒅇": 256348,
+ "𒅈": 256434,
+ "𒅊": 256811,
+ "𒅋": 256661,
+ "𒅍": 256550,
+ "𒅎": 256754,
+ "𒅓": 256832,
+ "𒅔": 256573,
+ "𒅕": 256438,
+ "𒅖": 256375,
+ "𒅗": 256388,
+ "𒅘": 256496,
+ "𒅜": 256522,
+ "𒅢": 256950,
+ "𒅤": 256444,
+ "𒅥": 256612,
+ "𒅮": 256795,
+ "𒅴": 256305,
+ "𒅸": 256761,
+ "𒅻": 256479,
+ "𒅾": 256868,
+ "𒆃": 256684,
+ "𒆍": 256381,
+ "𒆏": 256549,
+ "𒆐": 256783,
+ "𒆑": 256679,
+ "𒆒": 256390,
+ "𒆓": 256780,
+ "𒆕": 256401,
+ "𒆗": 256437,
+ "𒆘": 256862,
+ "𒆚": 256656,
+ "𒆛": 256787,
+ "𒆜": 256680,
+ "𒆟": 256635,
+ "𒆠": 256490,
+ "𒆢": 256704,
+ "𒆤": 256762,
+ "𒆥": 256686,
+ "𒆦": 256545,
+ "𒆧": 256472,
+ "𒆪": 256710,
+ "𒆬": 256389,
+ "𒆭": 256377,
+ "𒆯": 256878,
+ "𒆰": 256640,
+ "𒆲": 256724,
+ "𒆳": 256397,
+ "𒆵": 256928,
+ "𒆷": 256763,
+ "𒆸": 256600,
+ "𒆹": 256399,
+ "𒇀": 256886,
+ "𒇅": 256596,
+ "𒇆": 256934,
+ "𒇇": 256458,
+ "𒇉": 256867,
+ "𒇒": 256800,
+ "𒇡": 256620,
+ "𒇥": 256366,
+ "𒇧": 256430,
+ "𒇬": 256936,
+ "𒇭": 256953,
+ "𒇯": 256609,
+ "𒇲": 256570,
+ "𒇳": 256790,
+ "𒇴": 256902,
+ "𒇵": 256942,
+ "𒇷": 256331,
+ "𒇸": 256581,
+ "𒇹": 256398,
+ "𒇺": 256630,
+ "𒇻": 256489,
+ "𒇼": 256923,
+ "𒇽": 256329,
+ "𒇿": 256361,
+ "𒈕": 256429,
+ "𒈖": 256872,
+ "𒈗": 256713,
+ "𒈛": 256333,
+ "𒈜": 256415,
+ "𒈝": 256595,
+ "𒈠": 256382,
+ "𒈢": 256931,
+ "𒈣": 256751,
+ "𒈤": 256372,
+ "𒈥": 256365,
+ "𒈦": 256690,
+ "𒈧": 256422,
+ "𒈨": 256474,
+ "𒈩": 256322,
+ "𒈪": 256337,
+ "𒈫": 256569,
+ "𒈬": 256487,
+ "𒈭": 256449,
+ "𒈮": 256659,
+ "𒈯": 256520,
+ "𒈲": 256634,
+ "𒈹": 256801,
+ "𒈻": 256907,
+ "𒈽": 256374,
+ "𒈾": 256349,
+ "𒈿": 256905,
+ "𒉀": 256785,
+ "𒉄": 256666,
+ "𒉅": 256360,
+ "𒉆": 256492,
+ "𒉇": 256543,
+ "𒉈": 256576,
+ "𒉋": 256677,
+ "𒉌": 256478,
+ "𒉎": 256485,
+ "𒉏": 256417,
+ "𒉐": 256900,
+ "𒉑": 256799,
+ "𒉒": 256779,
+ "𒉓": 256899,
+ "𒉘": 256477,
+ "𒉚": 256315,
+ "𒉠": 256694,
+ "𒉡": 256476,
+ "𒉢": 256807,
+ "𒉣": 256303,
+ "𒉪": 256407,
+ "𒉭": 256687,
+ "𒉺": 256509,
+ "𒉻": 256453,
+ "𒉼": 256913,
+ "𒉽": 256705,
+ "𒉾": 256731,
+ "𒉿": 256371,
+ "𒊊": 256416,
+ "𒊌": 256338,
+ "𒊍": 256588,
+ "𒊏": 256506,
+ "𒊐": 256580,
+ "𒊑": 256654,
+ "𒊒": 256699,
+ "𒊓": 256597,
+ "𒊕": 256357,
+ "𒊚": 256852,
+ "𒊨": 256827,
+ "𒊩": 256614,
+ "𒊬": 256404,
+ "𒊭": 256716,
+ "𒊮": 256602,
+ "𒊯": 256777,
+ "𒊷": 256873,
+ "𒊹": 256302,
+ "𒊺": 256702,
+ "𒊻": 256511,
+ "𒊿": 256482,
+ "𒋀": 256665,
+ "𒋁": 256678,
+ "𒋃": 256746,
+ "𒋆": 256466,
+ "𒋇": 256814,
+ "𒋋": 256870,
+ "𒋓": 256830,
+ "𒋗": 256770,
+ "𒋙": 256310,
+ "𒋚": 256336,
+ "𒋛": 256523,
+ "𒋜": 256367,
+ "𒋝": 256586,
+ "𒋞": 256715,
+ "𒋠": 256304,
+ "𒋡": 256351,
+ "𒋢": 256445,
+ "𒋤": 256673,
+ "𒋥": 256314,
+ "𒋦": 256732,
+ "𒋧": 256739,
+ "𒋩": 256436,
+ "𒋫": 256645,
+ "𒋬": 256411,
+ "𒋭": 256432,
+ "𒋰": 256719,
+ "𒋳": 256504,
+ "𒋺": 256844,
+ "𒋻": 256672,
+ "𒋼": 256748,
+ "𒋽": 256947,
+ "𒋾": 256556,
+ "𒌀": 256326,
+ "𒌁": 256709,
+ "𒌅": 256638,
+ "𒌆": 256589,
+ "𒌇": 256587,
+ "𒌈": 256771,
+ "𒌉": 256410,
+ "𒌋": 256730,
+ "𒌌": 256534,
+ "𒌍": 256755,
+ "𒌑": 256442,
+ "𒌒": 256380,
+ "𒌓": 256628,
+ "𒌔": 256412,
+ "𒌗": 256455,
+ "𒌜": 256342,
+ "𒌝": 256392,
+ "𒌢": 256552,
+ "𒌣": 256343,
+ "𒌤": 256860,
+ "𒌦": 256459,
+ "𒌨": 256653,
+ "𒌫": 256756,
+ "𒌴": 256926,
+ "𒌵": 256480,
+ "𒌶": 256885,
+ "𒌷": 256582,
+ "𒌺": 256560,
+ "𒍀": 256835,
+ "𒍂": 256775,
+ "𒍇": 256636,
+ "𒍍": 256662,
+ "𒍎": 256855,
+ "𒍏": 256619,
+ "𒍑": 256387,
+ "𒍒": 256599,
+ "𒍗": 256419,
+ "𒍚": 256922,
+ "𒍜": 256448,
+ "𒍝": 256697,
+ "𒍞": 256895,
+ "𒍠": 256341,
+ "𒍢": 256424,
+ "𒍣": 256510,
+ "𒍤": 256908,
+ "𒍥": 256642,
+ "𒍦": 256484,
+ "𒍨": 256578,
+ "𒍩": 256843,
+ "𒍪": 256738,
+ "𒍬": 256911,
+ "𒍮": 256508,
+ "𒐈": 256352,
+ "𒐉": 256428,
+ "𒐊": 256760,
+ "𒐋": 256454,
+ "𒐌": 256414,
+ "𒐍": 256493,
+ "𒐏": 256446,
+ "𒐐": 256583,
+ "𒐕": 256519,
+ "𒐖": 256555,
+ "𒐗": 256507,
+ "𒐚": 256819,
+ "𒐞": 256575,
+ "𒐻": 256526,
+ "𒑆": 256607,
+ "𒑏": 256301,
+ "𒑐": 256750,
+ "𒑑": 256516,
+ "𒑒": 256439,
+ "𒑔": 256728,
+ "𒑖": 256726,
+ "𒑚": 256425,
+ "𒑛": 256356,
+ "𒑱": 256712,
+ "𒑳": 256593
 }
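These mappings give every Linear B sign, Aegean measure and number sign, and (new in this commit) cuneiform sign a dedicated single token id above the base SentencePiece vocabulary. A small sketch of how the map surfaces through the tokenizer, with ids taken from the diff above and the hypothetical repo id from the earlier sketch:

```python
# Sketch: each listed sign tokenizes to the single id assigned in this file.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Thalesian/AKK-SUX-ELX_GMY_HIT_UMT5")  # hypothetical id
print(tok.convert_tokens_to_ids("𐀀"))  # 256539 per this diff (Linear B syllable A)
print(tok.convert_tokens_to_ids("𒀭"))  # 256706 per this diff (cuneiform AN / DINGIR)
print(tok.convert_ids_to_tokens(256539))  # "𐀀"
```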
config.json CHANGED
@@ -1,61 +1,34 @@
 {
-  "_name_or_path": "/Users/lee/GitHub/results/GMY-T5Small/train_2",
+  "_name_or_path": "/home/bly/GitHub/results/AKK-SUX-ELX_GMY_HIT_UMT5/train_2/checkpoint-174724",
   "architectures": [
-    "T5ForConditionalGeneration"
+    "UMT5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
   "d_ff": 2048,
   "d_kv": 64,
-  "d_model": 512,
+  "d_model": 768,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "relu",
+  "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
-  "feed_forward_proj": "relu",
+  "feed_forward_proj": "gated-gelu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
-  "is_gated_act": false,
+  "is_gated_act": true,
   "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 6,
-  "num_heads": 8,
-  "num_layers": 6,
+  "model_type": "umt5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
+  "scalable_attention": true,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.49.0",
+  "transformers_version": "4.49.0.dev0",
   "use_cache": true,
-  "vocab_size": 32345
+  "vocab_size": 256955
 }
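The config change is a move from a t5-small-sized T5 to a 12-layer UMT5 with an untied 256,955-token embedding. A sketch of checking the implied model size from these values alone (randomly initialized, so for size inspection only, no download of the trained weights):

```python
# Sketch: instantiate the new architecture from the values in this diff
# and count parameters.
from transformers import UMT5Config, UMT5ForConditionalGeneration

config = UMT5Config(
    vocab_size=256955,
    d_model=768,
    d_ff=2048,
    d_kv=64,
    num_layers=12,
    num_decoder_layers=12,
    num_heads=12,
    feed_forward_proj="gated-gelu",
    tie_word_embeddings=False,
)
model = UMT5ForConditionalGeneration(config)
print(sum(p.numel() for p in model.parameters()))
# roughly 5.9e8, matching the ~2.4 GB float32 safetensors file below
```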
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.49.0"
+  "transformers_version": "4.49.0.dev0"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05ba9d58e554c2cb26bbf2ca29f418f91b890a86b0f04802ad152d571deab963
-size 242486312
+oid sha256:83349f872bd8e9294dd7085ef145a905c1fa257ce721fcb1dfa18ce06c413cc9
+size 2371719032
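The new size is consistent with the config above: 2,371,719,032 bytes of float32 weights works out to roughly 2,371,719,032 / 4 ≈ 593M parameters (ignoring the small safetensors header), versus 242,486,312 / 4 ≈ 60.6M for the previous t5-small-sized checkpoint.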
special_tokens_map.json CHANGED
@@ -99,8 +99,215 @@
     "<extra_id_96>",
     "<extra_id_97>",
     "<extra_id_98>",
-    "<extra_id_99>"
+    "<extra_id_99>",
+    "<extra_id_100>",
+    "<extra_id_101>",
+    "<extra_id_102>",
+    "<extra_id_103>",
+    "<extra_id_104>",
+    "<extra_id_105>",
+    "<extra_id_106>",
+    "<extra_id_107>",
+    "<extra_id_108>",
+    "<extra_id_109>",
+    "<extra_id_110>",
+    "<extra_id_111>",
+    "<extra_id_112>",
+    "<extra_id_113>",
+    "<extra_id_114>",
+    "<extra_id_115>",
+    "<extra_id_116>",
+    "<extra_id_117>",
+    "<extra_id_118>",
+    "<extra_id_119>",
+    "<extra_id_120>",
+    "<extra_id_121>",
+    "<extra_id_122>",
+    "<extra_id_123>",
+    "<extra_id_124>",
+    "<extra_id_125>",
+    "<extra_id_126>",
+    "<extra_id_127>",
+    "<extra_id_128>",
+    "<extra_id_129>",
+    "<extra_id_130>",
+    "<extra_id_131>",
+    "<extra_id_132>",
+    "<extra_id_133>",
+    "<extra_id_134>",
+    "<extra_id_135>",
+    "<extra_id_136>",
+    "<extra_id_137>",
+    "<extra_id_138>",
+    "<extra_id_139>",
+    "<extra_id_140>",
+    "<extra_id_141>",
+    "<extra_id_142>",
+    "<extra_id_143>",
+    "<extra_id_144>",
+    "<extra_id_145>",
+    "<extra_id_146>",
+    "<extra_id_147>",
+    "<extra_id_148>",
+    "<extra_id_149>",
+    "<extra_id_150>",
+    "<extra_id_151>",
+    "<extra_id_152>",
+    "<extra_id_153>",
+    "<extra_id_154>",
+    "<extra_id_155>",
+    "<extra_id_156>",
+    "<extra_id_157>",
+    "<extra_id_158>",
+    "<extra_id_159>",
+    "<extra_id_160>",
+    "<extra_id_161>",
+    "<extra_id_162>",
+    "<extra_id_163>",
+    "<extra_id_164>",
+    "<extra_id_165>",
+    "<extra_id_166>",
+    "<extra_id_167>",
+    "<extra_id_168>",
+    "<extra_id_169>",
+    "<extra_id_170>",
+    "<extra_id_171>",
+    "<extra_id_172>",
+    "<extra_id_173>",
+    "<extra_id_174>",
+    "<extra_id_175>",
+    "<extra_id_176>",
+    "<extra_id_177>",
+    "<extra_id_178>",
+    "<extra_id_179>",
+    "<extra_id_180>",
+    "<extra_id_181>",
+    "<extra_id_182>",
+    "<extra_id_183>",
+    "<extra_id_184>",
+    "<extra_id_185>",
+    "<extra_id_186>",
+    "<extra_id_187>",
+    "<extra_id_188>",
+    "<extra_id_189>",
+    "<extra_id_190>",
+    "<extra_id_191>",
+    "<extra_id_192>",
+    "<extra_id_193>",
+    "<extra_id_194>",
+    "<extra_id_195>",
+    "<extra_id_196>",
+    "<extra_id_197>",
+    "<extra_id_198>",
+    "<extra_id_199>",
+    "<extra_id_200>",
+    "<extra_id_201>",
+    "<extra_id_202>",
+    "<extra_id_203>",
+    "<extra_id_204>",
+    "<extra_id_205>",
+    "<extra_id_206>",
+    "<extra_id_207>",
+    "<extra_id_208>",
+    "<extra_id_209>",
+    "<extra_id_210>",
+    "<extra_id_211>",
+    "<extra_id_212>",
+    "<extra_id_213>",
+    "<extra_id_214>",
+    "<extra_id_215>",
+    "<extra_id_216>",
+    "<extra_id_217>",
+    "<extra_id_218>",
+    "<extra_id_219>",
+    "<extra_id_220>",
+    "<extra_id_221>",
+    "<extra_id_222>",
+    "<extra_id_223>",
+    "<extra_id_224>",
+    "<extra_id_225>",
+    "<extra_id_226>",
+    "<extra_id_227>",
+    "<extra_id_228>",
+    "<extra_id_229>",
+    "<extra_id_230>",
+    "<extra_id_231>",
+    "<extra_id_232>",
+    "<extra_id_233>",
+    "<extra_id_234>",
+    "<extra_id_235>",
+    "<extra_id_236>",
+    "<extra_id_237>",
+    "<extra_id_238>",
+    "<extra_id_239>",
+    "<extra_id_240>",
+    "<extra_id_241>",
+    "<extra_id_242>",
+    "<extra_id_243>",
+    "<extra_id_244>",
+    "<extra_id_245>",
+    "<extra_id_246>",
+    "<extra_id_247>",
+    "<extra_id_248>",
+    "<extra_id_249>",
+    "<extra_id_250>",
+    "<extra_id_251>",
+    "<extra_id_252>",
+    "<extra_id_253>",
+    "<extra_id_254>",
+    "<extra_id_255>",
+    "<extra_id_256>",
+    "<extra_id_257>",
+    "<extra_id_258>",
+    "<extra_id_259>",
+    "<extra_id_260>",
+    "<extra_id_261>",
+    "<extra_id_262>",
+    "<extra_id_263>",
+    "<extra_id_264>",
+    "<extra_id_265>",
+    "<extra_id_266>",
+    "<extra_id_267>",
+    "<extra_id_268>",
+    "<extra_id_269>",
+    "<extra_id_270>",
+    "<extra_id_271>",
+    "<extra_id_272>",
+    "<extra_id_273>",
+    "<extra_id_274>",
+    "<extra_id_275>",
+    "<extra_id_276>",
+    "<extra_id_277>",
+    "<extra_id_278>",
+    "<extra_id_279>",
+    "<extra_id_280>",
+    "<extra_id_281>",
+    "<extra_id_282>",
+    "<extra_id_283>",
+    "<extra_id_284>",
+    "<extra_id_285>",
+    "<extra_id_286>",
+    "<extra_id_287>",
+    "<extra_id_288>",
+    "<extra_id_289>",
+    "<extra_id_290>",
+    "<extra_id_291>",
+    "<extra_id_292>",
+    "<extra_id_293>",
+    "<extra_id_294>",
+    "<extra_id_295>",
+    "<extra_id_296>",
+    "<extra_id_297>",
+    "<extra_id_298>",
+    "<extra_id_299>"
   ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
spiece.model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
-size 791656
+oid sha256:e3909a67b780650b35cf529ac782ad2b6b26e6d1f849d3fbb6a872905f452458
+size 4548313
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4521433697c7ddf104699994e833a55f75c25c193b91134d84b50b5e69963c6c
+oid sha256:67efcf4d3adf4422128c0e794c83663225db853bcc693f47dabc389a03a146e8
 size 5560