Bedru commited on
Commit
b3ce642
·
verified ·
1 Parent(s): c6c0c98

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +4 -4
  3. vocab.json +226 -241
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 304,
3
- "<s>": 303
4
  }
 
1
  {
2
+ "</s>": 289,
3
+ "<s>": 288
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "301": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "302": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "303": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "304": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "286": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "287": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "288": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "289": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
vocab.json CHANGED
@@ -14,8 +14,8 @@
14
  "8": 13,
15
  "9": 14,
16
  "[": 15,
17
- "[PAD]": 302,
18
- "[UNK]": 301,
19
  "]": 16,
20
  "a": 17,
21
  "b": 18,
@@ -28,104 +28,104 @@
28
  "i": 25,
29
  "j": 26,
30
  "k": 27,
31
- "m": 28,
32
- "n": 29,
33
- "o": 30,
34
- "p": 31,
35
- "r": 32,
36
- "s": 33,
37
- "t": 34,
38
- "u": 35,
39
- "v": 36,
40
- "x": 37,
41
- "y": 38,
 
 
 
 
42
  "|": 0,
43
- "~": 39,
44
- "£": 40,
45
- "¥": 41,
46
- "°": 42,
47
- "²": 43,
48
- "½": 44,
49
- "¾": 45,
50
- "ã": 46,
51
- "õ": 47,
52
- "ሀ": 48,
53
- "ሁ": 49,
54
- "ሂ": 50,
55
- "ሃ": 51,
56
- "ሄ": 52,
57
- "ህ": 53,
58
- "ሆ": 54,
59
- "ለ": 55,
60
- "ሉ": 56,
61
- "ሊ": 57,
62
- "ላ": 58,
63
- "ሌ": 59,
64
- "ል": 60,
65
- "ሎ": 61,
66
- "ሏ": 62,
67
- "ሐ": 63,
68
- "": 64,
69
- "": 65,
70
- "": 66,
71
- "": 67,
72
- "": 68,
73
- "": 69,
74
- "": 70,
75
- "": 71,
76
- "": 72,
77
- "": 73,
78
- "": 74,
79
- "": 75,
80
- "": 76,
81
- "": 77,
82
- "": 78,
83
- "": 79,
84
- "": 80,
85
- "": 81,
86
- "": 82,
87
- "": 83,
88
- "": 84,
89
- "": 85,
90
- "": 86,
91
- "": 87,
92
- "": 88,
93
- "": 89,
94
- "": 90,
95
- "": 91,
96
- "": 92,
97
- "": 93,
98
- "": 94,
99
- "": 95,
100
- "": 96,
101
- "": 97,
102
- "": 98,
103
- "": 99,
104
- "": 100,
105
- "": 101,
106
- "": 102,
107
- "": 103,
108
- "": 104,
109
- "": 105,
110
- "": 106,
111
- "": 107,
112
- "": 108,
113
- "": 109,
114
- "": 110,
115
- "": 111,
116
- "": 112,
117
- "": 113,
118
- "": 114,
119
- "": 115,
120
- "": 116,
121
- "": 117,
122
- "": 118,
123
- "": 119,
124
- "": 120,
125
- "ቨ": 121,
126
- "ቪ": 122,
127
- "ቫ": 123,
128
- "ቬ": 124,
129
  "ቭ": 125,
130
  "ቮ": 126,
131
  "ተ": 127,
@@ -160,146 +160,131 @@
160
  "ኙ": 156,
161
  "ኚ": 157,
162
  "ኛ": 158,
163
- "": 159,
164
- "": 160,
165
- "": 161,
166
- "": 162,
167
- "": 163,
168
- "": 164,
169
- "": 165,
170
- "": 166,
171
- "": 167,
172
- "": 168,
173
- "": 169,
174
- "": 170,
175
- "": 171,
176
- "": 172,
177
- "": 173,
178
- "": 174,
179
- "": 175,
180
- "": 176,
181
- "": 177,
182
- "": 178,
183
- "": 179,
184
- "": 180,
185
- "": 181,
186
- "": 182,
187
- "": 183,
188
- "": 184,
189
- "": 185,
190
- "": 186,
191
- "": 187,
192
- "": 188,
193
- "": 189,
194
- "": 190,
195
- "": 191,
196
- "": 192,
197
- "": 193,
198
- "": 194,
199
- "": 195,
200
- "": 196,
201
- "": 197,
202
- "": 198,
203
- "": 199,
204
- "": 200,
205
- "": 201,
206
- "": 202,
207
- "": 203,
208
- "": 204,
209
- "": 205,
210
- "": 206,
211
- "": 207,
212
- "": 208,
213
- "": 209,
214
- "": 210,
215
- "": 211,
216
- "": 212,
217
- "": 213,
218
- "": 214,
219
- "": 215,
220
- "": 216,
221
- "": 217,
222
- "": 218,
223
- "": 219,
224
- "": 220,
225
- "": 221,
226
- "": 222,
227
- "": 223,
228
- "": 224,
229
- "": 225,
230
- "": 226,
231
- "": 227,
232
- "": 228,
233
- "": 229,
234
- "": 230,
235
- "": 231,
236
- "": 232,
237
- "": 233,
238
- "": 234,
239
- "": 235,
240
- "": 236,
241
- "": 237,
242
- "": 238,
243
- "": 239,
244
- "": 240,
245
- "": 241,
246
- "": 242,
247
- "": 243,
248
- "": 244,
249
- "": 245,
250
- "": 246,
251
- "": 247,
252
- "": 248,
253
- "": 249,
254
- "": 250,
255
- "": 251,
256
- "": 252,
257
- "": 253,
258
- "": 254,
259
- "": 255,
260
- "": 256,
261
- "": 257,
262
- "": 258,
263
- "": 259,
264
- "": 260,
265
- "": 261,
266
- "": 262,
267
- "": 263,
268
- "": 264,
269
- "": 265,
270
- "": 266,
271
- "": 267,
272
- "": 268,
273
- "": 269,
274
- "": 270,
275
- "": 271,
276
- "": 272,
277
- "": 273,
278
- "": 274,
279
- "": 275,
280
- "": 276,
281
- "": 277,
282
- "": 278,
283
- "": 279,
284
- "": 280,
285
- "": 281,
286
- "": 282,
287
- "": 283,
288
- "": 284,
289
- "": 285,
290
- "ፔ": 286,
291
- "ፕ": 287,
292
- "ፖ": 288,
293
- "ፗ": 289,
294
- "፡": 290,
295
- "።": 291,
296
- "፣": 292,
297
- "፤": 293,
298
- "፥": 294,
299
- "፦": 295,
300
- "–": 296,
301
- "—": 297,
302
- "’": 298,
303
- "‹": 299,
304
- "›": 300
305
  }
 
14
  "8": 13,
15
  "9": 14,
16
  "[": 15,
17
+ "[PAD]": 287,
18
+ "[UNK]": 286,
19
  "]": 16,
20
  "a": 17,
21
  "b": 18,
 
28
  "i": 25,
29
  "j": 26,
30
  "k": 27,
31
+ "l": 28,
32
+ "m": 29,
33
+ "n": 30,
34
+ "o": 31,
35
+ "p": 32,
36
+ "q": 33,
37
+ "r": 34,
38
+ "s": 35,
39
+ "t": 36,
40
+ "u": 37,
41
+ "v": 38,
42
+ "w": 39,
43
+ "x": 40,
44
+ "y": 41,
45
+ "z": 42,
46
  "|": 0,
47
+ "~": 43,
48
+ "£": 44,
49
+ "¥": 45,
50
+ "°": 46,
51
+ "²": 47,
52
+ "½": 48,
53
+ "¾": 49,
54
+ "ã": 50,
55
+ "õ": 51,
56
+ "ሀ": 52,
57
+ "ሁ": 53,
58
+ "ሂ": 54,
59
+ "ሃ": 55,
60
+ "ሄ": 56,
61
+ "ህ": 57,
62
+ "ሆ": 58,
63
+ "ለ": 59,
64
+ "ሉ": 60,
65
+ "ሊ": 61,
66
+ "ላ": 62,
67
+ "ሌ": 63,
68
+ "ል": 64,
69
+ "ሎ": 65,
70
+ "ሏ": 66,
71
+ "ሐ": 67,
72
+ "": 68,
73
+ "": 69,
74
+ "": 70,
75
+ "": 71,
76
+ "": 72,
77
+ "": 73,
78
+ "": 74,
79
+ "": 75,
80
+ "": 76,
81
+ "": 77,
82
+ "": 78,
83
+ "": 79,
84
+ "": 80,
85
+ "": 81,
86
+ "": 82,
87
+ "": 83,
88
+ "": 84,
89
+ "": 85,
90
+ "": 86,
91
+ "": 87,
92
+ "": 88,
93
+ "": 89,
94
+ "": 90,
95
+ "": 91,
96
+ "": 92,
97
+ "": 93,
98
+ "": 94,
99
+ "": 95,
100
+ "": 96,
101
+ "": 97,
102
+ "": 98,
103
+ "": 99,
104
+ "": 100,
105
+ "": 101,
106
+ "": 102,
107
+ "": 103,
108
+ "": 104,
109
+ "": 105,
110
+ "": 106,
111
+ "": 107,
112
+ "": 108,
113
+ "": 109,
114
+ "": 110,
115
+ "": 111,
116
+ "": 112,
117
+ "": 113,
118
+ "": 114,
119
+ "": 115,
120
+ "": 116,
121
+ "": 117,
122
+ "": 118,
123
+ "": 119,
124
+ "": 120,
125
+ "": 121,
126
+ "": 122,
127
+ "": 123,
128
+ "": 124,
 
 
 
 
129
  "ቭ": 125,
130
  "ቮ": 126,
131
  "ተ": 127,
 
160
  "ኙ": 156,
161
  "ኚ": 157,
162
  "ኛ": 158,
163
+ "": 159,
164
+ "": 160,
165
+ "": 161,
166
+ "": 162,
167
+ "": 163,
168
+ "": 164,
169
+ "": 165,
170
+ "": 166,
171
+ "": 167,
172
+ "": 168,
173
+ "": 169,
174
+ "": 170,
175
+ "": 171,
176
+ "": 172,
177
+ "": 173,
178
+ "": 174,
179
+ "": 175,
180
+ "": 176,
181
+ "": 177,
182
+ "": 178,
183
+ "": 179,
184
+ "": 180,
185
+ "": 181,
186
+ "": 182,
187
+ "": 183,
188
+ "": 184,
189
+ "": 185,
190
+ "": 186,
191
+ "": 187,
192
+ "": 188,
193
+ "": 189,
194
+ "": 190,
195
+ "": 191,
196
+ "": 192,
197
+ "": 193,
198
+ "": 194,
199
+ "": 195,
200
+ "": 196,
201
+ "": 197,
202
+ "": 198,
203
+ "": 199,
204
+ "": 200,
205
+ "": 201,
206
+ "": 202,
207
+ "": 203,
208
+ "": 204,
209
+ "": 205,
210
+ "": 206,
211
+ "": 207,
212
+ "": 208,
213
+ "": 209,
214
+ "": 210,
215
+ "": 211,
216
+ "": 212,
217
+ "": 213,
218
+ "": 214,
219
+ "": 215,
220
+ "": 216,
221
+ "": 217,
222
+ "": 218,
223
+ "": 219,
224
+ "": 220,
225
+ "": 221,
226
+ "": 222,
227
+ "": 223,
228
+ "": 224,
229
+ "": 225,
230
+ "": 226,
231
+ "": 227,
232
+ "": 228,
233
+ "": 229,
234
+ "": 230,
235
+ "": 231,
236
+ "": 232,
237
+ "": 233,
238
+ "": 234,
239
+ "": 235,
240
+ "": 236,
241
+ "": 237,
242
+ "": 238,
243
+ "": 239,
244
+ "": 240,
245
+ "": 241,
246
+ "": 242,
247
+ "": 243,
248
+ "": 244,
249
+ "": 245,
250
+ "": 246,
251
+ "": 247,
252
+ "": 248,
253
+ "": 249,
254
+ "": 250,
255
+ "": 251,
256
+ "": 252,
257
+ "": 253,
258
+ "": 254,
259
+ "": 255,
260
+ "": 256,
261
+ "": 257,
262
+ "": 258,
263
+ "": 259,
264
+ "": 260,
265
+ "": 261,
266
+ "": 262,
267
+ "": 263,
268
+ "": 264,
269
+ "": 265,
270
+ "": 266,
271
+ "": 267,
272
+ "": 268,
273
+ "": 269,
274
+ "": 270,
275
+ "": 271,
276
+ "": 272,
277
+ "": 273,
278
+ "": 274,
279
+ "": 275,
280
+ "": 276,
281
+ "": 277,
282
+ "": 278,
283
+ "": 279,
284
+ "": 280,
285
+ "": 281,
286
+ "": 282,
287
+ "": 283,
288
+ "": 284,
289
+ "": 285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }