mrm8488 committed on
Commit
0f85e2d
·
1 Parent(s): f4a986f

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -8,7 +8,7 @@
8
  "single_word": false
9
  },
10
  {
11
- "content": "[\\INST]",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
@@ -29,7 +29,6 @@
29
  "rstrip": false,
30
  "single_word": false
31
  },
32
- "pad_token": "</s>",
33
  "unk_token": {
34
  "content": "<unk>",
35
  "lstrip": false,
 
8
  "single_word": false
9
  },
10
  {
11
+ "content": "[/INST]",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
 
29
  "rstrip": false,
30
  "single_word": false
31
  },
 
32
  "unk_token": {
33
  "content": "<unk>",
34
  "lstrip": false,
tokenizer.json CHANGED
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 32001,
44
- "content": "[\\INST]",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 32001,
44
+ "content": "[/INST]",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -35,7 +33,7 @@
35
  "special": true
36
  },
37
  "32001": {
38
- "content": "[\\INST]",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
@@ -45,14 +43,14 @@
45
  },
46
  "additional_special_tokens": [
47
  "[INST]",
48
- "[\\INST]"
49
  ],
50
  "bos_token": "<s>",
51
  "clean_up_tokenization_spaces": false,
52
  "eos_token": "</s>",
53
  "legacy": true,
54
  "model_max_length": 1000000000000000019884624838656,
55
- "pad_token": "</s>",
56
  "sp_model_kwargs": {},
57
  "spaces_between_special_tokens": false,
58
  "tokenizer_class": "LlamaTokenizer",
 
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
 
33
  "special": true
34
  },
35
  "32001": {
36
+ "content": "[/INST]",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
43
  },
44
  "additional_special_tokens": [
45
  "[INST]",
46
+ "[/INST]"
47
  ],
48
  "bos_token": "<s>",
49
  "clean_up_tokenization_spaces": false,
50
  "eos_token": "</s>",
51
  "legacy": true,
52
  "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": null,
54
  "sp_model_kwargs": {},
55
  "spaces_between_special_tokens": false,
56
  "tokenizer_class": "LlamaTokenizer",