yuyuzhang committed on
Commit
2b4d112
·
verified ·
1 Parent(s): 5f0d886

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -4
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +11 -11
special_tokens_map.json CHANGED
@@ -1,27 +1,27 @@
1
  {
2
  "bos_token": {
3
- "content": "<[BOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<[EOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<[PAD_never_used_51bce0c785ca2f68081bfa7d91973934]>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "sep_token": {
24
- "content": "<[SEP_never_used_51bce0c785ca2f68081bfa7d91973934]>",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "<[begin▁of▁sentence]>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<[end▁of▁sentence]>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<[PAD▁TOKEN]>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "sep_token": {
24
+ "content": "<[SEP▁TOKEN]>",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f5a7581f9bd555b11181c8fd69e9c7e6f6f5904d958f7c8771393aec7de1da9
3
- size 11892194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6520146c388c495a98bbea62ff6d00c0a8935bed33622e33bb33ec71aaafed
3
+ size 11891696
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "<[BOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "<[PAD_never_used_51bce0c785ca2f68081bfa7d91973934]>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "<[EOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -49,7 +49,7 @@
49
  "special": true
50
  },
51
  "6": {
52
- "content": "<[SEP_never_used_51bce0c785ca2f68081bfa7d91973934]>",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
@@ -993,7 +993,7 @@
993
  "special": true
994
  },
995
  "124": {
996
- "content": "<[PLHD124_never_used_51bce0c785ca2f68081bfa7d91973934]>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
@@ -1001,7 +1001,7 @@
1001
  "special": true
1002
  },
1003
  "125": {
1004
- "content": "<[PLHD125_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
@@ -1009,7 +1009,7 @@
1009
  "special": true
1010
  },
1011
  "126": {
1012
- "content": "<[PLHD126_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
@@ -1025,12 +1025,12 @@
1025
  "special": true
1026
  }
1027
  },
1028
- "bos_token": "<[BOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1029
  "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ bos_token + role + '\n' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n'}}{% endif %}",
1030
  "clean_up_tokenization_spaces": false,
1031
- "eos_token": "<[EOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1032
  "model_max_length": 1000000000000000019884624838656,
1033
- "pad_token": "<[PAD_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1034
- "sep_token": "<[SEP_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1035
  "tokenizer_class": "PreTrainedTokenizerFast"
1036
  }
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "<[begin▁of▁sentence]>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "1": {
12
+ "content": "<[PAD▁TOKEN]>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "<[end▁of▁sentence]>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
49
  "special": true
50
  },
51
  "6": {
52
+ "content": "<[SEP▁TOKEN]>",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
 
993
  "special": true
994
  },
995
  "124": {
996
+ "content": "<[fim-prefix]>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
 
1001
  "special": true
1002
  },
1003
  "125": {
1004
+ "content": "<[fim-suffix]>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
 
1009
  "special": true
1010
  },
1011
  "126": {
1012
+ "content": "<[fim-middle]>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
 
1025
  "special": true
1026
  }
1027
  },
1028
+ "bos_token": "<[begin▁of▁sentence]>",
1029
  "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ bos_token + role + '\n' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n'}}{% endif %}",
1030
  "clean_up_tokenization_spaces": false,
1031
+ "eos_token": "<[end▁of▁sentence]>",
1032
  "model_max_length": 1000000000000000019884624838656,
1033
+ "pad_token": "<[PAD▁TOKEN]>",
1034
+ "sep_token": "<[SEP▁TOKEN]>",
1035
  "tokenizer_class": "PreTrainedTokenizerFast"
1036
  }