256m / tokenizer_config.json
Corianas's picture
Update tokenizer_config.json
d29d0ff verified
{
"add_bos_token": false,
"add_prefix_space": false,
"bos_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"errors": "replace",
"model_max_length": 1000000000000000019884624838656,
"name_or_path": "cerebras/Cerebras-GPT-256M",
"pad_token": null,
"special_tokens_map_file": null,
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{ add_bos_token and bos_token or '' }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{ '<|user|>' + message['content'] }}{%- elif message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{{ '<|assistant|><|tool_calls_begin|><|tool_call_begin|>' + tool['type'] + '<|tool_sep|>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```' + '<|tool_call_end|>' }}{% set ns.is_first = true %}{%- else %}{{ '\\n' + '<|tool_call_begin|>' + tool['type'] + '<|tool_sep|>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```' + '<|tool_call_end|>' + '<|tool_calls_end|>' + '<|endoftext|>' }}{%- endif %}{%- endfor %}{%- elif message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{ '<|tool_outputs_end|>' + message['content'] + '<|endoftext|>' }}{% set ns.is_tool = false %}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{ '<|assistant|>' + content + '<|endoftext|>' }}{%- endif %}{%- elif message['role'] == 'tool' %}{%- set ns.is_tool = true %}{%- if ns.is_output_first %}{{ '<|tool_outputs_begin|><|tool_output_begin|>' + message['content'] + '<|tool_output_end|>' }}{% set ns.is_output_first = false %}{%- else %}{{ '\\n<|tool_output_begin|>' + message['content'] + '<|tool_output_end|>' }}{%- endif %}{%- endif %}{%- endfor %}{% if ns.is_tool %}{{ '<|tool_outputs_end|>' }}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{ '<|assistant|>' }}{% endif %}",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}