uno-recognizer / tokenizer.json
Hayloo9838's picture
Create tokenizer.json
dfb48df verified
raw
history blame contribute delete
444 Bytes
{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "WordPiece",
    "unk_token": "[UNK]",
    "continuing_subword_prefix": "##",
    "max_input_chars_per_word": 100,
    "vocab": {
      "[PAD]": 0,
      "[UNK]": 1,
      "[CLS]": 2,
      "[SEP]": 3
    }
  }
}