{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "special": true, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 1, "special": true, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 2, "special": true, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 255, "special": true, "content": "[START]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 604, "content": "[UH]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 605, "content": "[UM]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 606, "content": "[giggle]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 607, "content": "[laughter]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 608, "content": "[guffaw]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 609, "content": "[inhale]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 610, "content": "[exhale]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 611, "content": "[sigh]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 612, "content": "[cry]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 613, "content": "[bark]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 614, "content": "[howl]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 615, "content": "[meow]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 616, "content": "[singing]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 617, "content": "[music]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 618, "content": "[whistle]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 619, "content": "[humming]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 620, "content": "[gasp]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 621, "content": "[groan]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 622, "content": "[whisper]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 623, "content": "[mumble]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 624, "content": "[sniff]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 625, "content": "[sneeze]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 626, "content": "[cough]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 627, "content": "[snore]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 628, "content": "[chew]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 629, "content": "[sip]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 630, "content": "[clear_throat]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 631, "content": "[kiss]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 632, "content": "[shhh]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 633, "content": "[gibberish]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 634, "content": "[fr]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 635, "content": "[es]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 636, "content": "[de]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 637, "content": "[it]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 638, "content": "[ipa]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 639, "content": "[end_of_label]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 695, "content": "[PLACEHOLDER55]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 696, "content": "[PLACEHOLDER56]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 697, "content": "[PLACEHOLDER57]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 698, "content": "[PLACEHOLDER58]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 699, "content": "[PLACEHOLDER59]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 700, "content": "[PLACEHOLDER60]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 701, "content": "[PLACEHOLDER61]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 702, "content": "[PLACEHOLDER62]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 703, "content": "[PLACEHOLDER63]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "!": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, ";": 12, "?": 13, "a": 14, "b": 15, "c": 16, "d": 17, "e": 18, "f": 19, "g": 20, "h": 21, "i": 22, "j": 23, "k": 24, "l": 25, "m": 26, "n": 27, "o": 28, "p": 29, "q": 30, "r": 31, "s": 32, "t": 33, "u": 34, "v": 35, "w": 36, "x": 37, "y": 38, "z": 39, "th": 40, "in": 41, "the": 42, "an": 43, "er": 44, "ou": 45, "re": 46, "on": 47, "at": 48, "ed": 49, "en": 50, "to": 51, "ing": 52, "and": 53, "is": 54, "as": 55, "al": 56, "or": 57, "of": 58, "ar": 59, "it": 60, "es": 61, "he": 62, "st": 63, "le": 64, "om": 65, "se": 66, "be": 67, "ad": 68, "ow": 69, "ly": 70, "ch": 71, "wh": 72, "that": 73, "you": 74, "li": 75, "ve": 76, "ac": 77, "ti": 78, "ld": 79, "me": 80, "was": 81, "gh": 82, "id": 83, "ll": 84, "wi": 85, "ent": 86, "for": 87, "ay": 88, "ro": 89, "ver": 90, "ic": 91, "her": 92, "ke": 93, "his": 94, "no": 95, "ut": 96, "un": 97, "ir": 98, "lo": 99, "we": 100, "ri": 101, "ha": 102, "with": 103, "ght": 104, "out": 105, "im": 106, "ion": 107, "all": 108, "ab": 109, "one": 110, "ne": 111, "ge": 112, "ould": 113, "ter": 114, "mo": 115, "had": 116, "ce": 117, "she": 118, "go": 119, "sh": 120, "ur": 121, "am": 122, "so": 123, "pe": 124, "my": 125, "de": 126, "are": 127, "but": 128, "ome": 129, "fr": 130, "ther": 131, "fe": 132, "su": 133, "do": 134, "con": 135, "te": 136, "ain": 137, "ere": 138, "po": 139, "if": 140, "they": 141, "us": 142, "ag": 143, "tr": 144, "now": 145, "oun": 146, "this": 147, "have": 148, "not": 149, "sa": 150, "il": 151, "up": 152, "thing": 153, "from": 154, "ap": 155, "him": 156, "ack": 157, "ation": 158, "ant": 159, "our": 160, "op": 161, "like": 162, "ust": 163, "ess": 164, "bo": 165, "ok": 166, "ul": 167, "ind": 168, "ex": 169, "com": 170, "some": 171, "there": 172, "ers": 173, "co": 174, "res": 175, "man": 176, "ard": 177, "pl": 178, "wor": 179, "way": 180, "tion": 181, "fo": 182, "ca": 183, "were": 184, "by": 185, "ate": 186, "pro": 187, "ted": 188, "ound": 189, "own": 190, "would": 191, "ts": 192, "what": 193, "qu": 194, "ally": 195, "ight": 196, "ck": 197, "gr": 198, "when": 199, "ven": 200, "can": 201, "ough": 202, "ine": 203, "end": 204, "per": 205, "ous": 206, "od": 207, "ide": 208, "know": 209, "ty": 210, "very": 211, "si": 212, "ak": 213, "who": 214, "about": 215, "ill": 216, "them": 217, "est": 218, "red": 219, "ye": 220, "could": 221, "ong": 222, "your": 223, "their": 224, "em": 225, "just": 226, "other": 227, "into": 228, "any": 229, "whi": 230, "um": 231, "tw": 232, "ast": 233, "der": 234, "did": 235, "ie": 236, "been": 237, "ace": 238, "ink": 239, "ity": 240, "back": 241, "ting": 242, "br": 243, "more": 244, "ake": 245, "pp": 246, "then": 247, "sp": 248, "el": 249, "use": 250, "bl": 251, "said": 252, "over": 253, "get": 254, "[START]": 255, "\"": 256, "#": 257, "$": 258, "%": 259, "&": 260, "*": 261, "+": 262, "0": 263, "1": 264, "2": 265, "3": 266, "4": 267, "5": 268, "6": 269, "7": 270, "8": 271, "9": 272, "<": 273, "=": 274, ">": 275, "@": 276, "A": 277, "B": 278, "C": 279, "D": 280, "E": 281, "F": 282, "G": 283, "H": 284, "I": 285, "J": 286, "K": 287, "L": 288, "M": 289, "N": 290, "O": 291, "P": 292, "Q": 293, "R": 294, "S": 295, "T": 296, "U": 297, "V": 298, "W": 299, "X": 300, "Y": 301, "Z": 302, "[": 303, "\\": 304, "]": 305, "^": 306, "_": 307, "`": 308, "{": 309, "|": 310, "}": 311, "~": 312, "‐": 313, "‑": 314, "‒": 315, "–": 316, "—": 317, "―": 318, "‖": 319, "‗": 320, "‘": 321, "’": 322, "‚": 323, "‛": 324, "“": 325, "”": 326, "„": 327, "‟": 328, " ": 329, "¡": 330, "¢": 331, "£": 332, "¤": 333, "¥": 334, "¦": 335, "§": 336, "¨": 337, "©": 338, "ª": 339, "«": 340, "¬": 341, "­": 342, "®": 343, "¯": 344, "°": 345, "±": 346, "²": 347, "³": 348, "´": 349, "µ": 350, "¶": 351, "·": 352, "¸": 353, "¹": 354, "º": 355, "»": 356, "¼": 357, "½": 358, "¾": 359, "¿": 360, "À": 361, "Á": 362, "Â": 363, "Ã": 364, "Ä": 365, "Å": 366, "Æ": 367, "Ç": 368, "È": 369, "É": 370, "Ê": 371, "Ë": 372, "Ì": 373, "Í": 374, "Î": 375, "Ï": 376, "Ð": 377, "Ñ": 378, "Ò": 379, "Ó": 380, "Ô": 381, "Õ": 382, "Ö": 383, "×": 384, "Ø": 385, "Ù": 386, "Ú": 387, "Û": 388, "Ü": 389, "Ý": 390, "Þ": 391, "ß": 392, "à": 393, "á": 394, "â": 395, "ã": 396, "ä": 397, "å": 398, "æ": 399, "ç": 400, "è": 401, "é": 402, "ê": 403, "ë": 404, "ì": 405, "í": 406, "î": 407, "ï": 408, "ð": 409, "ñ": 410, "ò": 411, "ó": 412, "ô": 413, "õ": 414, "ö": 415, "÷": 416, "ø": 417, "ù": 418, "ú": 419, "û": 420, "ü": 421, "ý": 422, "þ": 423, "ÿ": 424, "ɐ": 425, "ɑ": 426, "ɒ": 427, "ɓ": 428, "ɔ": 429, "ɕ": 430, "ɖ": 431, "ɗ": 432, "ɘ": 433, "ə": 434, "ɚ": 435, "ɛ": 436, "ɜ": 437, "ɝ": 438, "ɞ": 439, "ɟ": 440, "ɠ": 441, "ɡ": 442, "ɢ": 443, "ɣ": 444, "ɤ": 445, "ɥ": 446, "ɦ": 447, "ɧ": 448, "ɨ": 449, "ɩ": 450, "ɪ": 451, "ɫ": 452, "ɬ": 453, "ɭ": 454, "ɮ": 455, "ɯ": 456, "ɰ": 457, "ɱ": 458, "ɲ": 459, "ɳ": 460, "ɴ": 461, "ɵ": 462, "ɶ": 463, "ɷ": 464, "ɸ": 465, "ɹ": 466, "ɺ": 467, "ɻ": 468, "ɼ": 469, "ɽ": 470, "ɾ": 471, "ɿ": 472, "ʀ": 473, "ʁ": 474, "ʂ": 475, "ʃ": 476, "ʄ": 477, "ʅ": 478, "ʆ": 479, "ʇ": 480, "ʈ": 481, "ʉ": 482, "ʊ": 483, "ʋ": 484, "ʌ": 485, "ʍ": 486, "ʎ": 487, "ʏ": 488, "ʐ": 489, "ʑ": 490, "ʒ": 491, "ʓ": 492, "ʔ": 493, "ʕ": 494, "ʖ": 495, "ʗ": 496, "ʘ": 497, "ʙ": 498, "ʚ": 499, "ʛ": 500, "ʜ": 501, "ʝ": 502, "ʞ": 503, "ʟ": 504, "ʠ": 505, "ʡ": 506, "ʢ": 507, "ʣ": 508, "ʤ": 509, "ʥ": 510, "ʦ": 511, "ʧ": 512, "ʨ": 513, "ʩ": 514, "ʪ": 515, "ʫ": 516, "ʬ": 517, "ʭ": 518, "ʮ": 519, "ʯ": 520, "ʰ": 521, "ʱ": 522, "ʲ": 523, "ʳ": 524, "ʴ": 525, "ʵ": 526, "ʶ": 527, "ʷ": 528, "ʸ": 529, "ʹ": 530, "ʺ": 531, "ʻ": 532, "ʼ": 533, "ʽ": 534, "ʾ": 535, "ʿ": 536, "ˀ": 537, "ˁ": 538, "˂": 539, "˃": 540, "˄": 541, "˅": 542, "ˆ": 543, "ˇ": 544, "ˈ": 545, "ˉ": 546, "ˊ": 547, "ˋ": 548, "ˌ": 549, "ˍ": 550, "ˎ": 551, "ˏ": 552, "ː": 553, "ˑ": 554, "˒": 555, "˓": 556, "˔": 557, "˕": 558, "˖": 559, "˗": 560, "˘": 561, "˙": 562, "˚": 563, "˛": 564, "˜": 565, "˝": 566, "˞": 567, "˟": 568, "ˠ": 569, "ˡ": 570, "ˢ": 571, "ˣ": 572, "ˤ": 573, "˥": 574, "˦": 575, "˧": 576, "˨": 577, "˩": 578, "˪": 579, "˫": 580, "ˬ": 581, "˭": 582, "ˮ": 583, "˯": 584, "˰": 585, "˱": 586, "˲": 587, "˳": 588, "˴": 589, "˵": 590, "˶": 591, "˷": 592, "˸": 593, "˹": 594, "˺": 595, "˻": 596, "˼": 597, "˽": 598, "˾": 599, "˿": 600, "ā": 601, "ō": 602, "…": 603, "[UH]": 604, "[UM]": 605, "[giggle]": 606, "[laughter]": 607, "[guffaw]": 608, "[inhale]": 609, "[exhale]": 610, "[sigh]": 611, "[cry]": 612, "[bark]": 613, "[howl]": 614, "[meow]": 615, "[singing]": 616, "[music]": 617, "[whistle]": 618, "[humming]": 619, "[gasp]": 620, "[groan]": 621, "[whisper]": 622, "[mumble]": 623, "[sniff]": 624, "[sneeze]": 625, "[cough]": 626, "[snore]": 627, "[chew]": 628, "[sip]": 629, "[clear_throat]": 630, "[kiss]": 631, "[shhh]": 632, "[gibberish]": 633, "[fr]": 634, "[es]": 635, "[de]": 636, "[it]": 637, "[ipa]": 638, "[end_of_label]": 639, "ŋ": 640, "ᵻ": 641, "θ": 642, "̩": 643, "\u0303": 644, "ɑː": 645, "iː": 646, "uː": 647, "ɜː": 648, "ɔː": 649, "oː": 650, "eɪ": 651, "oʊ": 652, "aɪ": 653, "aʊ": 654, "ɔɪ": 655, "dʒ": 656, "tʃ": 657, "ɪŋ": 658, "ᵻd": 659, "ˈiː": 660, "ˌiː": 661, "ˈɪ": 662, "ˌɪ": 663, "ˈeɪ": 664, "ˌeɪ": 665, "ˈɛ": 666, "ˌɛ": 667, "ˈæ": 668, "ˌæ": 669, "ˈɑː": 670, "ˌɑː": 671, "ˈɔː": 672, "ˌɔː": 673, "oːɹ": 674, "ˈoːɹ": 675, "ˌoːɹ": 676, "ˈoʊ": 677, "ˌoʊ": 678, "ˈʊ": 679, "ˌʊ": 680, "ˈuː": 681, "ˌuː": 682, "ˈɜː": 683, "ˌɜː": 684, "ˈʌ": 685, "ˌʌ": 686, "ˈaɪ": 687, "ˌaɪ": 688, "ˈaʊ": 689, "ˌaʊ": 690, "ˈɔɪ": 691, "ˌɔɪ": 692, "ˈɚ": 693, "ˌɐ": 694, "[PLACEHOLDER55]": 695, "[PLACEHOLDER56]": 696, "[PLACEHOLDER57]": 697, "[PLACEHOLDER58]": 698, "[PLACEHOLDER59]": 699, "[PLACEHOLDER60]": 700, "[PLACEHOLDER61]": 701, "[PLACEHOLDER62]": 702, "[PLACEHOLDER63]": 703 }, "merges": [ "t h", "i n", "th e", "a n", "e r", "o u", "r e", "o n", "a t", "e d", "e n", "t o", "in g", "an d", "i s", "a s", "a l", "o r", "o f", "a r", "i t", "e s", "h e", "s t", "l e", "o m", "s e", "b e", "a d", "o w", "l y", "c h", "w h", "th at", "y ou", "l i", "v e", "a c", "t i", "l d", "m e", "w as", "g h", "i d", "l l", "w i", "en t", "f or", "a y", "r o", "v er", "i c", "h er", "k e", "h is", "n o", "u t", "u n", "i r", "l o", "w e", "r i", "h a", "wi th", "gh t", "ou t", "i m", "i on", "al l", "a b", "on e", "n e", "g e", "ou ld", "t er", "m o", "h ad", "c e", "s he", "g o", "s h", "u r", "a m", "s o", "p e", "m y", "d e", "a re", "b ut", "om e", "f r", "the r", "f e", "s u", "d o", "c on", "t e", "a in", "er e", "p o", "i f", "the y", "u s", "a g", "t r", "n ow", "ou n", "th is", "ha ve", "no t", "s a", "i l", "u p", "th ing", "fr om", "a p", "h im", "ac k", "at ion", "an t", "ou r", "o p", "li ke", "u st", "es s", "b o", "o k", "u l", "in d", "e x", "c om", "s ome", "the re", "er s", "c o", "re s", "m an", "ar d", "p l", "w or", "w ay", "ti on", "f o", "c a", "w ere", "b y", "at e", "p ro", "t ed", "oun d", "ow n", "w ould", "t s", "wh at", "q u", "al ly", "i ght", "c k", "g r", "wh en", "v en", "c an", "ou gh", "in e", "en d", "p er", "ou s", "o d", "id e", "k now", "t y", "ver y", "s i", "a k", "wh o", "ab out", "i ll", "the m", "es t", "re d", "y e", "c ould", "on g", "you r", "the ir", "e m", "j ust", "o ther", "in to", "an y", "wh i", "u m", "t w", "as t", "d er", "d id", "i e", "be en", "ac e", "in k", "it y", "b ack", "t ing", "b r", "mo re", "a ke", "p p", "the n", "s p", "e l", "u se", "b l", "sa id", "o ver", "ge t", "ɑ ː", "i ː", "u ː", "ɜ ː", "ɔ ː", "o ː", "e ɪ", "o ʊ", "a ɪ", "a ʊ", "ɔ ɪ", "d ʒ", "t ʃ", "ɪ ŋ", "ᵻ d", "ˈ iː", "ˌ iː", "ˈ ɪ", "ˌ ɪ", "ˈ eɪ", "ˌ eɪ", "ˈ ɛ", "ˌ ɛ", "ˈ æ", "ˌ æ", "ˈ ɑː", "ˌ ɑː", "ˈ ɔː", "ˌ ɔː", "oː ɹ", "ˈ oːɹ", "ˌ oːɹ", "ˈ oʊ", "ˌ oʊ", "ˈ ʊ", "ˌ ʊ", "ˈ uː", "ˌ uː", "ˈ ɜː", "ˌ ɜː", "ˈ ʌ", "ˌ ʌ", "ˈ aɪ", "ˌ aɪ", "ˈ aʊ", "ˌ aʊ", "ˈ ɔɪ", "ˌ ɔɪ", "ˈ ɚ", "ˌ ɐ" ] } }