{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"special": true, | |
"content": "[STOP]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false | |
}, | |
{ | |
"id": 1, | |
"special": true, | |
"content": "[UNK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false | |
}, | |
{ | |
"id": 2, | |
"special": true, | |
"content": "[SPACE]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false | |
}, | |
{ | |
"id": 255, | |
"special": true, | |
"content": "[START]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false | |
}, | |
{ | |
"id": 604, | |
"content": "[UH]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 605, | |
"content": "[UM]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 606, | |
"content": "[giggle]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 607, | |
"content": "[laughter]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 608, | |
"content": "[guffaw]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 609, | |
"content": "[inhale]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 610, | |
"content": "[exhale]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 611, | |
"content": "[sigh]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 612, | |
"content": "[cry]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 613, | |
"content": "[bark]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 614, | |
"content": "[howl]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 615, | |
"content": "[meow]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 616, | |
"content": "[singing]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 617, | |
"content": "[music]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 618, | |
"content": "[whistle]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 619, | |
"content": "[humming]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 620, | |
"content": "[gasp]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 621, | |
"content": "[groan]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 622, | |
"content": "[whisper]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 623, | |
"content": "[mumble]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 624, | |
"content": "[sniff]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 625, | |
"content": "[sneeze]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 626, | |
"content": "[cough]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 627, | |
"content": "[snore]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 628, | |
"content": "[chew]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 629, | |
"content": "[sip]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 630, | |
"content": "[clear_throat]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 631, | |
"content": "[kiss]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 632, | |
"content": "[shhh]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 633, | |
"content": "[gibberish]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 634, | |
"content": "[fr]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 635, | |
"content": "[es]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 636, | |
"content": "[de]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 637, | |
"content": "[it]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 638, | |
"content": "[ipa]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 639, | |
"content": "[end_of_label]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 695, | |
"content": "[PLACEHOLDER55]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 696, | |
"content": "[PLACEHOLDER56]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 697, | |
"content": "[PLACEHOLDER57]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 698, | |
"content": "[PLACEHOLDER58]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 699, | |
"content": "[PLACEHOLDER59]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 700, | |
"content": "[PLACEHOLDER60]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 701, | |
"content": "[PLACEHOLDER61]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 702, | |
"content": "[PLACEHOLDER62]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 703, | |
"content": "[PLACEHOLDER63]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": null, | |
"pre_tokenizer": { | |
"type": "Whitespace" | |
}, | |
"post_processor": null, | |
"decoder": null, | |
"model": { | |
"type": "BPE", | |
"dropout": null, | |
"unk_token": "[UNK]", | |
"continuing_subword_prefix": null, | |
"end_of_word_suffix": null, | |
"fuse_unk": false, | |
"vocab": { | |
"[STOP]": 0, | |
"[UNK]": 1, | |
"[SPACE]": 2, | |
"!": 3, | |
"'": 4, | |
"(": 5, | |
")": 6, | |
",": 7, | |
"-": 8, | |
".": 9, | |
"/": 10, | |
":": 11, | |
";": 12, | |
"?": 13, | |
"a": 14, | |
"b": 15, | |
"c": 16, | |
"d": 17, | |
"e": 18, | |
"f": 19, | |
"g": 20, | |
"h": 21, | |
"i": 22, | |
"j": 23, | |
"k": 24, | |
"l": 25, | |
"m": 26, | |
"n": 27, | |
"o": 28, | |
"p": 29, | |
"q": 30, | |
"r": 31, | |
"s": 32, | |
"t": 33, | |
"u": 34, | |
"v": 35, | |
"w": 36, | |
"x": 37, | |
"y": 38, | |
"z": 39, | |
"th": 40, | |
"in": 41, | |
"the": 42, | |
"an": 43, | |
"er": 44, | |
"ou": 45, | |
"re": 46, | |
"on": 47, | |
"at": 48, | |
"ed": 49, | |
"en": 50, | |
"to": 51, | |
"ing": 52, | |
"and": 53, | |
"is": 54, | |
"as": 55, | |
"al": 56, | |
"or": 57, | |
"of": 58, | |
"ar": 59, | |
"it": 60, | |
"es": 61, | |
"he": 62, | |
"st": 63, | |
"le": 64, | |
"om": 65, | |
"se": 66, | |
"be": 67, | |
"ad": 68, | |
"ow": 69, | |
"ly": 70, | |
"ch": 71, | |
"wh": 72, | |
"that": 73, | |
"you": 74, | |
"li": 75, | |
"ve": 76, | |
"ac": 77, | |
"ti": 78, | |
"ld": 79, | |
"me": 80, | |
"was": 81, | |
"gh": 82, | |
"id": 83, | |
"ll": 84, | |
"wi": 85, | |
"ent": 86, | |
"for": 87, | |
"ay": 88, | |
"ro": 89, | |
"ver": 90, | |
"ic": 91, | |
"her": 92, | |
"ke": 93, | |
"his": 94, | |
"no": 95, | |
"ut": 96, | |
"un": 97, | |
"ir": 98, | |
"lo": 99, | |
"we": 100, | |
"ri": 101, | |
"ha": 102, | |
"with": 103, | |
"ght": 104, | |
"out": 105, | |
"im": 106, | |
"ion": 107, | |
"all": 108, | |
"ab": 109, | |
"one": 110, | |
"ne": 111, | |
"ge": 112, | |
"ould": 113, | |
"ter": 114, | |
"mo": 115, | |
"had": 116, | |
"ce": 117, | |
"she": 118, | |
"go": 119, | |
"sh": 120, | |
"ur": 121, | |
"am": 122, | |
"so": 123, | |
"pe": 124, | |
"my": 125, | |
"de": 126, | |
"are": 127, | |
"but": 128, | |
"ome": 129, | |
"fr": 130, | |
"ther": 131, | |
"fe": 132, | |
"su": 133, | |
"do": 134, | |
"con": 135, | |
"te": 136, | |
"ain": 137, | |
"ere": 138, | |
"po": 139, | |
"if": 140, | |
"they": 141, | |
"us": 142, | |
"ag": 143, | |
"tr": 144, | |
"now": 145, | |
"oun": 146, | |
"this": 147, | |
"have": 148, | |
"not": 149, | |
"sa": 150, | |
"il": 151, | |
"up": 152, | |
"thing": 153, | |
"from": 154, | |
"ap": 155, | |
"him": 156, | |
"ack": 157, | |
"ation": 158, | |
"ant": 159, | |
"our": 160, | |
"op": 161, | |
"like": 162, | |
"ust": 163, | |
"ess": 164, | |
"bo": 165, | |
"ok": 166, | |
"ul": 167, | |
"ind": 168, | |
"ex": 169, | |
"com": 170, | |
"some": 171, | |
"there": 172, | |
"ers": 173, | |
"co": 174, | |
"res": 175, | |
"man": 176, | |
"ard": 177, | |
"pl": 178, | |
"wor": 179, | |
"way": 180, | |
"tion": 181, | |
"fo": 182, | |
"ca": 183, | |
"were": 184, | |
"by": 185, | |
"ate": 186, | |
"pro": 187, | |
"ted": 188, | |
"ound": 189, | |
"own": 190, | |
"would": 191, | |
"ts": 192, | |
"what": 193, | |
"qu": 194, | |
"ally": 195, | |
"ight": 196, | |
"ck": 197, | |
"gr": 198, | |
"when": 199, | |
"ven": 200, | |
"can": 201, | |
"ough": 202, | |
"ine": 203, | |
"end": 204, | |
"per": 205, | |
"ous": 206, | |
"od": 207, | |
"ide": 208, | |
"know": 209, | |
"ty": 210, | |
"very": 211, | |
"si": 212, | |
"ak": 213, | |
"who": 214, | |
"about": 215, | |
"ill": 216, | |
"them": 217, | |
"est": 218, | |
"red": 219, | |
"ye": 220, | |
"could": 221, | |
"ong": 222, | |
"your": 223, | |
"their": 224, | |
"em": 225, | |
"just": 226, | |
"other": 227, | |
"into": 228, | |
"any": 229, | |
"whi": 230, | |
"um": 231, | |
"tw": 232, | |
"ast": 233, | |
"der": 234, | |
"did": 235, | |
"ie": 236, | |
"been": 237, | |
"ace": 238, | |
"ink": 239, | |
"ity": 240, | |
"back": 241, | |
"ting": 242, | |
"br": 243, | |
"more": 244, | |
"ake": 245, | |
"pp": 246, | |
"then": 247, | |
"sp": 248, | |
"el": 249, | |
"use": 250, | |
"bl": 251, | |
"said": 252, | |
"over": 253, | |
"get": 254, | |
"[START]": 255, | |
"\"": 256, | |
"#": 257, | |
"$": 258, | |
"%": 259, | |
"&": 260, | |
"*": 261, | |
"+": 262, | |
"0": 263, | |
"1": 264, | |
"2": 265, | |
"3": 266, | |
"4": 267, | |
"5": 268, | |
"6": 269, | |
"7": 270, | |
"8": 271, | |
"9": 272, | |
"<": 273, | |
"=": 274, | |
">": 275, | |
"@": 276, | |
"A": 277, | |
"B": 278, | |
"C": 279, | |
"D": 280, | |
"E": 281, | |
"F": 282, | |
"G": 283, | |
"H": 284, | |
"I": 285, | |
"J": 286, | |
"K": 287, | |
"L": 288, | |
"M": 289, | |
"N": 290, | |
"O": 291, | |
"P": 292, | |
"Q": 293, | |
"R": 294, | |
"S": 295, | |
"T": 296, | |
"U": 297, | |
"V": 298, | |
"W": 299, | |
"X": 300, | |
"Y": 301, | |
"Z": 302, | |
"[": 303, | |
"\\": 304, | |
"]": 305, | |
"^": 306, | |
"_": 307, | |
"`": 308, | |
"{": 309, | |
"|": 310, | |
"}": 311, | |
"~": 312, | |
"‐": 313, | |
"‑": 314, | |
"‒": 315, | |
"–": 316, | |
"—": 317, | |
"―": 318, | |
"‖": 319, | |
"‗": 320, | |
"‘": 321, | |
"’": 322, | |
"‚": 323, | |
"‛": 324, | |
"“": 325, | |
"”": 326, | |
"„": 327, | |
"‟": 328, | |
" ": 329, | |
"¡": 330, | |
"¢": 331, | |
"£": 332, | |
"¤": 333, | |
"¥": 334, | |
"¦": 335, | |
"§": 336, | |
"¨": 337, | |
"©": 338, | |
"ª": 339, | |
"«": 340, | |
"¬": 341, | |
"": 342, | |
"®": 343, | |
"¯": 344, | |
"°": 345, | |
"±": 346, | |
"²": 347, | |
"³": 348, | |
"´": 349, | |
"µ": 350, | |
"¶": 351, | |
"·": 352, | |
"¸": 353, | |
"¹": 354, | |
"º": 355, | |
"»": 356, | |
"¼": 357, | |
"½": 358, | |
"¾": 359, | |
"¿": 360, | |
"À": 361, | |
"Á": 362, | |
"Â": 363, | |
"Ã": 364, | |
"Ä": 365, | |
"Å": 366, | |
"Æ": 367, | |
"Ç": 368, | |
"È": 369, | |
"É": 370, | |
"Ê": 371, | |
"Ë": 372, | |
"Ì": 373, | |
"Í": 374, | |
"Î": 375, | |
"Ï": 376, | |
"Ð": 377, | |
"Ñ": 378, | |
"Ò": 379, | |
"Ó": 380, | |
"Ô": 381, | |
"Õ": 382, | |
"Ö": 383, | |
"×": 384, | |
"Ø": 385, | |
"Ù": 386, | |
"Ú": 387, | |
"Û": 388, | |
"Ü": 389, | |
"Ý": 390, | |
"Þ": 391, | |
"ß": 392, | |
"à": 393, | |
"á": 394, | |
"â": 395, | |
"ã": 396, | |
"ä": 397, | |
"å": 398, | |
"æ": 399, | |
"ç": 400, | |
"è": 401, | |
"é": 402, | |
"ê": 403, | |
"ë": 404, | |
"ì": 405, | |
"í": 406, | |
"î": 407, | |
"ï": 408, | |
"ð": 409, | |
"ñ": 410, | |
"ò": 411, | |
"ó": 412, | |
"ô": 413, | |
"õ": 414, | |
"ö": 415, | |
"÷": 416, | |
"ø": 417, | |
"ù": 418, | |
"ú": 419, | |
"û": 420, | |
"ü": 421, | |
"ý": 422, | |
"þ": 423, | |
"ÿ": 424, | |
"ɐ": 425, | |
"ɑ": 426, | |
"ɒ": 427, | |
"ɓ": 428, | |
"ɔ": 429, | |
"ɕ": 430, | |
"ɖ": 431, | |
"ɗ": 432, | |
"ɘ": 433, | |
"ə": 434, | |
"ɚ": 435, | |
"ɛ": 436, | |
"ɜ": 437, | |
"ɝ": 438, | |
"ɞ": 439, | |
"ɟ": 440, | |
"ɠ": 441, | |
"ɡ": 442, | |
"ɢ": 443, | |
"ɣ": 444, | |
"ɤ": 445, | |
"ɥ": 446, | |
"ɦ": 447, | |
"ɧ": 448, | |
"ɨ": 449, | |
"ɩ": 450, | |
"ɪ": 451, | |
"ɫ": 452, | |
"ɬ": 453, | |
"ɭ": 454, | |
"ɮ": 455, | |
"ɯ": 456, | |
"ɰ": 457, | |
"ɱ": 458, | |
"ɲ": 459, | |
"ɳ": 460, | |
"ɴ": 461, | |
"ɵ": 462, | |
"ɶ": 463, | |
"ɷ": 464, | |
"ɸ": 465, | |
"ɹ": 466, | |
"ɺ": 467, | |
"ɻ": 468, | |
"ɼ": 469, | |
"ɽ": 470, | |
"ɾ": 471, | |
"ɿ": 472, | |
"ʀ": 473, | |
"ʁ": 474, | |
"ʂ": 475, | |
"ʃ": 476, | |
"ʄ": 477, | |
"ʅ": 478, | |
"ʆ": 479, | |
"ʇ": 480, | |
"ʈ": 481, | |
"ʉ": 482, | |
"ʊ": 483, | |
"ʋ": 484, | |
"ʌ": 485, | |
"ʍ": 486, | |
"ʎ": 487, | |
"ʏ": 488, | |
"ʐ": 489, | |
"ʑ": 490, | |
"ʒ": 491, | |
"ʓ": 492, | |
"ʔ": 493, | |
"ʕ": 494, | |
"ʖ": 495, | |
"ʗ": 496, | |
"ʘ": 497, | |
"ʙ": 498, | |
"ʚ": 499, | |
"ʛ": 500, | |
"ʜ": 501, | |
"ʝ": 502, | |
"ʞ": 503, | |
"ʟ": 504, | |
"ʠ": 505, | |
"ʡ": 506, | |
"ʢ": 507, | |
"ʣ": 508, | |
"ʤ": 509, | |
"ʥ": 510, | |
"ʦ": 511, | |
"ʧ": 512, | |
"ʨ": 513, | |
"ʩ": 514, | |
"ʪ": 515, | |
"ʫ": 516, | |
"ʬ": 517, | |
"ʭ": 518, | |
"ʮ": 519, | |
"ʯ": 520, | |
"ʰ": 521, | |
"ʱ": 522, | |
"ʲ": 523, | |
"ʳ": 524, | |
"ʴ": 525, | |
"ʵ": 526, | |
"ʶ": 527, | |
"ʷ": 528, | |
"ʸ": 529, | |
"ʹ": 530, | |
"ʺ": 531, | |
"ʻ": 532, | |
"ʼ": 533, | |
"ʽ": 534, | |
"ʾ": 535, | |
"ʿ": 536, | |
"ˀ": 537, | |
"ˁ": 538, | |
"˂": 539, | |
"˃": 540, | |
"˄": 541, | |
"˅": 542, | |
"ˆ": 543, | |
"ˇ": 544, | |
"ˈ": 545, | |
"ˉ": 546, | |
"ˊ": 547, | |
"ˋ": 548, | |
"ˌ": 549, | |
"ˍ": 550, | |
"ˎ": 551, | |
"ˏ": 552, | |
"ː": 553, | |
"ˑ": 554, | |
"˒": 555, | |
"˓": 556, | |
"˔": 557, | |
"˕": 558, | |
"˖": 559, | |
"˗": 560, | |
"˘": 561, | |
"˙": 562, | |
"˚": 563, | |
"˛": 564, | |
"˜": 565, | |
"˝": 566, | |
"˞": 567, | |
"˟": 568, | |
"ˠ": 569, | |
"ˡ": 570, | |
"ˢ": 571, | |
"ˣ": 572, | |
"ˤ": 573, | |
"˥": 574, | |
"˦": 575, | |
"˧": 576, | |
"˨": 577, | |
"˩": 578, | |
"˪": 579, | |
"˫": 580, | |
"ˬ": 581, | |
"˭": 582, | |
"ˮ": 583, | |
"˯": 584, | |
"˰": 585, | |
"˱": 586, | |
"˲": 587, | |
"˳": 588, | |
"˴": 589, | |
"˵": 590, | |
"˶": 591, | |
"˷": 592, | |
"˸": 593, | |
"˹": 594, | |
"˺": 595, | |
"˻": 596, | |
"˼": 597, | |
"˽": 598, | |
"˾": 599, | |
"˿": 600, | |
"ā": 601, | |
"ō": 602, | |
"…": 603, | |
"[UH]": 604, | |
"[UM]": 605, | |
"[giggle]": 606, | |
"[laughter]": 607, | |
"[guffaw]": 608, | |
"[inhale]": 609, | |
"[exhale]": 610, | |
"[sigh]": 611, | |
"[cry]": 612, | |
"[bark]": 613, | |
"[howl]": 614, | |
"[meow]": 615, | |
"[singing]": 616, | |
"[music]": 617, | |
"[whistle]": 618, | |
"[humming]": 619, | |
"[gasp]": 620, | |
"[groan]": 621, | |
"[whisper]": 622, | |
"[mumble]": 623, | |
"[sniff]": 624, | |
"[sneeze]": 625, | |
"[cough]": 626, | |
"[snore]": 627, | |
"[chew]": 628, | |
"[sip]": 629, | |
"[clear_throat]": 630, | |
"[kiss]": 631, | |
"[shhh]": 632, | |
"[gibberish]": 633, | |
"[fr]": 634, | |
"[es]": 635, | |
"[de]": 636, | |
"[it]": 637, | |
"[ipa]": 638, | |
"[end_of_label]": 639, | |
"ŋ": 640, | |
"ᵻ": 641, | |
"θ": 642, | |
"̩": 643, | |
"\u0303": 644, | |
"ɑː": 645, | |
"iː": 646, | |
"uː": 647, | |
"ɜː": 648, | |
"ɔː": 649, | |
"oː": 650, | |
"eɪ": 651, | |
"oʊ": 652, | |
"aɪ": 653, | |
"aʊ": 654, | |
"ɔɪ": 655, | |
"dʒ": 656, | |
"tʃ": 657, | |
"ɪŋ": 658, | |
"ᵻd": 659, | |
"ˈiː": 660, | |
"ˌiː": 661, | |
"ˈɪ": 662, | |
"ˌɪ": 663, | |
"ˈeɪ": 664, | |
"ˌeɪ": 665, | |
"ˈɛ": 666, | |
"ˌɛ": 667, | |
"ˈæ": 668, | |
"ˌæ": 669, | |
"ˈɑː": 670, | |
"ˌɑː": 671, | |
"ˈɔː": 672, | |
"ˌɔː": 673, | |
"oːɹ": 674, | |
"ˈoːɹ": 675, | |
"ˌoːɹ": 676, | |
"ˈoʊ": 677, | |
"ˌoʊ": 678, | |
"ˈʊ": 679, | |
"ˌʊ": 680, | |
"ˈuː": 681, | |
"ˌuː": 682, | |
"ˈɜː": 683, | |
"ˌɜː": 684, | |
"ˈʌ": 685, | |
"ˌʌ": 686, | |
"ˈaɪ": 687, | |
"ˌaɪ": 688, | |
"ˈaʊ": 689, | |
"ˌaʊ": 690, | |
"ˈɔɪ": 691, | |
"ˌɔɪ": 692, | |
"ˈɚ": 693, | |
"ˌɐ": 694, | |
"[PLACEHOLDER55]": 695, | |
"[PLACEHOLDER56]": 696, | |
"[PLACEHOLDER57]": 697, | |
"[PLACEHOLDER58]": 698, | |
"[PLACEHOLDER59]": 699, | |
"[PLACEHOLDER60]": 700, | |
"[PLACEHOLDER61]": 701, | |
"[PLACEHOLDER62]": 702, | |
"[PLACEHOLDER63]": 703 | |
}, | |
"merges": [ | |
"t h", | |
"i n", | |
"th e", | |
"a n", | |
"e r", | |
"o u", | |
"r e", | |
"o n", | |
"a t", | |
"e d", | |
"e n", | |
"t o", | |
"in g", | |
"an d", | |
"i s", | |
"a s", | |
"a l", | |
"o r", | |
"o f", | |
"a r", | |
"i t", | |
"e s", | |
"h e", | |
"s t", | |
"l e", | |
"o m", | |
"s e", | |
"b e", | |
"a d", | |
"o w", | |
"l y", | |
"c h", | |
"w h", | |
"th at", | |
"y ou", | |
"l i", | |
"v e", | |
"a c", | |
"t i", | |
"l d", | |
"m e", | |
"w as", | |
"g h", | |
"i d", | |
"l l", | |
"w i", | |
"en t", | |
"f or", | |
"a y", | |
"r o", | |
"v er", | |
"i c", | |
"h er", | |
"k e", | |
"h is", | |
"n o", | |
"u t", | |
"u n", | |
"i r", | |
"l o", | |
"w e", | |
"r i", | |
"h a", | |
"wi th", | |
"gh t", | |
"ou t", | |
"i m", | |
"i on", | |
"al l", | |
"a b", | |
"on e", | |
"n e", | |
"g e", | |
"ou ld", | |
"t er", | |
"m o", | |
"h ad", | |
"c e", | |
"s he", | |
"g o", | |
"s h", | |
"u r", | |
"a m", | |
"s o", | |
"p e", | |
"m y", | |
"d e", | |
"a re", | |
"b ut", | |
"om e", | |
"f r", | |
"the r", | |
"f e", | |
"s u", | |
"d o", | |
"c on", | |
"t e", | |
"a in", | |
"er e", | |
"p o", | |
"i f", | |
"the y", | |
"u s", | |
"a g", | |
"t r", | |
"n ow", | |
"ou n", | |
"th is", | |
"ha ve", | |
"no t", | |
"s a", | |
"i l", | |
"u p", | |
"th ing", | |
"fr om", | |
"a p", | |
"h im", | |
"ac k", | |
"at ion", | |
"an t", | |
"ou r", | |
"o p", | |
"li ke", | |
"u st", | |
"es s", | |
"b o", | |
"o k", | |
"u l", | |
"in d", | |
"e x", | |
"c om", | |
"s ome", | |
"the re", | |
"er s", | |
"c o", | |
"re s", | |
"m an", | |
"ar d", | |
"p l", | |
"w or", | |
"w ay", | |
"ti on", | |
"f o", | |
"c a", | |
"w ere", | |
"b y", | |
"at e", | |
"p ro", | |
"t ed", | |
"oun d", | |
"ow n", | |
"w ould", | |
"t s", | |
"wh at", | |
"q u", | |
"al ly", | |
"i ght", | |
"c k", | |
"g r", | |
"wh en", | |
"v en", | |
"c an", | |
"ou gh", | |
"in e", | |
"en d", | |
"p er", | |
"ou s", | |
"o d", | |
"id e", | |
"k now", | |
"t y", | |
"ver y", | |
"s i", | |
"a k", | |
"wh o", | |
"ab out", | |
"i ll", | |
"the m", | |
"es t", | |
"re d", | |
"y e", | |
"c ould", | |
"on g", | |
"you r", | |
"the ir", | |
"e m", | |
"j ust", | |
"o ther", | |
"in to", | |
"an y", | |
"wh i", | |
"u m", | |
"t w", | |
"as t", | |
"d er", | |
"d id", | |
"i e", | |
"be en", | |
"ac e", | |
"in k", | |
"it y", | |
"b ack", | |
"t ing", | |
"b r", | |
"mo re", | |
"a ke", | |
"p p", | |
"the n", | |
"s p", | |
"e l", | |
"u se", | |
"b l", | |
"sa id", | |
"o ver", | |
"ge t", | |
"ɑ ː", | |
"i ː", | |
"u ː", | |
"ɜ ː", | |
"ɔ ː", | |
"o ː", | |
"e ɪ", | |
"o ʊ", | |
"a ɪ", | |
"a ʊ", | |
"ɔ ɪ", | |
"d ʒ", | |
"t ʃ", | |
"ɪ ŋ", | |
"ᵻ d", | |
"ˈ iː", | |
"ˌ iː", | |
"ˈ ɪ", | |
"ˌ ɪ", | |
"ˈ eɪ", | |
"ˌ eɪ", | |
"ˈ ɛ", | |
"ˌ ɛ", | |
"ˈ æ", | |
"ˌ æ", | |
"ˈ ɑː", | |
"ˌ ɑː", | |
"ˈ ɔː", | |
"ˌ ɔː", | |
"oː ɹ", | |
"ˈ oːɹ", | |
"ˌ oːɹ", | |
"ˈ oʊ", | |
"ˌ oʊ", | |
"ˈ ʊ", | |
"ˌ ʊ", | |
"ˈ uː", | |
"ˌ uː", | |
"ˈ ɜː", | |
"ˌ ɜː", | |
"ˈ ʌ", | |
"ˌ ʌ", | |
"ˈ aɪ", | |
"ˌ aɪ", | |
"ˈ aʊ", | |
"ˌ aʊ", | |
"ˈ ɔɪ", | |
"ˌ ɔɪ", | |
"ˈ ɚ", | |
"ˌ ɐ" | |
] | |
} | |
} |