{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "-": 3, "1": 4, "2": 5, "3": 6, "4": 7, "5": 8, "a": 9, "b": 10, "c": 11, "d": 12, "e": 13, "f": 14, "g": 15, "h": 16, "i": 17, "j": 18, "k": 19, "l": 20, "m": 21, "n": 22, "o": 23, "p": 24, "q": 25, "r": 26, "s": 27, "t": 28, "u": 29, "v": 30, "w": 31, "x": 32, "y": 33, "z": 34, "“": 35, "”": 36, "…": 37, "、": 38, "。": 39, "!": 40, "(": 41, ")": 42, ",": 43, ":": 44, ";": 45, "?": 46, "an": 47, "i4": 48, "ng": 49, "sh": 50, "ang": 51, "ji": 52, "zh": 53, "u4": 54, "ao": 55, "e5": 56, "i1": 57, "i2": 58, "i3": 59, "u3": 60, "en": 61, "xi": 62, "an4": 63, "ong": 64, "de5": 65, "ch": 66, "e4": 67, "eng": 68, "an2": 69, "uo": 70, "an1": 71, "ing": 72, "u2": 73, "a1": 74, "ao4": 75, "an3": 76, "shi4": 77, "ou3": 78, "ai4": 79, "u1": 80, "e2": 81, "ang4": 82, "en2": 83, "ong1": 84, "a4": 85, "ang1": 86, "ao3": 87, "li": 88, "hu": 89, "you3": 90, "n1": 91, "ang3": 92, "yi1": 93, "shi2": 94, "ing2": 95, "ei4": 96, "yu": 97, "ai2": 98, "er": 99, "ang2": 100, "qi": 101, "eng2": 102, "e1": 103, "e3": 104, "ao1": 105, "di": 106, "wo": 107, "bu4": 108, "wo3": 109, "uo4": 110, "zai4": 111, "ai3": 112, "ou4": 113, "n4": 114, "eng1": 115, "ong4": 116, "me5": 117, "gu": 118, "en1": 119, "shen2": 120, "ui4": 121, "le5": 122, "ong2": 123, "ren2": 124, "a3": 125, "jing": 126, "ou1": 127, "gong1": 128, "uo2": 129, "yi4": 130, "er4": 131, "ei2": 132, "dao4": 133, "zhong1": 134, "ei3": 135, "eng4": 136, "he2": 137, "san1": 138, "xing": 139, "wei4": 140, "ji4": 141, "n2": 142, "uo1": 143, "ta1": 144, "ing4": 145, "bi": 146, "cheng2": 147, "qi1": 148, "wu3": 149, "en4": 150, "bai3": 151, "en3": 152, "shang4": 153, "zhe4": 154, "jia1": 155, "ou2": 156, "ge4": 157, "di4": 158, "yuan2": 159, "xu": 160, "da4": 161, "a2": 162, "li4": 163, "n3": 164, "ni3": 165, "ai1": 166, "xian4": 167, "uo3": 168, "yi3": 169, "ni": 170, "ti": 171, "xin1": 172, "jiu3": 173, "qu": 174, "ing1": 175, "hui4": 176, "si4": 177, "zhi1": 178, "ye4": 179, "li3": 180, "ji1": 181, "lai2": 182, "chu1": 183, "dian4": 184, "zhi4": 185, "guo2": 186, "ling2": 187, "dian3": 188, "er2": 189, "ba1": 190, "mi": 191, "yao4": 192, "jing1": 193, "ao2": 194, "si1": 195, "ong3": 196, "sheng1": 197, "liu4": 198, "shou3": 199, "zhu": 200, "dui4": 201, "jian4": 202, "men": 203, "men5": 204, "zheng4": 205, "yi2": 206, "nian2": 207, "ji2": 208, "ge1": 209, "pi": 210, "yi": 211, "neng2": 212, "zuo4": 213, "fa1": 214, "yu2": 215, "ju4": 216, "xing2": 217, "ye3": 218, "jiu4": 219, "bei4": 220, "jin4": 221, "xia4": 222, "ku": 223, "jin1": 224, "hou4": 225, "qi3": 226, "ju": 227, "xi1": 228, "wu4": 229, "ming2": 230, "ei1": 231, "yue4": 232, "chang3": 233, "hua4": 234, "chu": 235, "qian1": 236, "wan4": 237, "eng3": 238, "ing3": 239, "jiang1": 240, "qian2": 241, "xiang4": 242, "mei2": 243, "xiao3": 244, "fang2": 245, "shu": 246, "wei2": 247, "ke3": 248, "nan2": 249, "hai2": 250, "i5": 251, "kai1": 252, "dong4": 253, "zi5": 254 }, "merges": [ "a n", "i 4", "n g", "s h", "an g", "j i", "z h", "u 4", "a o", "e 5", "i 1", "i 2", "i 3", "u 3", "e n", "x i", "an 4", "o ng", "d e5", "c h", "e 4", "e ng", "an 2", "u o", "an 1", "i ng", "u 2", "a 1", "ao 4", "an 3", "sh i4", "o u3", "a i4", "u 1", "e 2", "ang 4", "en 2", "ong 1", "a 4", "ang 1", "ao 3", "l i", "h u", "y ou3", "n 1", "ang 3", "y i1", "sh i2", "ing 2", "e i4", "y u", "a i2", "e r", "ang 2", "q i", "eng 2", "e 1", "e 3", "ao 1", "d i", "w o", "b u4", "wo 3", "uo 4", "z ai4", "a i3", "o u4", "n 4", "eng 1", "ong 4", "m e5", "g u", "en 1", "sh en2", "u i4", "l e5", "ong 2", "r en2", "a 3", "ji ng", "o u1", "g ong1", "uo 2", "y i4", "er 4", "e i2", "d ao4", "zh ong1", "e i3", "eng 4", "h e2", "s an1", "xi ng", "w ei4", "j i4", "n 2", "uo 1", "t a1", "ing 4", "b i", "ch eng2", "q i1", "w u3", "en 4", "b ai3", "en 3", "sh ang4", "zh e4", "ji a1", "o u2", "g e4", "d i4", "yu an2", "x u", "d a4", "a 2", "l i4", "n 3", "n i3", "a i1", "xi an4", "uo 3", "y i3", "n i", "t i", "xi n1", "ji u3", "q u", "ing 1", "hu i4", "s i4", "zh i1", "y e4", "l i3", "ji 1", "l ai2", "ch u1", "di an4", "zh i4", "g uo2", "l ing2", "di an3", "er 2", "b a1", "m i", "y ao4", "jing 1", "ao 2", "s i1", "ong 3", "sh eng1", "li u4", "sh ou3", "zh u", "d ui4", "ji an4", "m en", "men 5", "zh eng4", "y i2", "ni an2", "ji 2", "g e1", "p i", "y i", "n eng2", "z uo4", "f a1", "y u2", "j u4", "xing 2", "y e3", "ji u4", "b ei4", "ji n4", "xi a4", "k u", "ji n1", "h ou4", "q i3", "j u", "x i1", "w u4", "m ing2", "e i1", "yu e4", "ch ang3", "hu a4", "ch u", "qi an1", "w an4", "eng 3", "ing 3", "ji ang1", "qi an2", "xi ang4", "m ei2", "xi ao3", "f ang2", "sh u", "w ei2", "k e3", "n an2", "h ai2", "i 5", "k ai1", "d ong4", "z i5" ] } }