madcap's picture
add tokenizer
22914bc
{"ǹ": 0, "ộ": 1, "đ": 2, "~": 3, "̇": 4, "扬": 5, "þ": 6, "ī": 7, "ˢ": 8, "…": 9, "ę": 10, "̈": 11, "م": 12, "î": 13, "*": 14, "の": 15, "ζ": 16, "馆": 17, "a": 18, "ţ": 19, "い": 20, "̃": 21, "‹": 22, "é": 23, "̀": 24, "ị": 25, "ǎ": 26, "≥": 27, "ķ": 28, "ņ": 29, "ε": 30, "r": 31, "乃": 32, "ï": 33, "ꝑ": 34, "ℰ": 35, "–": 36, "n": 37, "∼": 38, "ầ": 39, "∞": 40, "ì": 41, "ą": 42, "û": 43, "v": 44, "ð": 45, "ω": 46, "ǔ": 47, "西": 48, "ј": 49, "생": 50, "ҫ": 51, "€": 52, "{": 53, "‐": 54, "æ": 55, "υ": 56, "â": 57, "ġ": 58, "ủ": 59, "е": 60, "t": 61, "ō": 62, "d": 63, "β": 64, "ö": 65, "−": 66, "ɨ": 67, "ǃ": 68, "ğ": 69, "ø": 70, "星": 71, "z": 72, "ů": 73, "ل": 74, "δ": 75, "ة": 76, "l": 77, "ı": 78, "ć": 79, "ゔ": 80, "ư": 81, "ぬ": 82, "ħ": 83, "ũ": 84, "ż": 85, "ι": 86, "ý": 87, "κ": 88, "'": 89, "ổ": 90, "g": 91, "š": 92, "ờ": 93, "ə": 94, "̐": 95, "/": 96, "ě": 97, "ʔ": 98, "青": 99, "文": 100, "í": 101, "ỳ": 102, "º": 103, "w": 104, "ë": 105, "а": 106, "ū": 107, "ʽ": 108, "(": 109, "®": 110, "ደ": 111, "c": 112, "f": 113, "ÿ": 114, "ቀ": 115, "ǀ": 116, "ù": 117, "먹": 118, "ă": 119, "μ": 120, "ወ": 121, "―": 122, "$": 123, "ồ": 124, "œ": 125, "ã": 126, "ċ": 127, "e": 128, "ʿ": 129, "ń": 130, "术": 131, "ē": 132, "ų": 133, "к": 135, "た": 136, "ç": 137, "つ": 138, "}": 139, "ś": 140, "ử": 141, "á": 142, "’": 143, "へ": 144, "m": 145, "ò": 146, "п": 147, ")": 148, "à": 149, "`": 150, "ʻ": 151, "ℵ": 152, "_": 153, "ơ": 154, "京": 155, "—": 156, "ひ": 157, "̂": 158, "ļ": 159, "«": 160, "ô": 161, "ο": 162, "̧": 163, "э": 164, "ḍ": 165, "ł": 166, "^": 167, "н": 168, "ʼ": 169, "ς": 170, "զ": 171, "å": 172, "ạ": 173, "北": 174, "ž": 175, "د": 176, "õ": 177, "̲": 178, "ź": 179, "ː": 180, "ề": 181, "貴": 182, "o": 183, "ṭ": 184, "µ": 185, "č": 186, "′": 187, "∨": 188, "ệ": 189, "ℕ": 190, "ℝ": 191, "ر": 192, "ş": 193, "̠": 194, "ñ": 195, "ắ": 196, "±": 197, "s": 198, "з": 199, "ř": 200, "ợ": 201, "ê": 202, "ė": 203, "─": 204, "ả": 205, "x": 206, "ș": 207, "ā": 208, "γ": 209, "ő": 210, "ب": 
211, "집": 212, "ț": 213, "ĩ": 214, "ľ": 215, "う": 216, "甌": 217, "†": 218, "ن": 219, "½": 220, "∈": 221, "や": 222, "ʉ": 223, "·": 224, "y": 225, "b": 226, "&": 227, "„": 228, "ä": 229, "»": 230, "ا": 231, "ℂ": 232, "̱": 233, "ú": 234, "я": 235, "ጠ": 236, "기": 237, "ŏ": 238, "գ": 239, "°": 240, "i": 241, "ṇ": 242, "め": 243, "و": 244, "|": 134, "и": 246, "г": 247, "ψ": 248, "ǫ": 249, "р": 250, "ậ": 251, "η": 252, "=": 253, "☉": 254, "ó": 255, "美": 256, "ṅ": 257, "삼": 258, "p": 259, "k": 260, "ṣ": 261, "θ": 262, "j": 263, "ü": 264, "́": 265, "ν": 266, "ℤ": 267, "ό": 268, "ß": 269, "₽": 270, "∅": 271, "→": 272, "£": 273, "杜": 274, "×": 275, "ρ": 276, "ễ": 277, "고": 278, "ẵ": 279, "u": 280, "h": 281, "ň": 282, "ʾ": 283, "∆": 284, "і": 285, "ĺ": 286, "´": 287, "§": 288, "ď": 289, "ي": 290, "ť": 291, "ṯ": 292, "τ": 293, "›": 294, "ま": 295, "α": 296, "è": 297, "σ": 298, "q": 299, "π": 300, "м": 245, "[UNK]": 301, "[PAD]": 302}