charizard / tokenizer.json
typeof's picture
init
8aa4c2e
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "UNKNOWN_0",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "UNKNOWN_1",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "UNKNOWN_2",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "UNKNOWN_3",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "UNKNOWN_4",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "UNKNOWN_5",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "UNKNOWN_6",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "UNKNOWN_7",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "UNKNOWN_8",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "UNKNOWN_9",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 13,
"content": "UNKNOWN_10",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 14,
"content": "UNKNOWN_11",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 15,
"content": "UNKNOWN_12",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 16,
"content": "UNKNOWN_13",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 17,
"content": "UNKNOWN_14",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 18,
"content": "UNKNOWN_15",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 19,
"content": "UNKNOWN_16",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 20,
"content": "UNKNOWN_17",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 21,
"content": "UNKNOWN_18",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 22,
"content": "UNKNOWN_19",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 23,
"content": "UNKNOWN_20",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 24,
"content": "UNKNOWN_21",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 25,
"content": "UNKNOWN_22",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 26,
"content": "UNKNOWN_23",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 27,
"content": "UNKNOWN_24",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 28,
"content": "UNKNOWN_25",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 29,
"content": "UNKNOWN_26",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 30,
"content": "UNKNOWN_27",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 31,
"content": "UNKNOWN_28",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 32,
"content": "UNKNOWN_29",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 33,
"content": "UNKNOWN_30",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 34,
"content": "UNKNOWN_31",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 35,
"content": "UNKNOWN_32",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 36,
"content": "UNKNOWN_33",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 37,
"content": "UNKNOWN_34",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 38,
"content": "UNKNOWN_35",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 39,
"content": "UNKNOWN_36",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 40,
"content": "UNKNOWN_37",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 41,
"content": "UNKNOWN_38",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 42,
"content": "UNKNOWN_39",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 43,
"content": "UNKNOWN_40",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 44,
"content": "UNKNOWN_41",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 45,
"content": "UNKNOWN_42",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 46,
"content": "UNKNOWN_43",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 47,
"content": "UNKNOWN_44",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 48,
"content": "UNKNOWN_45",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 49,
"content": "UNKNOWN_46",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 50,
"content": "UNKNOWN_47",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 51,
"content": "UNKNOWN_48",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 52,
"content": "UNKNOWN_49",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "NFKC"
},
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"post_processor": null,
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<unk>": 0,
"<s>": 1,
"</s>": 2,
"UNKNOWN_0": 3,
"UNKNOWN_1": 4,
"UNKNOWN_2": 5,
"UNKNOWN_3": 6,
"UNKNOWN_4": 7,
"UNKNOWN_5": 8,
"UNKNOWN_6": 9,
"UNKNOWN_7": 10,
"UNKNOWN_8": 11,
"UNKNOWN_9": 12,
"UNKNOWN_10": 13,
"UNKNOWN_11": 14,
"UNKNOWN_12": 15,
"UNKNOWN_13": 16,
"UNKNOWN_14": 17,
"UNKNOWN_15": 18,
"UNKNOWN_16": 19,
"UNKNOWN_17": 20,
"UNKNOWN_18": 21,
"UNKNOWN_19": 22,
"UNKNOWN_20": 23,
"UNKNOWN_21": 24,
"UNKNOWN_22": 25,
"UNKNOWN_23": 26,
"UNKNOWN_24": 27,
"UNKNOWN_25": 28,
"UNKNOWN_26": 29,
"UNKNOWN_27": 30,
"UNKNOWN_28": 31,
"UNKNOWN_29": 32,
"UNKNOWN_30": 33,
"UNKNOWN_31": 34,
"UNKNOWN_32": 35,
"UNKNOWN_33": 36,
"UNKNOWN_34": 37,
"UNKNOWN_35": 38,
"UNKNOWN_36": 39,
"UNKNOWN_37": 40,
"UNKNOWN_38": 41,
"UNKNOWN_39": 42,
"UNKNOWN_40": 43,
"UNKNOWN_41": 44,
"UNKNOWN_42": 45,
"UNKNOWN_43": 46,
"UNKNOWN_44": 47,
"UNKNOWN_45": 48,
"UNKNOWN_46": 49,
"UNKNOWN_47": 50,
"UNKNOWN_48": 51,
"UNKNOWN_49": 52,
"!": 53,
"\"": 54,
"#": 55,
"$": 56,
"%": 57,
"&": 58,
"'": 59,
"(": 60,
")": 61,
"*": 62,
"+": 63,
"-": 64,
".": 65,
"/": 66,
"0": 67,
"1": 68,
"2": 69,
"3": 70,
"4": 71,
"5": 72,
"6": 73,
"7": 74,
"8": 75,
"9": 76,
":": 77,
";": 78,
"<": 79,
"=": 80,
">": 81,
"?": 82,
"@": 83,
"A": 84,
"B": 85,
"C": 86,
"D": 87,
"E": 88,
"F": 89,
"G": 90,
"H": 91,
"I": 92,
"J": 93,
"K": 94,
"L": 95,
"M": 96,
"N": 97,
"O": 98,
"P": 99,
"Q": 100,
"R": 101,
"S": 102,
"T": 103,
"U": 104,
"V": 105,
"W": 106,
"X": 107,
"Y": 108,
"Z": 109,
"[": 110,
"\\": 111,
"]": 112,
"^": 113,
"_": 114,
"`": 115,
"a": 116,
"b": 117,
"c": 118,
"d": 119,
"e": 120,
"f": 121,
"g": 122,
"h": 123,
"i": 124,
"j": 125,
"k": 126,
"l": 127,
"m": 128,
"n": 129,
"o": 130,
"p": 131,
"q": 132,
"r": 133,
"s": 134,
"t": 135,
"u": 136,
"v": 137,
"w": 138,
"x": 139,
"y": 140,
"z": 141,
"{": 142,
"|": 143,
"}": 144,
"~": 145,
"¡": 146,
"¢": 147,
"£": 148,
"¤": 149,
"¥": 150,
"¦": 151,
"§": 152,
"©": 153,
"¬": 154,
"®": 155,
"°": 156,
"±": 157,
"¶": 158,
"·": 159,
"»": 160,
"¿": 161,
"À": 162,
"Á": 163,
"Â": 164,
"Ã": 165,
"Ä": 166,
"Å": 167,
"Æ": 168,
"Ç": 169,
"È": 170,
"É": 171,
"Ê": 172,
"Ë": 173,
"Ì": 174,
"Í": 175,
"Î": 176,
"Ï": 177,
"Ð": 178,
"Ñ": 179,
"Ò": 180,
"Ó": 181,
"Ô": 182,
"Õ": 183,
"Ö": 184,
"×": 185,
"Ø": 186,
"Ù": 187,
"Ú": 188,
"Û": 189,
"Ü": 190,
"Ý": 191,
"Þ": 192,
"ß": 193,
"à": 194,
"á": 195,
"â": 196,
"ã": 197,
"ä": 198,
"å": 199,
"æ": 200,
"ç": 201,
"è": 202,
"é": 203,
"ê": 204,
"ë": 205,
"ì": 206,
"í": 207,
"î": 208,
"ï": 209,
"ð": 210,
"ñ": 211,
"ò": 212,
"ó": 213,
"ô": 214,
"õ": 215,
"ö": 216,
"÷": 217,
"ø": 218,
"ù": 219,
"ú": 220,
"û": 221,
"ü": 222,
"ý": 223,
"þ": 224,
"ÿ": 225,
"Œ": 226,
"œ": 227,
"Š": 228,
"š": 229,
"Ÿ": 230,
"Ž": 231,
"ž": 232,
"ƒ": 233,
"ˆ": 234,
"́": 235,
"̃": 236,
"̄": 237,
"̈": 238,
"̧": 239,
"μ": 240,
"–": 241,
"—": 242,
"‚": 243,
"“": 244,
"”": 245,
"„": 246,
"†": 247,
"‡": 248,
"•": 249,
"‰": 250,
"‹": 251,
"›": 252,
"⁄": 253,
"€": 254,
"▁": 255
},
"merges": []
}
}