musiclang / tokenizer.json
floriangardin's picture
Upload tokenizer
77c4c3f
raw
history blame
11.8 kB
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 128,
"strategy": "LongestFirst",
"stride": 0
},
"padding": null,
"added_tokens": [
{
"id": 461,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": true
},
{
"id": 462,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<s>": 0,
"</s>": 1,
"<unk>": 2,
"<mask>": 3,
"!": 4,
"\"": 5,
"#": 6,
"$": 7,
"%": 8,
"&": 9,
"'": 10,
"(": 11,
")": 12,
"*": 13,
"+": 14,
",": 15,
"-": 16,
".": 17,
"/": 18,
"0": 19,
"1": 20,
"2": 21,
"3": 22,
"4": 23,
"5": 24,
"6": 25,
"7": 26,
"8": 27,
"9": 28,
":": 29,
";": 30,
"<": 31,
"=": 32,
">": 33,
"?": 34,
"@": 35,
"A": 36,
"B": 37,
"C": 38,
"D": 39,
"E": 40,
"F": 41,
"G": 42,
"H": 43,
"I": 44,
"J": 45,
"K": 46,
"L": 47,
"M": 48,
"N": 49,
"O": 50,
"P": 51,
"Q": 52,
"R": 53,
"S": 54,
"T": 55,
"U": 56,
"V": 57,
"W": 58,
"X": 59,
"Y": 60,
"Z": 61,
"[": 62,
"\\": 63,
"]": 64,
"^": 65,
"_": 66,
"`": 67,
"a": 68,
"b": 69,
"c": 70,
"d": 71,
"e": 72,
"f": 73,
"g": 74,
"h": 75,
"i": 76,
"j": 77,
"k": 78,
"l": 79,
"m": 80,
"n": 81,
"o": 82,
"p": 83,
"q": 84,
"r": 85,
"s": 86,
"t": 87,
"u": 88,
"v": 89,
"w": 90,
"x": 91,
"y": 92,
"z": 93,
"{": 94,
"|": 95,
"}": 96,
"~": 97,
"¡": 98,
"¢": 99,
"£": 100,
"¤": 101,
"¥": 102,
"¦": 103,
"§": 104,
"¨": 105,
"©": 106,
"ª": 107,
"«": 108,
"¬": 109,
"®": 110,
"¯": 111,
"°": 112,
"±": 113,
"²": 114,
"³": 115,
"´": 116,
"µ": 117,
"¶": 118,
"·": 119,
"¸": 120,
"¹": 121,
"º": 122,
"»": 123,
"¼": 124,
"½": 125,
"¾": 126,
"¿": 127,
"À": 128,
"Á": 129,
"Â": 130,
"Ã": 131,
"Ä": 132,
"Å": 133,
"Æ": 134,
"Ç": 135,
"È": 136,
"É": 137,
"Ê": 138,
"Ë": 139,
"Ì": 140,
"Í": 141,
"Î": 142,
"Ï": 143,
"Ð": 144,
"Ñ": 145,
"Ò": 146,
"Ó": 147,
"Ô": 148,
"Õ": 149,
"Ö": 150,
"×": 151,
"Ø": 152,
"Ù": 153,
"Ú": 154,
"Û": 155,
"Ü": 156,
"Ý": 157,
"Þ": 158,
"ß": 159,
"à": 160,
"á": 161,
"â": 162,
"ã": 163,
"ä": 164,
"å": 165,
"æ": 166,
"ç": 167,
"è": 168,
"é": 169,
"ê": 170,
"ë": 171,
"ì": 172,
"í": 173,
"î": 174,
"ï": 175,
"ð": 176,
"ñ": 177,
"ò": 178,
"ó": 179,
"ô": 180,
"õ": 181,
"ö": 182,
"÷": 183,
"ø": 184,
"ù": 185,
"ú": 186,
"û": 187,
"ü": 188,
"ý": 189,
"þ": 190,
"ÿ": 191,
"Ā": 192,
"ā": 193,
"Ă": 194,
"ă": 195,
"Ą": 196,
"ą": 197,
"Ć": 198,
"ć": 199,
"Ĉ": 200,
"ĉ": 201,
"Ċ": 202,
"ċ": 203,
"Č": 204,
"č": 205,
"Ď": 206,
"ď": 207,
"Đ": 208,
"đ": 209,
"Ē": 210,
"ē": 211,
"Ĕ": 212,
"ĕ": 213,
"Ė": 214,
"ė": 215,
"Ę": 216,
"ę": 217,
"Ě": 218,
"ě": 219,
"Ĝ": 220,
"ĝ": 221,
"Ğ": 222,
"ğ": 223,
"Ġ": 224,
"ġ": 225,
"Ģ": 226,
"ģ": 227,
"Ĥ": 228,
"ĥ": 229,
"Ħ": 230,
"ħ": 231,
"Ĩ": 232,
"ĩ": 233,
"Ī": 234,
"ī": 235,
"Ĭ": 236,
"ĭ": 237,
"Į": 238,
"į": 239,
"İ": 240,
"ı": 241,
"IJ": 242,
"ij": 243,
"Ĵ": 244,
"ĵ": 245,
"Ķ": 246,
"ķ": 247,
"ĸ": 248,
"Ĺ": 249,
"ĺ": 250,
"Ļ": 251,
"ļ": 252,
"Ľ": 253,
"ľ": 254,
"Ŀ": 255,
"ŀ": 256,
"Ł": 257,
"ł": 258,
"Ń": 259,
"Ġ+": 260,
"Ġs": 261,
").": 262,
"(-": 263,
"Ġr": 264,
"ĠĊ": 265,
"__": 266,
"an": 267,
"ian": 268,
"pian": 269,
"piano": 270,
"mp": 271,
"pp": 272,
"Ġh": 273,
"Ġl": 274,
")(": 275,
"Ġ%": 276,
")+": 277,
"II": 278,
"ed": 279,
"']": 280,
"['": 281,
"qd": 282,
"ĠV": 283,
"sd": 284,
"ac": 285,
"au": 286,
"en": 287,
"fr": 288,
"gm": 289,
"augm": 290,
"ent": 291,
"frac": 292,
"augment": 293,
"ĠII": 294,
"ĠI": 295,
"br": 296,
"gh": 297,
"igh": 298,
"brigh": 299,
"bright": 300,
"11": 301,
"10": 302,
"),": 303,
"Ġ2": 304,
"ĠVI": 305,
"ĠVII": 306,
"))": 307,
"ĠIII": 308,
"hd": 309,
")).": 310,
"ĠIV": 311,
"Ġ24": 312,
"VII": 313,
"64": 314,
"Ġ21": 315,
"IV": 316,
"ppp": 317,
"65": 318,
"))+": 319,
"wd": 320,
"43": 321,
"ff": 322,
"VI": 323,
"Ġ4": 324,
"56": 325,
"Ġ56": 326,
"Ġ40": 327,
"12": 328,
"III": 329,
"Ġ3": 330,
"13": 331,
"Ġ35": 332,
"15": 333,
"Ġ15": 334,
"14": 335,
"Ġ42": 336,
")),": 337,
"Ġ30": 338,
")']": 339,
"17": 340,
"(+": 341,
"(+)']": 342,
"{-": 343,
"}']": 344,
"fff": 345,
"19": 346,
"mm": 347,
"16": 348,
"23": 349,
")))+": 350,
"['(+)']": 351,
"Ġ8": 352,
"25": 353,
"Ġ16": 354,
"Ġ84": 355,
"20": 356,
"31": 357,
"21": 358,
"29": 359,
"18": 360,
"37": 361,
"Ġ168": 362,
"60": 363,
"Ġ60": 364,
"td": 365,
"53": 366,
"59": 367,
"35": 368,
"45": 369,
"Ġ12": 370,
"Ġ120": 371,
"47": 372,
"73": 373,
"70": 374,
"41": 375,
"Ġ70": 376,
"55": 377,
"39": 378,
"Ġ80": 379,
"33": 380,
"61": 381,
"83": 382,
"87": 383,
"26": 384,
"81": 385,
"67": 386,
"89": 387,
"77": 388,
"Ġ48": 389,
"57": 390,
"Ġ11": 391,
"Ġ112": 392,
"27": 393,
"101": 394,
"62": 395,
"79": 396,
"109": 397,
"22": 398,
"Ġ14": 399,
"Ġ140": 400,
"49": 401,
"71": 402,
"80": 403,
"95": 404,
"113": 405,
"103": 406,
"115": 407,
"Ġ280": 408,
"151": 409,
"143": 410,
"32": 411,
"69": 412,
"85": 413,
"Ġ240": 414,
"241": 415,
"40": 416,
"91": 417,
"97": 418,
"125": 419,
"137": 420,
"223": 421,
"229": 422,
"34": 423,
"48": 424,
"63": 425,
"92": 426,
"93": 427,
"Ġ10": 428,
"119": 429,
"121": 430,
"123": 431,
"127": 432,
"131": 433,
"173": 434,
"179": 435,
"Ġ840": 436,
"293": 437,
"Ġ105": 438,
"221": 439,
"289": 440,
"36": 441,
"365": 442,
"44": 443,
"46": 444,
"117": 445,
"Ġ210": 446,
")))": 447,
"129": 448,
"Ġ336": 449,
"133": 450,
"139": 451,
"157": 452,
"197": 453,
"199": 454,
"167": 455,
"169": 456,
"207": 457,
"181": 458,
"185": 459,
"187": 460
},
"merges": [
"Ġ +",
"Ġ s",
") .",
"( -",
"Ġ r",
"Ġ Ċ",
"_ _",
"a n",
"i an",
"p ian",
"pian o",
"m p",
"p p",
"Ġ h",
"Ġ l",
") (",
"Ġ %",
") +",
"I I",
"e d",
"' ]",
"[ '",
"q d",
"Ġ V",
"s d",
"a c",
"a u",
"e n",
"f r",
"g m",
"au gm",
"en t",
"fr ac",
"augm ent",
"Ġ II",
"Ġ I",
"b r",
"g h",
"i gh",
"br igh",
"brigh t",
"1 1",
"1 0",
") ,",
"Ġ 2",
"ĠV I",
"ĠV II",
") )",
"ĠII I",
"h d",
") ).",
"ĠI V",
"Ġ2 4",
"V II",
"6 4",
"Ġ2 1",
"I V",
"pp p",
"6 5",
") )+",
"w d",
"4 3",
"f f",
"V I",
"Ġ 4",
"5 6",
"Ġ 56",
"Ġ4 0",
"1 2",
"II I",
"Ġ 3",
"1 3",
"Ġ3 5",
"1 5",
"Ġ 15",
"1 4",
"Ġ4 2",
") ),",
"Ġ3 0",
") ']",
"1 7",
"( +",
"(+ )']",
"{ -",
"} ']",
"ff f",
"1 9",
"m m",
"1 6",
"2 3",
")) )+",
"[' (+)']",
"Ġ 8",
"2 5",
"Ġ 16",
"Ġ8 4",
"2 0",
"3 1",
"2 1",
"2 9",
"1 8",
"3 7",
"Ġ16 8",
"6 0",
"Ġ 60",
"t d",
"5 3",
"5 9",
"3 5",
"4 5",
"Ġ 12",
"Ġ12 0",
"4 7",
"7 3",
"7 0",
"4 1",
"Ġ 70",
"5 5",
"3 9",
"Ġ8 0",
"3 3",
"6 1",
"8 3",
"8 7",
"2 6",
"8 1",
"6 7",
"8 9",
"7 7",
"Ġ4 8",
"5 7",
"Ġ 11",
"Ġ11 2",
"2 7",
"10 1",
"6 2",
"7 9",
"10 9",
"2 2",
"Ġ 14",
"Ġ14 0",
"4 9",
"7 1",
"8 0",
"9 5",
"11 3",
"10 3",
"11 5",
"Ġ2 80",
"15 1",
"1 43",
"3 2",
"6 9",
"8 5",
"Ġ24 0",
"2 41",
"4 0",
"9 1",
"9 7",
"12 5",
"13 7",
"2 23",
"2 29",
"3 4",
"4 8",
"6 3",
"9 2",
"9 3",
"Ġ 10",
"11 9",
"12 1",
"12 3",
"12 7",
"13 1",
"17 3",
"17 9",
"Ġ84 0",
"29 3",
"Ġ10 5",
"2 21",
"2 89",
"3 6",
"3 65",
"4 4",
"4 6",
"11 7",
"Ġ2 10",
")) )",
"12 9",
"Ġ3 36",
"13 3",
"13 9",
"15 7",
"19 7",
"19 9",
"16 7",
"16 9",
"20 7",
"18 1",
"18 5",
"18 7"
]
}
}