calculator-8m / tokenizer.json
georgiyozhegov's picture
Upload tokenizer
1b605ab verified
raw
history blame
8.24 kB
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 256,
"strategy": "LongestFirst",
"stride": 0
},
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<sos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "find",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "step",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "answer",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128,
"content": "<bos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"post_processor": null,
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<sos>": 0,
"<eos>": 1,
"<pad>": 2,
"<unk>": 3,
"find": 4,
"step": 5,
"answer": 6,
"\n": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
"-": 12,
".": 13,
"/": 14,
"0": 15,
"1": 16,
"2": 17,
"3": 18,
"4": 19,
"5": 20,
"6": 21,
"7": 22,
"8": 23,
"9": 24,
"=": 25,
"a": 26,
"d": 27,
"e": 28,
"f": 29,
"i": 30,
"n": 31,
"o": 32,
"p": 33,
"r": 34,
"s": 35,
"t": 36,
"w": 37,
"▁": 38,
"▁-": 39,
"\ns": 40,
"ep": 41,
"tep": 42,
"▁=": 43,
"\nstep": 44,
"0.": 45,
"▁1": 46,
"er": 47,
"▁/": 48,
"▁*": 49,
"\na": 50,
"fi": 51,
"nd": 52,
"ns": 53,
"wer": 54,
"▁fi": 55,
"\nans": 56,
"▁find": 57,
"\nanswer": 58,
"▁+": 59,
"▁-1": 60,
"▁2": 61,
"▁0.": 62,
"▁3": 63,
"▁4": 64,
"▁5": 65,
"▁6": 66,
"▁7": 67,
"▁8": 68,
"▁9": 69,
"▁-2": 70,
"▁-0.": 71,
"▁-3": 72,
"▁-4": 73,
"▁-5": 74,
"▁-6": 75,
"▁-7": 76,
".5": 77,
".2": 78,
".1": 79,
"▁-8": 80,
"▁-9": 81,
".3": 82,
".6": 83,
".8": 84,
".4": 85,
".7": 86,
".9": 87,
"33": 88,
"66": 89,
"5\nstep": 90,
"0\nstep": 91,
"2\nstep": 92,
"11": 93,
"4\nstep": 94,
"8\nstep": 95,
"6\nstep": 96,
"▁10": 97,
"25": 98,
"28": 99,
"14": 100,
"7\nstep": 101,
"99": 102,
"75": 103,
"3\nstep": 104,
"1\nstep": 105,
"▁(": 106,
"18": 107,
"29": 108,
"15": 109,
"44": 110,
"16": 111,
"19": 112,
"23": 113,
"78": 114,
"76": 115,
"13": 116,
"26": 117,
"24": 118,
"08": 119,
"79": 120,
"05": 121,
"09": 122,
"04": 123,
"74": 124,
"73": 125,
"06": 126,
"88": 127
},
"merges": [
[
"▁",
"-"
],
[
"\n",
"s"
],
[
"e",
"p"
],
[
"t",
"ep"
],
[
"▁",
"="
],
[
"\ns",
"tep"
],
[
"0",
"."
],
[
"▁",
"1"
],
[
"e",
"r"
],
[
"▁",
"/"
],
[
"▁",
"*"
],
[
"\n",
"a"
],
[
"f",
"i"
],
[
"n",
"d"
],
[
"n",
"s"
],
[
"w",
"er"
],
[
"▁",
"fi"
],
[
"\na",
"ns"
],
[
"▁fi",
"nd"
],
[
"\nans",
"wer"
],
[
"▁",
"+"
],
[
"▁-",
"1"
],
[
"▁",
"2"
],
[
"▁",
"0."
],
[
"▁",
"3"
],
[
"▁",
"4"
],
[
"▁",
"5"
],
[
"▁",
"6"
],
[
"▁",
"7"
],
[
"▁",
"8"
],
[
"▁",
"9"
],
[
"▁-",
"2"
],
[
"▁-",
"0."
],
[
"▁-",
"3"
],
[
"▁-",
"4"
],
[
"▁-",
"5"
],
[
"▁-",
"6"
],
[
"▁-",
"7"
],
[
".",
"5"
],
[
".",
"2"
],
[
".",
"1"
],
[
"▁-",
"8"
],
[
"▁-",
"9"
],
[
".",
"3"
],
[
".",
"6"
],
[
".",
"8"
],
[
".",
"4"
],
[
".",
"7"
],
[
".",
"9"
],
[
"3",
"3"
],
[
"6",
"6"
],
[
"5",
"\nstep"
],
[
"0",
"\nstep"
],
[
"2",
"\nstep"
],
[
"1",
"1"
],
[
"4",
"\nstep"
],
[
"8",
"\nstep"
],
[
"6",
"\nstep"
],
[
"▁1",
"0"
],
[
"2",
"5"
],
[
"2",
"8"
],
[
"1",
"4"
],
[
"7",
"\nstep"
],
[
"9",
"9"
],
[
"7",
"5"
],
[
"3",
"\nstep"
],
[
"1",
"\nstep"
],
[
"▁",
"("
],
[
"1",
"8"
],
[
"2",
"9"
],
[
"1",
"5"
],
[
"4",
"4"
],
[
"1",
"6"
],
[
"1",
"9"
],
[
"2",
"3"
],
[
"7",
"8"
],
[
"7",
"6"
],
[
"1",
"3"
],
[
"2",
"6"
],
[
"2",
"4"
],
[
"0",
"8"
],
[
"7",
"9"
],
[
"0",
"5"
],
[
"0",
"9"
],
[
"0",
"4"
],
[
"7",
"4"
],
[
"7",
"3"
],
[
"0",
"6"
],
[
"8",
"8"
]
]
}
}