{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "<|endoftext|>", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|endoftext|>": 0, "\u0000": 1, "\u0001": 2, "\u0002": 3, "\u0003": 4, "\u0004": 5, "\u0005": 6, "\u0006": 7, "\u0007": 8, "\b": 9, "\t": 10, "\n": 11, "\u000b": 12, "\f": 13, "\r": 14, "\u000e": 15, "\u000f": 16, "\u0010": 17, "\u0011": 18, "\u0012": 19, "\u0013": 20, "\u0014": 21, "\u0015": 22, "\u0016": 23, "\u0017": 24, "\u0018": 25, "\u0019": 26, "\u001a": 27, "\u001b": 28, "\u001c": 29, "\u001d": 30, "\u001e": 31, "\u001f": 32, " ": 33, "!": 34, "\"": 35, "#": 36, "$": 37, "%": 38, "&": 39, "'": 40, "(": 41, ")": 42, "*": 43, "+": 44, ",": 45, "-": 46, ".": 47, "/": 48, "0": 49, "1": 50, "2": 51, "3": 52, "4": 53, "5": 54, "6": 55, "7": 56, "8": 57, "9": 58, ":": 59, ";": 60, "<": 61, "=": 62, ">": 63, "?": 64, "@": 65, "A": 66, "B": 67, "C": 68, "D": 69, "E": 70, "F": 71, "G": 72, "H": 73, "I": 74, "J": 75, "K": 76, "L": 77, "M": 78, "N": 79, "O": 80, "P": 81, "Q": 82, "R": 83, "S": 84, "T": 85, "U": 86, "V": 87, "W": 88, "X": 89, "Y": 90, "Z": 91, "[": 92, "\\": 93, "]": 94, "^": 95, "_": 96, "`": 97, "a": 98, "b": 99, "c": 100, "d": 101, "e": 102, "f": 103, "g": 104, "h": 105, "i": 106, "j": 107, "k": 108, "l": 109, "m": 110, "n": 111, "o": 112, "p": 113, "q": 114, "r": 115, "s": 116, "t": 117, "u": 118, "v": 119, "w": 120, "x": 121, "y": 122, "z": 123, "{": 124, "|": 125, "}": 126, "~": 127, "": 128, "€": 129, "": 130, "‚": 131, "ƒ": 132, "„": 133, "…": 134, "†": 135, "‡": 136, "ˆ": 137, "‰": 138, "Š": 139, "‹": 140, "Œ": 141, "": 142, "Ž": 143, "": 144, "": 145, "‘": 146, "’": 147, "“": 148, "”": 149, "•": 150, "–": 151, "—": 152, "˜": 153, "™": 154, "š": 155, "›": 156, "œ": 157, "": 158, "ž": 159, "Ÿ": 160, " ": 161, "¡": 162, "¢": 163, "£": 164, "¤": 165, "¥": 166, "¦": 167, "§": 168, "¨": 169, "©": 170, "ª": 171, "«": 172, "¬": 173, "­": 174, "®": 175, "¯": 176, "°": 177, "±": 178, "²": 179, "³": 180, "´": 181, "µ": 182, "¶": 183, "·": 184, "¸": 185, "¹": 186, "º": 187, "»": 188, "¼": 189, "½": 190, "¾": 191, "¿": 192, "À": 193, "Á": 194, "Â": 195, "Ã": 196, "Ä": 197, "Å": 198, "Æ": 199, "Ç": 200, "È": 201, "É": 202, "Ê": 203, "Ë": 204, "Ì": 205, "Í": 206, "Î": 207, "Ï": 208, "Ð": 209, "Ñ": 210, "Ò": 211, "Ó": 212, "Ô": 213, "Õ": 214, "Ö": 215, "×": 216, "Ø": 217, "Ù": 218, "Ú": 219, "Û": 220, "Ü": 221, "Ý": 222, "Þ": 223, "ß": 224, "à": 225, "á": 226, "â": 227, "ã": 228, "ä": 229, "å": 230, "æ": 231, "ç": 232, "è": 233, "é": 234, "ê": 235, "ë": 236, "ì": 237, "í": 238, "î": 239, "ï": 240, "ð": 241, "ñ": 242, "ò": 243, "ó": 244, "ô": 245, "õ": 246, "ö": 247, "÷": 248, "ø": 249, "ù": 250, "ú": 251, "û": 252, "ü": 253, "ý": 254, "þ": 255, "ÿ": 256, "Ċ": 257, "Ġ": 258, "Ġt": 259, "he": 260, "Ġa": 261, "Ġs": 262, "nd": 263, "Ġw": 264, "Ġthe": 265, "ed": 266, "Ġb": 267, "Ġto": 268, "Ġand": 269, "Ġh": 270, "Ġf": 271, "ĠT": 272, "in": 273, "Ġwa": 274, "re": 275, "it": 276, "ou": 277, "Ġl": 278, "Ġd": 279, "Ġc": 280, "Ġp": 281, "ay": 282, "Ġm": 283, "er": 284, "Ġwas": 285, "ĠThe": 286, "om": 287, "Ġhe": 288, "is": 289, "Ġn": 290, "ar": 291, "im": 292, "on": 293, "Ġsa": 294, "id": 295, "ll": 296, "Ġha": 297, "Ġg": 298, "at": 299, "ĠS": 300, "ing": 301, "ot": 302, "en": 303, "an": 304, "le": 305, "or": 306, "end": 307, "ir": 308, "of": 309, "am": 310, "et": 311, "ĠH": 312, "Ġit": 313, "Ġth": 314, "ig": 315, "ĠThey": 316, "Ġin": 317, "il": 318, "Ġpl": 319, "Ġ\"": 320, "ĠHe": 321, "ow": 322, "ri": 323, "ver": 324, "ut": 325, "Ġu": 326, "Ġbe": 327, "Ġplay": 328, "Ġsaid": 329, "ith": 330, "Ġday": 331, "Ġwith": 332, "pp": 333, "On": 334, "Ġy": 335, "oo": 336, "ked": 337, "Ġr": 338, "ex": 339, "Ġher": 340, "ce": 341, "ĠI": 342, "ĠTim": 343, "ĠShe": 344, "ld": 345, "Ġhis": 346, "Ġst": 347, "ke": 348, "Ġbig": 349, "nt": 350, "ck": 351, "very": 352, "Ġyou": 353, "st": 354, "ve": 355, "Ġhapp": 356, "un": 357, "Ġon": 358, "riend": 359, "Ġfriend": 360, "all": 361, "ily": 362, "ext": 363, "ĠL": 364, "Ġthey": 365, "oft": 366, "Ġwe": 367, "Ġhad": 368, "Ġnot": 369, "Ġli": 370, "Ġup": 371, "her": 372, "Ġwant": 373, "Ġof": 374, "itt": 375, "<|": 376, "|>": 377, "endoft": 378, "endoftext": 379, "ad": 380, "se": 381, "ĠB": 382, "Ġdo": 383 }, "merges": [ "Ġ t", "h e", "Ġ a", "Ġ s", "n d", "Ġ w", "Ġt he", "e d", "Ġ b", "Ġt o", "Ġa nd", "Ġ h", "Ġ f", "Ġ T", "i n", "Ġw a", "r e", "i t", "o u", "Ġ l", "Ġ d", "Ġ c", "Ġ p", "a y", "Ġ m", "e r", "Ġwa s", "ĠT he", "o m", "Ġ he", "i s", "Ġ n", "a r", "i m", "o n", "Ġs a", "i d", "l l", "Ġh a", "Ġ g", "a t", "Ġ S", "in g", "o t", "e n", "a n", "l e", "o r", "e nd", "i r", "o f", "a m", "e t", "Ġ H", "Ġ it", "Ġt h", "i g", "ĠThe y", "Ġ in", "i l", "Ġp l", "Ġ \"", "ĠH e", "o w", "r i", "v er", "u t", "Ġ u", "Ġb e", "Ġpl ay", "Ġsa id", "it h", "Ġd ay", "Ġw ith", "p p", "O n", "Ġ y", "o o", "k ed", "Ġ r", "e x", "Ġhe r", "c e", "Ġ I", "ĠT im", "ĠS he", "l d", "Ġh is", "Ġs t", "k e", "Ġb ig", "n t", "c k", "ver y", "Ġy ou", "s t", "v e", "Ġha pp", "u n", "Ġ on", "ri end", "Ġf riend", "a ll", "il y", "ex t", "Ġ L", "Ġthe y", "of t", "Ġw e", "Ġha d", "Ġn ot", "Ġl i", "Ġu p", "he r", "Ġwa nt", "Ġ of", "it t", "< |", "| >", "end oft", "endoft ext", "a d", "s e", "Ġ B", "Ġd o" ] } }