{ "added_tokens_decoder": { "3": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4": { "content": "[EOD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "5": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "6": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "7": { "content": "[BOM]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "8": { "content": "[BOS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "9": { "content": "[EOS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "10": { "content": "[BOP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "11": { "content": "[EOP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "12": { "content": "[LAT]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "13": { "content": "[URL]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "14": { "content": "[KRK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "15": { "content": "[NUM]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "16": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "17": { "content": "۰", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "18": { "content": "۱", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "19": { "content": "۲", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "20": { "content": "۳", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "21": { "content": "۴", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "22": { "content": "۵", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "23": { "content": "۶", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "24": { "content": "۷", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "25": { "content": "۸", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "26": { "content": "۹", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "28": { "content": "[EOL]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "25000": { "content": "[MSK]", "lstrip": true, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "25001": { "content": "[]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "25002": { "content": "[]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "25003": { "content": "[]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "additional_special_tokens": [ "[]", "[]", "[]" ], "bos_token": "[BOS]", "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_lower_case": false, "eos_token": "[EOS]", "keep_accents": true, "mask_token": "[MSK]", "model_max_length": 1000000000000000019884624838656, "normalizer": { "normalizers": [], "type": "Sequence" }, "pad_token": "[PAD]", "remove_space": true, "sep_token": "[SEP]", "tokenizer_class": "AlbertTokenizer", "unk_token": "[UNK]" }