{ "version": "1.0", "truncation": null, "padding": { "strategy": { "Fixed": 12 }, "direction": "Right", "pad_to_multiple_of": null, "pad_id": 1, "pad_type_id": 0, "pad_token": "<pad>" }, "added_tokens": [ { "id": 0, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "<mask>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Replace", "pattern": { "String": "X" }, "content": "S" }, "pre_tokenizer": { "type": "Split", "pattern": { "String": "-" }, "behavior": "Removed", "invert": false }, "post_processor": { "type": "RobertaProcessing", "sep": [ "</s>", 2 ], "cls": [ "<s>", 0 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": { "type": "Fuse" }, "model": { "type": "WordLevel", "vocab": { "<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "<mask>": 4, "E": 5, "S": 6, "R": 7, "A": 8, "K": 9, "L": 10, "N": 11, "T": 12, "O": 13, "P": 14, "M": 15, "^": 16, "U": 17, "B": 18, "D": 19, "SH": 20, "F": 21, "V": 22, "I": 23, "G": 24, "J": 25, "NT": 26, "'": 27, "TN": 28, "MN": 29, "TH": 30, "CH": 31, "EU": 32, "TM": 33, "DF": 34, "A&'": 35, "OE": 36, "SS": 37, "NG": 38, "NK": 39, "AU": 40, "TD": 41, "PNT": 42, "A&E": 43, "LD": 44, "MT": 45, "\\": 46 }, "unk_token": "<unk>" } }