{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 3 ], "tokens": [ "" ] } } }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "[C]": 4, "[Ring1]": 5, "[Branch1]": 6, "[N]": 7, "[=Branch1]": 8, "[=C]": 9, "[=O]": 10, "[O]": 11, "[Branch2]": 12, "[Ring2]": 13, "[C@H1]": 14, "[C@@H1]": 15, "[=N]": 16, "[S]": 17, "[F]": 18, "[#Branch1]": 19, "[=Branch2]": 20, "[#C]": 21, "[P]": 22, "[#Branch2]": 23, "[=Ring1]": 24, "[Cl]": 25, "[NH1]": 26, "[C@]": 27, "[C@@]": 28, "[Br]": 29, "[/C]": 30, "[#N]": 31, "[=Ring2]": 32, "[O-1]": 33, "[N+1]": 34, "[=N+1]": 35, "[I]": 36, "[=N-1]": 37, "[S@]": 38, "[=S]": 39, "[S@@]": 40, "[N-1]": 41, "[Si]": 42, "[/C@H1]": 43, "[/Cl]": 44, "[/C@@H1]": 45, "[S+1]": 46, "[=S@]": 47, "[=S@@]": 48, "[B]": 49, "[/Br]": 50, "[/S]": 51, "[P@]": 52, "[/F]": 53, "[P@@]": 54, "[N@]": 55, "[/N]": 56, "[/O]": 57, "[/N+1]": 58, "[N@@]": 59, "[=P]": 60, "[/I]": 61, "[B-1]": 62, "[NH1+1]": 63, "[N@@H1+1]": 64, "[NH2+1]": 65, "[N@H1+1]": 66, "[OH0]": 67, "[NH3+1]": 68, "[PH1]": 69, "[Si@]": 70, "[Si@@]": 71, "[/S@@]": 72, "[=NH1+1]": 73, "[N@+1]": 74, "[/S@]": 75, "[N@@+1]": 76, "[/P]": 77, "[Sn]": 78, "[=Se]": 79, ".": 80, "[Cl-1]": 81, "[#N+1]": 82, "[=NH2+1]": 83, "[/C@]": 84, "[C-1]": 85, "[=S+1]": 86, "[CH0]": 87, "[NH0]": 88, "[=P@@]": 89, "[S@@+1]": 90, "[=NH0]": 91, "[=P@]": 92, "[/C@@]": 93, "[/O-1]": 94, "[=O+1]": 95, "[Si@H1]": 96, "[/Si]": 97, "[=SH1]": 98, "[O+1]": 99, "[P+1]": 100, "[P@@H1]": 101, "[SH1]": 102, "[Si@@H1]": 103 }, "unk_token": "" } }