{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<eos>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<bos>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "<mask>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Sequence",
    "pretokenizers": [
      {
        "type": "Split",
        "pattern": {
          "Regex": "(\\[[^\\]]+]|<[^>]+>|Br?|Cl?|N|O|S|P|F|H|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\u005C|/|:|~|@|\\?|\\*|\\$|\\%[0-9]{2}|[0-9])"
        },
        "behavior": "Isolated",
        "invert": false
      },
      {
        "type": "Split",
        "pattern": {
          "Regex": "\\[|\\]|0|1|2|3|4|5|6|7|8|9|-|\\+|H|@"
        },
        "behavior": "Isolated",
        "invert": false
      }
    ]
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "SpecialToken": {
          "id": "<bos>",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "<eos>",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {
      "<bos>": {
        "id": "<bos>",
        "ids": [
          2
        ],
        "tokens": [
          "<bos>"
        ]
      },
      "<eos>": {
        "id": "<eos>",
        "ids": [
          1
        ],
        "tokens": [
          "<eos>"
        ]
      },
      "<pad>": {
        "id": "<pad>",
        "ids": [
          0
        ],
        "tokens": [
          "<pad>"
        ]
      }
    }
  },
  "decoder": {
    "type": "BPEDecoder",
    "suffix": "</w>"
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "<pad>": 0,
      "<eos>": 1,
      "<bos>": 2,
      "<unk>": 3,
      "<mask>": 4,
      "C": 5,
      "=": 6,
      "(": 7,
      ")": 8,
      "O": 9,
      "N": 10,
      "1": 11,
      "2": 12,
      "3": 13,
      "4": 14,
      "F": 15,
      "S": 16,
      "<BASE>": 17,
      "<SYST>": 18,
      "<TRAD>": 19,
      "5": 20,
      "Cl": 21,
      "[": 22,
      "]": 23,
      ".": 24,
      "6": 25,
      "7": 26,
      "-": 27,
      "+": 28,
      "#": 29,
      "Br": 30,
      "8": 31,
      "9": 32,
      "P": 33,
      "H": 34,
      "I": 35,
      "Si": 36,
      "B": 37,
      "Na": 38,
      "Y": 39,
      "Ir": 40,
      "Pt": 41,
      "K": 42,
      "Se": 43,
      "Li": 44,
      "W": 45,
      "Sn": 46,
      "V": 47,
      "Zr": 48,
      "Zn": 49,
      "Cu": 50,
      "Fe": 51,
      "%": 52,
      "Ti": 53,
      "Pd": 54,
      "Co": 55,
      "Mg": 56,
      "Al": 57,
      "Ni": 58,
      "Ge": 59,
      "Ru": 60,
      "Ca": 61,
      "U": 62,
      "Mn": 63,
      "Cr": 64,
      "Au": 65,
      "Ag": 66,
      "As": 67,
      "Te": 68,
      "Mo": 69,
      "Ac": 70,
      "0": 71,
      "Tb": 72,
      "f": 73,
      "Rh": 74,
      "g": 75,
      "Cs": 76,
      "Rf": 77,
      "Ar": 78,
      "Sb": 79,
      "Rb": 80,
      "Ba": 81,
      "Os": 82,
      "Re": 83,
      "Gd": 84,
      "Cd": 85,
      "Bi": 86,
      "Pb": 87,
      "In": 88,
      "Ga": 89,
      "Ce": 90,
      "La": 91,
      "Eu": 92,
      "Tl": 93,
      "Tc": 94,
      "Nb": 95,
      "Sr": 96,
      "Ta": 97,
      "Nd": 98,
      "Pr": 99,
      "Yb": 100,
      "Sm": 101,
      "Be": 102,
      "Sc": 103,
      "Dy": 104,
      "Lu": 105,
      "Fm": 106,
      "Er": 107,
      "Th": 108,
      "o": 109,
      "Tm": 110,
      "At": 111,
      "No": 112,
      "Po": 113,
      "Cm": 114,
      "Sg": 115,
      "Xe": 116,
      "Np": 117,
      "Lr": 118,
      "Pu": 119,
      "Pm": 120,
      "Cf": 121,
      "e": 122,
      "Am": 123,
      "Es": 124,
      "Pa": 125,
      "Ne": 126,
      "Bk": 127,
      "Db": 128,
      "Mt": 129,
      "Kr": 130,
      "Rn": 131,
      "s": 132,
      "Bh": 133,
      "Md": 134,
      "Ra": 135,
      "Fr": 136,
      "@": 137,
      "\u005C": 138,
      "/": 139
    },
    "unk_token": "<unk>"
  }
}