SmilesPeTokenizer / SmilesPeTokenizer.py
saicharan2804
SmilesPE tokenizer
1fc0c38
raw
history blame
251 Bytes
import codecs
from SmilesPE.tokenizer import *
def smilespe_tokenizer(smiles_string):
    """Tokenize a SMILES string with the SmilesPE tokenizer.

    The vocabulary is read from ``chembl_smiles_tokenizer30000.txt``. The
    path is relative, so it is resolved against the current working
    directory of the process, not the location of this module.

    Args:
        smiles_string: The SMILES string to tokenize.

    Returns:
        Whatever ``SPE_Tokenizer.tokenize`` returns for ``smiles_string``
        (presumably a space-separated string of tokens -- confirm against
        the SmilesPE documentation).

    Raises:
        FileNotFoundError: If the vocabulary file is not present in the
            current working directory.
    """
    # Use a context manager so the vocabulary file handle is closed on every
    # call, including when tokenizer construction raises. The tokenizer is
    # built inside the ``with`` block because it needs the file open while it
    # reads the vocabulary.
    # NOTE(review): this assumes SPE_Tokenizer consumes the file eagerly in
    # its constructor and does not keep the handle for later -- confirm.
    with codecs.open('chembl_smiles_tokenizer30000.txt') as spe_vob:
        spe = SPE_Tokenizer(spe_vob)

    # NOTE(review): the vocabulary is re-read and the tokenizer rebuilt on
    # every call; callers tokenizing many strings may want to cache it.
    return spe.tokenize(smiles_string)