import codecs

from SmilesPE.tokenizer import *


def smilespe_tokenizer(smiles_string):
    """Tokenize a SMILES string with the SmilesPE pair-encoding tokenizer.

    The vocabulary is read from ``chembl_smiles_tokenizer30000.txt``,
    resolved relative to the current working directory, and a new
    ``SPE_Tokenizer`` is built from it on every call.

    Args:
        smiles_string: The SMILES string to tokenize.

    Returns:
        Whatever ``SPE_Tokenizer.tokenize`` returns for ``smiles_string``.

    Raises:
        OSError: If the vocabulary file cannot be opened.
    """
    # Open the vocabulary in a context manager so the file handle is closed
    # deterministically instead of leaking one descriptor per call.
    # NOTE(review): this relies on SPE_Tokenizer reading the whole file in
    # its constructor; confirm against the installed SmilesPE version.
    with codecs.open('chembl_smiles_tokenizer30000.txt') as spe_vob:
        spe = SPE_Tokenizer(spe_vob)
    return spe.tokenize(smiles_string)