Spaces:
Runtime error
Runtime error
File size: 464 Bytes
f23bcf0 1fc0c38 f23bcf0 1fc0c38 f23bcf0 1fc0c38 f23bcf0 1fc0c38 f23bcf0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from tokenizers import Tokenizer
# Path of the serialized tokenizer, resolved relative to the working directory.
_BPE_TOKENIZER_PATH = "chembl_bpe_tokenizer.json"

# Lazily populated by _get_bpe_tokenizer(); None until the first call.
_bpe_tokenizer_instance = None


def _get_bpe_tokenizer():
    """Return the shared tokenizer, loading it from disk on first use."""
    global _bpe_tokenizer_instance
    if _bpe_tokenizer_instance is None:
        # Parsing the JSON file is the expensive step, so do it only once
        # instead of on every tokenization request.
        _bpe_tokenizer_instance = Tokenizer.from_file(_BPE_TOKENIZER_PATH)
    return _bpe_tokenizer_instance


def bpe_tokenizer(smiles_string):
    """Tokenize a SMILES string with the saved BPE tokenizer.

    The tokenizer is read from ``chembl_bpe_tokenizer.json`` the first time
    this function is called and reused for all later calls.

    Args:
        smiles_string: The SMILES string to encode.

    Returns:
        A ``(tokens_text, token_ids)`` tuple: the ``tokens`` and ``ids``
        attributes of the encoding produced for ``smiles_string``.

    Raises:
        Whatever ``Tokenizer.from_file`` raises when the tokenizer file is
        missing or cannot be parsed (propagated unchanged).
    """
    encoded_output = _get_bpe_tokenizer().encode(smiles_string)
    return encoded_output.tokens, encoded_output.ids
|