from tokenizers import Tokenizer

def bpe_tokenizer(smiles_string):
    """Tokenize a SMILES string with the trained BPE tokenizer."""
    # Load the tokenizer from the saved file
    tokenizer = Tokenizer.from_file("chembl_bpe_tokenizer.json")

    # Tokenize the SMILES string
    encoded_output = tokenizer.encode(smiles_string)

    # To get the tokenized output as text
    tokens_text = encoded_output.tokens

    # To get the corresponding token IDs
    token_ids = encoded_output.ids
    
    return tokens_text, token_ids
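

# A minimal usage sketch, assuming "chembl_bpe_tokenizer.json" is present in the
# working directory; the SMILES string below is only an illustrative example.
if __name__ == "__main__":
    tokens, ids = bpe_tokenizer("CC(=O)Oc1ccccc1C(=O)O")  # aspirin
    print(tokens)
    print(ids)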