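"""
Expose the tiktoken encoding for the ``gpt-3.5-turbo`` model as the module-level
``tokenizer`` object, with a few extra metadata attributes attached to it
(``vocab_size``, ``comments``, ``reversible``).

Provenance (recovered from the hosting page header): commit a6c67ec,
"fix tiktoken", by xu-song.
"""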
import tiktoken
import tokenizer.tiktoken_patch
# Look up the encoding tiktoken associates with the "gpt-3.5-turbo" model.
# NOTE: this assignment rebinds the module-level name `tokenizer`, which the
# `import tokenizer.tiktoken_patch` statement had bound to the package.
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Marked reversible: the encoding is lossless, so token ids can be decoded
# back into the original text.
tokenizer.reversible = True
tokenizer.comments = (
    "tiktoken is a fast BPE tokeniser for use with OpenAI's models. "
    "There are 16 tokens KeyError"
)
# Publish tiktoken's `n_vocab` under the attribute name `vocab_size` as well.
tokenizer.vocab_size = tokenizer.n_vocab