""" | |
gpt_35_turbo decode UnicodeDecodeError 99413 b' \xe6\xb5' | |
gpt_35_turbo decode UnicodeDecodeError 99742 b'\x8c\xa8' | |
gpt_35_turbo decode UnicodeDecodeError 99834 b'\xad\x90' | |
gpt_35_turbo decode UnicodeDecodeError 100112 b'\xe0\xae\xbf\xe0\xae' | |
gpt_35_turbo decode KeyError 100256 | |
gpt_35_turbo decode KeyError 100261 | |
gpt_35_turbo decode KeyError 100262 | |
gpt_35_turbo decode KeyError 100263 | |
""" | |
import json
import tiktoken

tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')  # resolves to the cl100k_base encoding

tokens = [100263, 99834]  # 100263: unassigned id; 99834: an incomplete UTF-8 byte sequence
try:
    tokenizer.decode(tokens)
except KeyError as e:  # 100263 has no entry in the vocabulary
    print("decode", type(e).__name__, e)
try:
    tokenizer._core_bpe.decode_bytes(tokens).decode("utf-8", errors="replace")
except Exception as e:  # errors="replace" only masks invalid UTF-8, not unknown ids
    print("decode_bytes", type(e).__name__, e)
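# Sanity check of the id space (added for illustration): cl100k_base assigns ids 0-100255
# to mergeable ranks and 100257-100260 / 100276 to special tokens, so ids like 100256 and
# 100261-100275 fall into gaps and raise KeyError.
print(tokenizer.n_vocab, tokenizer.max_token_value)  # 100277 100276
print(sorted(tokenizer.special_tokens_set))  # ['<|endofprompt|>', '<|endoftext|>', '<|fim_middle|>', '<|fim_prefix|>', '<|fim_suffix|>']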
for token_id in [100263, 99834]:  # special tokens: 100257-100260, 100276
    # Every decode_* variant raises KeyError for the unassigned id 100263;
    # the byte-level variants return raw bytes for 99834 instead of raising.
    try:
        tokenizer.decode_tokens_bytes([token_id])
    except Exception as e:
        print("decode_tokens_bytes", type(e).__name__, token_id)
    try:
        tokenizer.decode_single_token_bytes(token_id)
    except Exception as e:
        print("decode_single_token_bytes", type(e).__name__, token_id)
    try:
        tokenizer.decode_bytes([token_id])
    except Exception as e:
        print("decode_bytes", type(e).__name__, token_id)
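
# A minimal sketch (added here, not part of the original script) of a decode that never
# raises: unassigned ids become U+FFFD and invalid UTF-8 is replaced as well. The name
# safe_decode is illustrative.
def safe_decode(enc: tiktoken.Encoding, token_ids: list[int]) -> str:
    chunks = []
    for t in token_ids:
        try:
            chunks.append(enc.decode_single_token_bytes(t))  # KeyError for unassigned ids
        except KeyError:
            chunks.append("\ufffd".encode("utf-8"))  # stand-in for an unknown token id
    return b"".join(chunks).decode("utf-8", errors="replace")

print(safe_decode(tokenizer, tokens))  # prints replacement characters instead of raising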