from transformers import AutoTokenizer, GPTNeoXJapaneseTokenizer

# Load the tokenizer from a local "tokenizer" directory.
tokenizer = GPTNeoXJapaneseTokenizer.from_pretrained("tokenizer")
# tokenizer = AutoTokenizer.from_pretrained("abeja/gpt-neox-japanese-2.7b")

# Encode a sample string and print each token ID alongside its decoded text.
tokens = tokenizer.encode("人とAIが協調するためには http://baidu.com 🤣")
for token in tokens:
    print(token, tokenizer.decode([token]))

# Tokenize the same string into subword strings rather than token IDs.
tokens = tokenizer.tokenize("人とAIが協調するためには http://baidu.com 🤣", clean=True)
print(tokens)
# for token in tokens:
#     print(token, tokenizer.decode([token]))
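As a quick sanity check, the encoded IDs can also be decoded back in a single call; a minimal sketch, assuming the public abeja/gpt-neox-japanese-2.7b checkpoint (the commented-out alternative above) rather than the local "tokenizer" directory:

from transformers import AutoTokenizer

# Sketch: round-trip the sample text through encode/decode.
# Assumes the abeja/gpt-neox-japanese-2.7b checkpoint from the Hub;
# the snippet above instead loads from a local "tokenizer" directory.
tokenizer = AutoTokenizer.from_pretrained("abeja/gpt-neox-japanese-2.7b")
text = "人とAIが協調するためには http://baidu.com 🤣"
ids = tokenizer.encode(text)
print(tokenizer.decode(ids))  # should roughly reproduce the input text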