from transformers import AutoTokenizer, GPTNeoXJapaneseTokenizer

# Load the tokenizer files saved in the local "tokenizer" directory.
tokenizer = GPTNeoXJapaneseTokenizer.from_pretrained("tokenizer")
# tokenizer = AutoTokenizer.from_pretrained("abeja/gpt-neox-japanese-2.7b")

# Encode a sample mixing Japanese ("for humans and AI to cooperate"), a URL,
# and an emoji, then print each token id next to its decoded surface form.
tokens = tokenizer.encode("人とAIが協調するためには http://baidu.com 🤣")
for token in tokens:
    print(token, tokenizer.decode([token]))
# `clean` is not a keyword that `tokenize()` recognizes (it is dropped with a
# warning); text cleaning is controlled by `do_clean_text` at load time, so
# reload the tokenizer with it enabled before tokenizing the same sample again.
clean_tokenizer = GPTNeoXJapaneseTokenizer.from_pretrained("tokenizer", do_clean_text=True)
tokens = clean_tokenizer.tokenize("人とAIが協調するためには http://baidu.com 🤣")
print(tokens)
# for token in tokens:
#     print(token, tokenizer.decode([token]))
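
# Note: per the GPTNeoXJapaneseTokenizer docs, `do_clean_text` cleans URLs,
# emails, phone numbers, and Japanese dates/prices, replacing them with
# placeholder tokens such as <URL>, so the cleaned output above should differ
# from the first loop mainly in how the URL (and similar spans) are represented.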