""" | |
""" | |
from vocab.gpt2 import tokenizer
# Alternative: load the stock GPT-2 tokenizer from Hugging Face instead
# from transformers import GPT2Tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# tokenizer = GPT2Tokenizer.from_pretrained("tokenizer")
print(tokenizer.bpe('中国'))

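# GPT-2 BPE is byte-level: text is first mapped to printable byte symbols,
# so each Chinese character (3 UTF-8 bytes) is split into byte pieces rather
# than kept whole. Sketch (byte renderings assume the standard "gpt2" vocab):
print(tokenizer.tokenize('中国'))  # expect byte-level pieces such as 'ä¸Ń', not '中国'
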
print(tokenizer.encode("Hello world"))  # add_prefix_space defaults to False
print(tokenizer.encode("Hello world", add_prefix_space=True))
print(tokenizer.encode(" Hello world"))  # a literal leading space has the same effect as add_prefix_space=True
print(tokenizer.encode("Hello world", add_special_tokens=True))  # add_special_tokens has no effect: GPT-2 adds no special tokens by default
print(tokenizer.encode(text='中国\n', add_special_tokens=False))
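
# Minimal verification sketch: a leading space changes the first token ID.
# (The IDs below assume the standard "gpt2" vocabulary; a custom vocab will differ.)
ids_plain = tokenizer.encode("Hello world")   # expect [15496, 995] with the stock vocab
ids_space = tokenizer.encode(" Hello world")  # expect [18435, 995]: 'ĠHello' is a distinct token
assert tokenizer.decode(ids_space) == " Hello world"  # decoding round-trips the leading space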

# print(tokenizer.encode(text='中国', add_special_tokens=False))

# print(tokenizer.tokenize('I love Salah and salad'))
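
# tokenize() makes the space handling visible: byte 0x20 is rendered as 'Ġ'.
# Sketch (token strings assume the standard "gpt2" vocab):
print(tokenizer.tokenize("Hello world"))   # expect ['Hello', 'Ġworld']
print(tokenizer.tokenize(" Hello world"))  # expect ['ĠHello', 'Ġworld']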