"""

"""

from vocab.gpt2 import tokenizer
# from transformers import GPT2Tokenizer
# # tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# tokenizer = GPT2Tokenizer.from_pretrained("tokenizer")

# Show what the tokenizer's bpe() step returns for a Chinese string.
bpe_result = tokenizer.bpe('中国')
print(bpe_result)

# Each entry is (positional args, keyword args) for one tokenizer.encode()
# call; the results are printed in this order so the outputs can be compared
# side by side.
encode_cases = (
    # default: add_prefix_space=False
    (("Hello world",), {}),
    (("Hello world",), {"add_prefix_space": True}),
    # leading space written directly into the text instead of via the flag
    ((" Hello world",), {}),
    # original author's note: add_special_tokens has no effect here
    (("Hello world",), {"add_special_tokens": True}),
    # Chinese text followed by a newline, passed via the `text` keyword
    ((), {"text": '中国\n', "add_special_tokens": False}),
)

for positional, keywords in encode_cases:
    print(tokenizer.encode(*positional, **keywords))
#
# print(tokenizer.encode(text='中国', add_special_tokens=False))
#
# print(tokenizer.tokenize('I love Salah and  salad'))