xu-song's picture
update
9495a4f
raw
history blame contribute delete
282 Bytes
"""
## 词典扩容
32000 <pad>
32001 但
"""
from transformers import LlamaTokenizer
# Build the module-level tokenizer from the "ziqingyang/chinese-llama-2-7b"
# identifier; `from_pretrained` resolves it and loads the tokenizer files.
tokenizer = LlamaTokenizer.from_pretrained(
    "ziqingyang/chinese-llama-2-7b",
)

# Attach a free-form description as an ad-hoc `comments` attribute on the
# tokenizer object. The text is a runtime string and is kept verbatim; in
# English it reads roughly: "Redesigned a new vocabulary (size: 55296),
# further improving the coverage of Chinese characters and words."
# NOTE(review): how `comments` is consumed is not visible here; confirm
# against the code that imports this module.
tokenizer.comments = (
    "重新设计了新词表(大小:55296),进一步提升了中文字词的覆盖程度"
)