Spaces:
Running
Running
File size: 573 Bytes
751936e 814ee6b 751936e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
"""
Default tokenizer used: GLMGPT2Tokenizer
"""
from transformers import AutoTokenizer
# Load the tokenizer published under "THUDM/glm-10b". trust_remote_code=True
# allows transformers to run the tokenizer implementation shipped in that
# repository instead of a built-in class.
tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-10b", trust_remote_code=True)

# Id sequence to inspect. Shorter samples tried earlier:
#   [3856, 11030]
#   [2484, 272, 20380]
tokens_id = [
    50259, 51, 12215, 33061, 2059, 318, 5140, 287, 50260, 13,
    50256, 50256, 50256, 50256, 50256, 50257, 3856, 50257, 2484, 272,
]

# Decode the whole sequence at once and print the resulting text.
# NOTE(review): the original carried a bare `# ''` note after this call,
# presumably a recorded output -- confirm before relying on it.
print(tokenizer.decode(tokens_id))

# Print each id next to the token it maps to, one pair per line.
for idx in tokens_id:
    print(idx, tokenizer.convert_ids_to_tokens(idx))