File size: 573 Bytes
751936e
 
 
 
 
814ee6b
751936e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

"""
Default tokenizer: GLMGPT2Tokenizer
"""

from transformers import AutoTokenizer
# Load the tokenizer published with the "THUDM/glm-10b" checkpoint.
# trust_remote_code=True allows transformers to run the custom tokenizer
# code that ships inside that model repository.
tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-10b", trust_remote_code=True)

# Token ids to inspect. Shorter alternative samples for quick experiments:
#   [3856, 11030]
#   [2484, 272, 20380]
tokens_id = [
    50259, 51, 12215, 33061, 2059, 318, 5140, 287, 50260, 13,
    50256, 50256, 50256, 50256, 50256, 50257, 3856, 50257, 2484, 272,
]

# Decode the whole id sequence back into a single string.
print(tokenizer.decode(tokens_id))

# Print each id next to the individual token it maps to.
for idx in tokens_id:
    print(idx, tokenizer.convert_ids_to_tokens(idx))