File size: 1,277 Bytes
23aa310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

# 载入Tokenizer
model_path = "..\\models\\chatglm-6b-int4"
CHECKPOINT_PATH = '.\\output\\adgen-chatglm-6b-pt-128-2e-2\\checkpoint-100'

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# 如果需要加载的是新 Checkpoint(只包含 PrefixEncoder 参数):
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, pre_seq_len=128)
model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
new_prefix_state_dict = {}
for k, v in prefix_state_dict.items():
    if k.startswith("transformer.prefix_encoder."):
        new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

# 之后根据需求可以进行量化,也可以直接使用:
kernel_file = "{}\\quantization_kernels.so".format(model_path)
model = model.quantize(bits=4,kernel_file=kernel_file)

model = model.half().cuda()
model.transformer.prefix_encoder.float()

model = model.eval()

response, history = model.chat(tokenizer, "你好呀", history=[])
print("response:", response)