# chatglm-6b-int4/ptuning/predict_multi_chat.py
import os
import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer
# Load the tokenizer
model_path = "..\\models\\chatglm-6b-int4"  # Windows-style relative path to the base model
CHECKPOINT_PATH = '.\\output\\adgen-chatglm-6b-pt-128-2e-2\\checkpoint-1000'  # P-Tuning checkpoint directory
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# To load a new checkpoint (containing only the PrefixEncoder parameters):
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, pre_seq_len=128)  # pre_seq_len must match the value used in training
model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
# Keep only the PrefixEncoder weights, stripping the "transformer.prefix_encoder." prefix from each key
new_prefix_state_dict = {}
for k, v in prefix_state_dict.items():
    if k.startswith("transformer.prefix_encoder."):
        new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
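# Optional sanity check (a minimal sketch, not in the original script): fail fast if the
# checkpoint held no PrefixEncoder weights, which usually means CHECKPOINT_PATH is wrong.
assert new_prefix_state_dict, "no transformer.prefix_encoder.* keys found in checkpoint"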
# From here you can quantize as needed, or use the model directly:
kernel_file = "{}\\quantization_kernels.so".format(model_path)
model = model.quantize(bits=4, kernel_file=kernel_file)
model = model.half().cuda()
model.transformer.prefix_encoder.float()  # keep the PrefixEncoder in fp32 for numerical stability
model = model.eval()
# response, history = model.chat(tokenizer, "你好呀", history=[])
# print("response:", response)
def parse_text(text):
    """Escape model output into HTML, turning ``` fences into <pre><code> blocks."""
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                # Opening fence: any trailing token is treated as the language tag
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                # Closing fence
                lines[i] = '<br></code></pre>'
        else:
            if i > 0:
                if count % 2 == 1:
                    # Inside a code block: escape HTML and Markdown special characters
                    line = line.replace("`", "\\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text
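# For illustration (hypothetical input, traced by hand against the function above):
#   parse_text("hi\n```python\nprint(1)\n```")
#   -> 'hi<pre><code class="language-python"><br>print&#40;1&#41;<br></code></pre>'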
def predict(input, chatbot, max_length, top_p, temperature, history):
    # Stream the model's reply, yielding the growing (query, response) pair at each step
    chatbot.append((parse_text(input), ""))
    for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                               temperature=temperature):
        chatbot[-1] = (parse_text(input), parse_text(response))
        yield chatbot, history
history = []
for _ in range(3000):
    # If the conversation gets too long, forget the oldest exchanges
    if len(history) > 5:
        del history[:2]  # drop the two oldest (query, response) pairs
    print('\033[1;31m{}\033[0m'.format('\nYou:'), end='')
    msg = input()
    print('\033[1;34m{}\033[0m'.format('ChatGLM:'), end='')
    response_new = ''
    for chatbot, history in predict(input=msg, chatbot=[], max_length=10000, top_p=0.5,
                                    temperature=0.5, history=history):
        # Print only the part of the response generated since the last step
        response_old = response_new
        response_new = chatbot[0][1]
        print(response_new[len(response_old):], end='')