lihongze8 commited on
Commit
61d18a4
1 Parent(s): e760def

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Official model repository on the Hugging Face Hub.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Load the tokenizer.
# NOTE(review): trust_remote_code executes Python shipped inside the model
# repo; recent transformers releases support Qwen2.5 natively, so consider
# dropping this flag — confirm against the deployed transformers version.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the causal-LM weights and place them explicitly on the CPU
# (this Space has no GPU).
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True
).to("cpu")
19
+
20
# Chat handler: one full generate round per call.
def predict(query, history=None):
    """Generate a reply for *query* and append the turn to the history.

    Args:
        query: The user's new message (plain text).
        history: List of (user, assistant) tuples from previous turns;
            a fresh list is created when None.

    Returns:
        (history, history) — the same updated list twice, so Gradio can
        feed both the Chatbot component and the State component.
    """
    if history is None:
        history = []

    # Fix: the original ignored `history` when building the prompt, so the
    # model never saw earlier turns. Rebuild the conversation as role-tagged
    # messages and apply the model's chat template — required for an
    # Instruct-tuned model to produce coherent replies.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": query})

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Encode and place tensors on the CPU (model lives on CPU).
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to("cpu")
    attention_mask = inputs["attention_mask"].to("cpu")

    # Sampled decoding; no_grad avoids building autograd state during inference.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
        )

    # Decode only the newly generated tokens (drop the prompt prefix).
    output_text = tokenizer.decode(
        output_ids[0][input_ids.shape[1]:],
        skip_special_tokens=True,
    )

    history.append((query, output_text))
    return history, history
51
+
52
# Build the Gradio UI.
with gr.Blocks() as demo:
    gr.Markdown("## Qwen2.5-0.5B-Instruct (CPU) 测试 Demo")
    chatbot = gr.Chatbot(label="Qwen Chatbot")
    msg = gr.Textbox(label="输入你的问题或对话")
    state = gr.State([])  # per-session conversation history

    submit = gr.Button("发送")
    submit.click(
        fn=predict,
        inputs=[msg, state],
        outputs=[chatbot, state],
    )
    # Fix: pressing Enter in the textbox previously did nothing; wire it to
    # the same handler as the button so both submit paths behave alike.
    msg.submit(
        fn=predict,
        inputs=[msg, state],
        outputs=[chatbot, state],
    )

# Launch the server on all interfaces at the standard Spaces port.
demo.launch(server_name="0.0.0.0", server_port=7860)