NewBreaker committed
Commit
ae25e3a
1 Parent(s): 310cea3

add app.py: int4 CPU model and streaming output

Files changed (2):
  1. app.py +88 -23
  2. app_local.py +88 -20
app.py CHANGED
@@ -1,39 +1,104 @@
- from transformers import AutoTokenizer, AutoModel
  import gradio as gr

- # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
- # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
- # chatglm-6b-int4 CUDA; runs successfully on the local machine
- # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
- # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
-
- # chatglm-6b-int4 CPU
- tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
- model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").float()
-
- # chatglm-6b
- # kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
- # tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
- # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
- # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
-
- # model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
-
- model = model.eval()
-
-
- def chat(msg):
-     history = []
-     response, history = model.chat(tokenizer, msg, history=history)
-     print("response:", response)
-     return response
-
-
- iface = gr.Interface(fn=chat, inputs="text", outputs="text")
- iface.launch()
+ from transformers import AutoModel, AutoTokenizer
  import gradio as gr
+ import mdtex2html
+
+ # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+ # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+ tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
+ model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
+
+ model = model.eval()
+
+ """Override Chatbot.postprocess"""
+
+
+ def postprocess(self, y):
+     if y is None:
+         return []
+     for i, (message, response) in enumerate(y):
+         y[i] = (
+             None if message is None else mdtex2html.convert(message),
+             None if response is None else mdtex2html.convert(response),
+         )
+     return y
+
+
+ gr.Chatbot.postprocess = postprocess
+
+
+ def parse_text(text):
+     """copied from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
+     lines = text.split("\n")
+     lines = [line for line in lines if line != ""]
+     count = 0
+     for i, line in enumerate(lines):
+         if "```" in line:
+             count += 1
+             items = line.split('`')
+             if count % 2 == 1:
+                 lines[i] = f'<pre><code class="language-{items[-1]}">'
+             else:
+                 lines[i] = '<br></code></pre>'
+         else:
+             if i > 0:
+                 if count % 2 == 1:
+                     line = line.replace("`", "\\`")
+                     line = line.replace("<", "&lt;")
+                     line = line.replace(">", "&gt;")
+                     line = line.replace(" ", "&nbsp;")
+                     line = line.replace("*", "&ast;")
+                     line = line.replace("_", "&lowbar;")
+                     line = line.replace("-", "&#45;")
+                     line = line.replace(".", "&#46;")
+                     line = line.replace("!", "&#33;")
+                     line = line.replace("(", "&#40;")
+                     line = line.replace(")", "&#41;")
+                     line = line.replace("$", "&#36;")
+                 lines[i] = "<br>" + line
+     text = "".join(lines)
+     return text
+
+
+ def predict(input, chatbot, max_length, top_p, temperature, history):
+     chatbot.append((parse_text(input), ""))
+     for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
+                                                temperature=temperature):
+         chatbot[-1] = (parse_text(input), parse_text(response))
+
+         yield chatbot, history
+
+
+ def reset_user_input():
+     return gr.update(value='')
+
+
+ def reset_state():
+     return [], []
+
+
+ with gr.Blocks() as demo:
+     gr.HTML("""<h1 align="center">ChatGLM</h1>""")
+
+     chatbot = gr.Chatbot()
+     with gr.Row():
+         with gr.Column(scale=4):
+             with gr.Column(scale=12):
+                 user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
+                     container=False)
+             with gr.Column(min_width=32, scale=1):
+                 submitBtn = gr.Button("Submit", variant="primary")
+         with gr.Column(scale=1):
+             emptyBtn = gr.Button("Clear History")
+             max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
+             top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
+             temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
+
+     history = gr.State([])
+
+     submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history],
+                     show_progress=True)
+     submitBtn.click(reset_user_input, [], [user_input])
+
+     emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
+
+ demo.queue().launch(share=False, inbrowser=True)

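The heart of this change is the switch from a one-shot gr.Interface to a generator-based predict(): each yield hands Gradio a partially filled chatbot list, so the reply renders chunk by chunk as model.stream_chat produces it (the "stream show" of the commit message). Below is a minimal, self-contained sketch of the same pattern; fake_stream_chat and its token list are illustrative stand-ins for the real ChatGLM generator, not part of this commit:

    import time
    import gradio as gr

    def fake_stream_chat(query, history):
        # Stand-in for model.stream_chat: yields (partial_response, history)
        # pairs, growing the response one chunk at a time.
        response = ""
        for token in ["Hello", ", ", "world", "!"]:
            response += token
            time.sleep(0.2)  # simulate generation latency
            yield response, history + [(query, response)]

    def predict(user_input, chatbot, history):
        chatbot.append((user_input, ""))
        for response, history in fake_stream_chat(user_input, history):
            chatbot[-1] = (user_input, response)
            yield chatbot, history  # every yield repaints the Chatbot component

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        history = gr.State([])
        user_input = gr.Textbox()
        user_input.submit(predict, [user_input, chatbot, history], [chatbot, history])

    demo.queue().launch()  # queue() is what lets Gradio consume generator callbacks
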
app_local.py CHANGED
@@ -1,36 +1,104 @@
- from transformers import AutoTokenizer, AutoModel
  import gradio as gr

  tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
- model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half()
- # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
-
- # from transformers import AutoTokenizer, AutoModel
- # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
- # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
- # model = model.eval()
-
- # kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
- # tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
- # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
- # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
-
- # model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
-
- model = model.eval()
-
-
- def chat(msg):
-     history = []
-     response, history = model.chat(tokenizer, msg, history=history)
-     print("response:", response)
-     return response
-
-
- iface = gr.Interface(fn=chat, inputs="text", outputs="text")
- iface.launch()
+ from transformers import AutoModel, AutoTokenizer
  import gradio as gr
+ import mdtex2html
+
+ # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+ # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
  tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+ model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+
+ model = model.eval()
+
+ """Override Chatbot.postprocess"""
+
+
+ def postprocess(self, y):
+     if y is None:
+         return []
+     for i, (message, response) in enumerate(y):
+         y[i] = (
+             None if message is None else mdtex2html.convert(message),
+             None if response is None else mdtex2html.convert(response),
+         )
+     return y
+
+
+ gr.Chatbot.postprocess = postprocess
+
+
+ def parse_text(text):
+     """copied from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
+     lines = text.split("\n")
+     lines = [line for line in lines if line != ""]
+     count = 0
+     for i, line in enumerate(lines):
+         if "```" in line:
+             count += 1
+             items = line.split('`')
+             if count % 2 == 1:
+                 lines[i] = f'<pre><code class="language-{items[-1]}">'
+             else:
+                 lines[i] = '<br></code></pre>'
+         else:
+             if i > 0:
+                 if count % 2 == 1:
+                     line = line.replace("`", "\\`")
+                     line = line.replace("<", "&lt;")
+                     line = line.replace(">", "&gt;")
+                     line = line.replace(" ", "&nbsp;")
+                     line = line.replace("*", "&ast;")
+                     line = line.replace("_", "&lowbar;")
+                     line = line.replace("-", "&#45;")
+                     line = line.replace(".", "&#46;")
+                     line = line.replace("!", "&#33;")
+                     line = line.replace("(", "&#40;")
+                     line = line.replace(")", "&#41;")
+                     line = line.replace("$", "&#36;")
+                 lines[i] = "<br>" + line
+     text = "".join(lines)
+     return text
+
+
+ def predict(input, chatbot, max_length, top_p, temperature, history):
+     chatbot.append((parse_text(input), ""))
+     for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
+                                                temperature=temperature):
+         chatbot[-1] = (parse_text(input), parse_text(response))
+
+         yield chatbot, history
+
+
+ def reset_user_input():
+     return gr.update(value='')
+
+
+ def reset_state():
+     return [], []
+
+
+ with gr.Blocks() as demo:
+     gr.HTML("""<h1 align="center">ChatGLM</h1>""")
+
+     chatbot = gr.Chatbot()
+     with gr.Row():
+         with gr.Column(scale=4):
+             with gr.Column(scale=12):
+                 user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
+                     container=False)
+             with gr.Column(min_width=32, scale=1):
+                 submitBtn = gr.Button("Submit", variant="primary")
+         with gr.Column(scale=1):
+             emptyBtn = gr.Button("Clear History")
+             max_length = gr.Slider(0, 4096, value=2048, step=1.0, label="Maximum length", interactive=True)
+             top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
+             temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
+
+     history = gr.State([])
+
+     submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history], [chatbot, history],
+                     show_progress=True)
+     submitBtn.click(reset_user_input, [], [user_input])
+
+     emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
+
+ demo.queue().launch(share=False, inbrowser=True)
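
Side by side, the only substantive difference between the two new files is the device cast: app.py (the Space build named in the commit message) loads the int4 checkpoint with .float() for CPU-only inference, while app_local.py keeps .half().cuda() for a local GPU. A small sketch of choosing the cast at runtime; the load_model helper is an illustrative convenience, not part of this commit:

    import torch
    from transformers import AutoModel, AutoTokenizer

    def load_model(path="models/chatglm-6b-int4"):
        # Same from_pretrained arguments as the commit uses.
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, revision="")
        model = AutoModel.from_pretrained(path, trust_remote_code=True, revision="")
        if torch.cuda.is_available():
            model = model.half().cuda()  # fp16 on GPU, as in app_local.py
        else:
            model = model.float()        # fp32 on CPU, as in app.py
        return tokenizer, model.eval()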