xu song committed on
Commit
5799733
·
1 Parent(s): 21c9a7d
Files changed (4) hide show
  1. app.py +27 -151
  2. app_util.py +102 -0
  3. models/cpp_qwen2.py +5 -15
  4. log_util.py → utils/logging_util.py +0 -0
app.py CHANGED
@@ -6,140 +6,21 @@
6
 
7
 
8
 
9
- # 单卡报错
10
- python moss_web_demo_gradio.py --model_name fnlp/moss-moon-003-sft --gpu 0,1,2,3
11
 
12
- # TODO
13
  - 第一句:
14
  - 代码和表格的预览
 
15
  - 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
16
  - 一个button,
17
 
 
18
  ## Reference
19
 
20
- -
21
  """
22
 
23
-
24
- import gradio as gr
25
- # from models.hf_qwen2 import bot
26
- from models.cpp_qwen2 import bot
27
-
28
-
29
- #
30
- # def postprocess(self, y):
31
- # if y is None:
32
- # return []
33
- # for i, (message, response) in enumerate(y):
34
- # y[i] = (
35
- # None if message is None else mdtex2html.convert((message)),
36
- # None if response is None else mdtex2html.convert(response),
37
- # )
38
- # return y
39
- #
40
- #
41
- # gr.Chatbot.postprocess = postprocess
42
-
43
-
44
- def parse_text(text):
45
- """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
46
- lines = text.split("\n")
47
- lines = [line for line in lines if line != ""]
48
- count = 0
49
- for i, line in enumerate(lines):
50
- if "```" in line:
51
- count += 1
52
- items = line.split('`')
53
- if count % 2 == 1:
54
- lines[i] = f'<pre><code class="language-{items[-1]}">'
55
- else:
56
- lines[i] = f'<br></code></pre>'
57
- else:
58
- if i > 0:
59
- if count % 2 == 1:
60
- line = line.replace("`", "\`")
61
- line = line.replace("<", "&lt;")
62
- line = line.replace(">", "&gt;")
63
- line = line.replace(" ", "&nbsp;")
64
- line = line.replace("*", "&ast;")
65
- line = line.replace("_", "&lowbar;")
66
- line = line.replace("-", "&#45;")
67
- line = line.replace(".", "&#46;")
68
- line = line.replace("!", "&#33;")
69
- line = line.replace("(", "&#40;")
70
- line = line.replace(")", "&#41;")
71
- line = line.replace("$", "&#36;")
72
- lines[i] = "<br>" + line
73
- text = "".join(lines)
74
- return text
75
-
76
-
77
- def generate_query(chatbot, history):
78
- if history and history[-1]["role"] == "user": # 该生成response了
79
- gr.Warning('You should generate assistant-response.')
80
- yield None, chatbot, history
81
- else:
82
- chatbot.append(None)
83
- streamer = bot.generate_query(history, stream=True)
84
- for query in streamer:
85
- chatbot[-1] = (query, None)
86
- yield query, chatbot, history
87
-
88
- history.append({"role": "user", "content": query})
89
- yield query, chatbot, history
90
-
91
-
92
- def generate_response(query, chatbot, history):
93
- """
94
- 自动模式下:query is None
95
- 人工模式下:query 是用户输入
96
- :param query:
97
- :param chatbot:
98
- :param history:
99
- :return:
100
- """
101
- if query and history[-1]["role"] != "user":
102
- history.append({"role": "user", "content": query})
103
- query = history[-1]["content"]
104
-
105
- if history[-1]["role"] != "user":
106
- gr.Warning('You should generate or type user-input first.')
107
- yield chatbot, history
108
- else:
109
- streamer = bot.generate_response(history, stream=True)
110
- for response in streamer:
111
- chatbot[-1] = (query, response)
112
- yield chatbot, history
113
-
114
- history.append({"role": "assistant", "content": response})
115
- print(f"chatbot is {chatbot}")
116
- print(f"history is {history}")
117
- yield chatbot, history
118
-
119
-
120
- def generate():
121
- """
122
-
123
- :return:
124
- """
125
- pass
126
-
127
-
128
- def regenerate():
129
- """
130
- 删除上一轮,重新生成。
131
- :return:
132
- """
133
- pass
134
-
135
-
136
- def reset_user_input():
137
- return gr.update(value='')
138
-
139
-
140
- def reset_state(system):
141
- return [], [{"role": "system", "content": system}]
142
-
143
 
144
  system_list = [
145
  "You are a helpful assistant.",
@@ -165,55 +46,50 @@ with gr.Blocks() as demo:
165
  chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
166
  with gr.Row():
167
  with gr.Column(scale=4):
168
- user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10)
169
  with gr.Row():
170
- generate_query_btn = gr.Button("生成问题")
171
- regen_btn = gr.Button("🤔️ Regenerate (重试)")
172
- submit_btn = gr.Button("生成回复", variant="primary")
 
173
  stop_btn = gr.Button("停止生成", variant="primary")
174
- empty_btn = gr.Button("🧹 Clear History (清除历史)")
175
  with gr.Column(scale=1):
176
  # generate_query_btn = gr.Button("Generate First Query")
177
-
178
- clear_btn = gr.Button("重置")
179
  gr.Dropdown(
180
  ["moss", "chatglm-2", "chatpdf"],
181
  value="moss",
182
  label="问题生成器",
183
  # info="Will add more animals later!"
184
- ),
185
  gr.Dropdown(
186
  ["moss", "chatglm-2", "gpt3.5-turbo"],
187
  value="gpt3.5-turbo",
188
  label="回复生成器",
189
  # info="Will add more animals later!"
190
- ),
191
-
 
 
 
 
 
 
 
 
 
 
 
 
192
  history = gr.State([{"role": "system", "content": system_list[0]}])
193
-
194
  system.change(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
195
-
196
- submit_btn.click(generate_response, [user_input, chatbot, history], [chatbot, history],
197
- show_progress="full")
198
  # submit_btn.click(reset_user_input, [], [user_input])
199
 
200
  clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
201
 
202
- generate_query_btn.click(generate_query, [chatbot, history], outputs=[user_input, chatbot, history],
203
  show_progress="full")
204
 
205
- # generate_query_btn.
206
-
207
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
208
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature",
209
- info="Larger temperature increase the randomness"),
210
- gr.Slider(
211
- minimum=0.1,
212
- maximum=1.0,
213
- value=0.95,
214
- step=0.05,
215
- label="Top-p (nucleus sampling)",
216
- ),
217
 
218
  demo.queue().launch(share=False, server_name="0.0.0.0")
219
  # demo.queue().launch(share=True)
 
6
 
7
 
8
 
9
+ ## TODO
 
10
 
 
11
  - 第一句:
12
  - 代码和表格的预览
13
+ - markdown解析:mdtex2html
14
  - 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
15
  - 一个button,
16
 
17
+
18
  ## Reference
19
 
20
+ - https://github.com/GaiZhenbiao/ChuanhuChatGPT/
21
  """
22
 
23
+ from app_util import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  system_list = [
26
  "You are a helpful assistant.",
 
46
  chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
47
  with gr.Row():
48
  with gr.Column(scale=4):
49
+ # user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10)
50
  with gr.Row():
51
+ generate_btn = gr.Button("🤔️ Generate")
52
+ retry_btn = gr.Button("🔄 Regenerate")
53
+ undo_btn = gr.Button("↩️ Undo")
54
+ clear_btn = gr.Button("🗑️ Clear") # 🧹 Clear History (清除历史)
55
  stop_btn = gr.Button("停止生成", variant="primary")
 
56
  with gr.Column(scale=1):
57
  # generate_query_btn = gr.Button("Generate First Query")
 
 
58
  gr.Dropdown(
59
  ["moss", "chatglm-2", "chatpdf"],
60
  value="moss",
61
  label="问题生成器",
62
  # info="Will add more animals later!"
63
+ )
64
  gr.Dropdown(
65
  ["moss", "chatglm-2", "gpt3.5-turbo"],
66
  value="gpt3.5-turbo",
67
  label="回复生成器",
68
  # info="Will add more animals later!"
69
+ )
70
+
71
+ slider_max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
72
+ slider_temperature = gr.Slider(minimum=0.1, maximum=10.0, value=5, step=0.1, label="Temperature",
73
+ info="Larger temperature increase the randomness")
74
+ slider_top_p = gr.Slider(
75
+ minimum=0.1,
76
+ maximum=1.0,
77
+ value=0.95,
78
+ step=0.05,
79
+ label="Top-p (nucleus sampling)",
80
+ )
81
+
82
+ ########
83
  history = gr.State([{"role": "system", "content": system_list[0]}])
 
84
  system.change(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
 
 
 
85
  # submit_btn.click(reset_user_input, [], [user_input])
86
 
87
  clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
88
 
89
+ generate_btn.click(generate, [chatbot, history], outputs=[chatbot, history],
90
  show_progress="full")
91
 
92
+ slider_max_new_tokens.change(set_max_tokens, inputs=[slider_max_new_tokens])
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  demo.queue().launch(share=False, server_name="0.0.0.0")
95
  # demo.queue().launch(share=True)
app_util.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from utils.logging_util import logger
4
+ from models.cpp_qwen2 import bot
5
+
6
+
7
+
8
+
9
+ #
10
+ # def postprocess(self, y):
11
+ # if y is None:
12
+ # return []
13
+ # for i, (message, response) in enumerate(y):
14
+ # y[i] = (
15
+ # None if message is None else mdtex2html.convert((message)),
16
+ # None if response is None else mdtex2html.convert(response),
17
+ # )
18
+ # return y
19
+ #
20
+ # gr.Chatbot.postprocess = postprocess
21
+
22
+
23
def generate_query(chatbot, history):
    """Stream a simulated user query and record it as the next user turn.

    Args:
        chatbot: List of ``(user_text, bot_text)`` rows shown in the chat
            widget; mutated in place.
        history: List of ``{"role": ..., "content": ...}`` message dicts;
            mutated in place.

    Yields:
        ``(query, chatbot, history)`` for every value produced by
        ``bot.generate_query`` and once more after the final query has been
        appended to ``history``. ``query`` is ``None`` when a user turn is
        already pending or when nothing was generated.
    """
    if history and history[-1]["role"] == "user":
        # A user turn is already waiting; the assistant has to answer first.
        gr.Warning('You should generate assistant-response.')
        yield None, chatbot, history
        return

    # Placeholder row that is overwritten while the query streams in.
    chatbot.append(None)
    query = None
    for query in bot.generate_query(history, stream=True):
        chatbot[-1] = (query, None)
        yield query, chatbot, history

    if query is None:
        # The streamer produced nothing: drop the placeholder instead of
        # failing on an unbound ``query`` or leaving a ``None`` row behind.
        chatbot.pop()
        yield None, chatbot, history
        return

    # The last streamed value is stored as the finished user message.
    history.append({"role": "user", "content": query})
    yield query, chatbot, history
35
+
36
+
37
def generate_response(query, chatbot, history):
    """Stream the assistant reply to the pending user turn.

    Auto mode: ``query`` is ``None`` and the pending user message is read
    from the end of ``history``. Manual mode: ``query`` is text typed by the
    user and is appended to ``history`` first.

    Args:
        query: User text, or ``None`` in auto mode.
        chatbot: List of ``(user_text, bot_text)`` rows shown in the chat
            widget; mutated in place.
        history: List of ``{"role": ..., "content": ...}`` message dicts;
            mutated in place.

    Yields:
        ``(chatbot, history)`` for every value produced by
        ``bot.generate_response`` and once more after the reply has been
        appended to ``history``.
    """
    if query and history[-1]["role"] != "user":
        history.append({"role": "user", "content": query})
        # A typed query has no chatbot row yet; add one so the reply below
        # does not overwrite the previous turn or index an empty list.
        chatbot.append((query, None))
    query = history[-1]["content"]

    if history[-1]["role"] != "user":
        gr.Warning('You should generate or type user-input first.')
        yield chatbot, history
        return

    response = None
    for response in bot.generate_response(history, stream=True):
        chatbot[-1] = (query, response)
        yield chatbot, history

    if response is None:
        # Nothing was generated: leave history untouched instead of failing
        # on an unbound ``response``.
        yield chatbot, history
        return

    # The last streamed value is stored as the finished assistant message.
    history.append({"role": "assistant", "content": response})
    logger.info(f"chatbot is {chatbot}")
    logger.info(f"history is {history}")
    yield chatbot, history
63
+
64
+
65
def generate(chatbot, history):
    """Advance the conversation by one turn, picking the speaker from history.

    After a ``system`` or ``assistant`` message the next user query is
    generated; after a ``user`` message the assistant response is generated.

    This is a generator function so that intermediate results are streamed
    rather than a generator object being handed back as a single value.

    Args:
        chatbot: List of ``(user_text, bot_text)`` rows; mutated in place.
        history: List of ``{"role": ..., "content": ...}`` message dicts;
            mutated in place.

    Yields:
        ``(chatbot, history)`` pairs, matching the two outputs this handler
        is wired to in app.py.
    """
    role = history[-1]["role"]
    if role in ("assistant", "system"):
        # generate_query also yields the query text; drop it so every yield
        # has the two-value shape the click handler expects.
        for _query, chatbot, history in generate_query(chatbot, history):
            yield chatbot, history
    elif role == "user":
        # Auto mode: no typed input, answer the pending user message.
        yield from generate_response(None, chatbot, history)
    else:
        gr.Warning(f"Unexpected role at the end of history: {role!r}")
        yield chatbot, history
73
+
74
+
75
def regenerate():
    """Drop the most recent round and generate it again.

    Not implemented yet: the function currently does nothing and returns
    ``None``.
    """
    return None
81
+
82
+
83
def reset_user_input():
    """Build a Gradio update whose value is the empty string."""
    cleared = gr.update(value='')
    return cleared
85
+
86
+
87
def reset_state(system):
    """Start a fresh conversation seeded with the given system prompt.

    Args:
        system: Content of the system message.

    Returns:
        A pair ``(chatbot_rows, history)``: an empty list and a history that
        holds only the system message.
    """
    system_message = {"role": "system", "content": system}
    chatbot_rows = []
    return chatbot_rows, [system_message]
89
+
90
+
91
def set_max_tokens(max_tokens):
    """Store ``max_tokens`` under the "max_tokens" key of ``bot.generation_kwargs``."""
    bot.generation_kwargs.update({"max_tokens": max_tokens})
93
+
94
+
95
def clear_history():
    """Placeholder: not implemented yet, does nothing and returns ``None``."""
    return None
97
+
98
+
99
def undo_generate():
    """Placeholder: not implemented yet, does nothing and returns ``None``."""
    return None
101
+
102
+
models/cpp_qwen2.py CHANGED
@@ -21,7 +21,7 @@ from simulator import Simulator
21
  import llama_cpp
22
  # import llama_cpp.llama_tokenizer
23
  from transformers import AutoTokenizer
24
- from log_util import logger
25
 
26
 
27
  class Qwen2Simulator(Simulator):
@@ -44,8 +44,6 @@ class Qwen2Simulator(Simulator):
44
  verbose=False,
45
  )
46
  logger.info(f"llm has been initialized: {self.llm}")
47
- # warmup
48
-
49
 
50
  self.generation_kwargs = dict(
51
  temperature=5,
@@ -53,6 +51,7 @@ class Qwen2Simulator(Simulator):
53
  top_k=40,
54
  max_tokens=20,
55
  repeat_penalty=1.1,
 
56
  stop=[
57
  "<|im_end|>",
58
  "<|im_start|>",
@@ -82,7 +81,7 @@ class Qwen2Simulator(Simulator):
82
 
83
  def generate_response(self, messages, stream=True):
84
  assert messages[-1]["role"] == "user"
85
- logger.info(f"generating {json.dumps(messages)}")
86
  inputs = self.hf_tokenizer.apply_chat_template(
87
  messages,
88
  tokenize=False,
@@ -96,18 +95,9 @@ class Qwen2Simulator(Simulator):
96
  def _generate(self, inputs):
97
  """
98
  TODO: chat with cache.
99
- qwen2-0.5b-chat 有bug:有时user生成结束没有<|im_end|>,示例:
100
- <|im_start|>system
101
- you are a helpful assistant<|im_end|>
102
- <|im_start|>user
103
- hi, what your name<|im_end|>
104
- <|im_start|>assistant
105
- My name is Jordan<|im_end|>
106
- <|im_start|>user # 以上是输入,以下是生成
107
- how old are you?
108
- <|im_start|>assistant
109
- I am a 41-year-old man.<|im_end|>
110
  """
 
111
  output = self.llm(
112
  inputs,
113
  **self.generation_kwargs
 
21
  import llama_cpp
22
  # import llama_cpp.llama_tokenizer
23
  from transformers import AutoTokenizer
24
+ from utils.logging_util import logger
25
 
26
 
27
  class Qwen2Simulator(Simulator):
 
44
  verbose=False,
45
  )
46
  logger.info(f"llm has been initialized: {self.llm}")
 
 
47
 
48
  self.generation_kwargs = dict(
49
  temperature=5,
 
51
  top_k=40,
52
  max_tokens=20,
53
  repeat_penalty=1.1,
54
+ # qwen2-0.5b-chat 有时内容生成结束没有<|im_end|>,直接跟 <|im_start|>
55
  stop=[
56
  "<|im_end|>",
57
  "<|im_start|>",
 
81
 
82
  def generate_response(self, messages, stream=True):
83
  assert messages[-1]["role"] == "user"
84
+ logger.info(f"generating {json.dumps(messages, ensure_ascii=False)}")
85
  inputs = self.hf_tokenizer.apply_chat_template(
86
  messages,
87
  tokenize=False,
 
95
  def _generate(self, inputs):
96
  """
97
  TODO: chat with cache.
98
+
 
 
 
 
 
 
 
 
 
 
99
  """
100
+ logger.info(f"generation_kwargs {self.generation_kwargs}")
101
  output = self.llm(
102
  inputs,
103
  **self.generation_kwargs
log_util.py → utils/logging_util.py RENAMED
File without changes