xu song committed on
Commit
8344bac
1 Parent(s): 2fa4e4c
Files changed (4)
  1. README.md +9 -1
  2. app.py +32 -30
  3. app_util.py +17 -3
  4. config.py +2 -2
README.md CHANGED
@@ -21,4 +21,12 @@ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https
 Installing directly from source makes inference slow, so the following build arguments are added.
 ```sh
 pip install git+https://github.com/abetlen/llama-cpp-python.git -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
-```
+```
+
+
+## Serverless Inference API
+
+
+client.py
+
+## Reference
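
The new "Serverless Inference API" section references a client.py that is not included in this commit. A minimal sketch of what such a client could look like, using `huggingface_hub.InferenceClient`; the model id and the prompt below are placeholders, not taken from the repository:

```python
# Hypothetical client.py sketch -- the repository's actual client.py is not part of this commit.
from huggingface_hub import InferenceClient

# Placeholder model id; substitute whatever model the Space actually serves.
client = InferenceClient("Qwen/Qwen2-0.5B-Instruct")

response = client.chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
    ],
    max_tokens=128,
)
print(response.choices[0].message.content)
```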
app.py CHANGED
@@ -123,47 +123,49 @@ with gr.Blocks(head=None) as demo:
         label="Top-k",
     )
 
+    # TODO: gr.State cannot be passed as a parameter through the API.
     history = gr.State([{"role": "system", "content": system_list[0]}])  # only the system prompt carries useful information; the rest duplicates the chatbot content
     system.change(reset_state, inputs=[system], outputs=[chatbot, history])
 
-    ######## tab1
-    generate_btn.click(generate, [chatbot, history], outputs=[chatbot, history],
+    ######## tab1: self-chat
+    generate_btn.click(chat, [chatbot, history], outputs=[chatbot, history],
                        show_progress="full")
-    retry_btn.click(undo_generate, [chatbot, history], outputs=[chatbot, history]) \
-        .then(generate, [chatbot, history], outputs=[chatbot, history],
-              show_progress="full")
-    undo_btn.click(undo_generate, [chatbot, history], outputs=[chatbot, history])
-    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history])
-
-    ######## tab2
-    generate_btn_2.click(append_user, [input_text_2, chatbot, history], outputs=[chatbot, history]) \
+    retry_btn.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False) \
+        .then(chat, [chatbot, history], outputs=[chatbot, history],
+              show_progress="full", show_api=False)
+    undo_btn.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False)
+    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history], show_api=False)
+
+    ######## tab2: response-generator
+    generate_btn_2.click(append_user_to_history, [input_text_2, chatbot, history], outputs=[chatbot, history],
+                         show_api=False) \
         .then(generate_assistant_message, [chatbot, history], outputs=[chatbot, history],
-              show_progress="full")
-    retry_btn_2.click(undo_generate, [chatbot, history], outputs=[chatbot, history]) \
-        .then(generate, [chatbot, history], outputs=[chatbot, history],
-              show_progress="full")
-    undo_btn_2.click(undo_generate, [chatbot, history], outputs=[chatbot, history])
-    clear_btn_2.click(reset_state, inputs=[system], outputs=[chatbot, history]) \
-        .then(reset_user_input, outputs=[input_text_2])
-
-    ######## tab3
-    generate_btn_3.click(append_assistant, [input_text_3, chatbot, history], outputs=[chatbot, history]) \
-        .then(generate_assistant_message, [chatbot, history], outputs=[chatbot, history],
-              show_progress="full")
-    retry_btn_3.click(undo_generate, [chatbot, history], outputs=[chatbot, history]) \
-        .then(generate, [chatbot, history], outputs=[chatbot, history],
-              show_progress="full")
-    undo_btn_3.click(undo_generate, [chatbot, history], outputs=[chatbot, history])
-    clear_btn_3.click(reset_state, inputs=[system], outputs=[chatbot, history]) \
-        .then(reset_user_input, outputs=[input_text_3])
+              show_progress="full", show_api=False)
+    retry_btn_2.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False) \
+        .then(chat, [chatbot, history], outputs=[chatbot, history],
+              show_progress="full", show_api=False)
+    undo_btn_2.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False)
+    clear_btn_2.click(reset_state, inputs=[system], outputs=[chatbot, history], show_api=False) \
+        .then(reset_user_input, outputs=[input_text_2], show_api=False)
+
+    ######## tab3: user-simulator
+    generate_btn_3.click(append_assistant_to_history, [input_text_3, chatbot, history], outputs=[chatbot, history],
+                         show_api=False) \
+        .then(generate_user_message, [chatbot, history], outputs=[chatbot, history],
+              show_progress="full", show_api=False)
+    retry_btn_3.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False) \
+        .then(chat, [chatbot, history], outputs=[chatbot, history],
+              show_progress="full", show_api=False)
+    undo_btn_3.click(undo_generate, [chatbot, history], outputs=[chatbot, history], show_api=False)
+    clear_btn_3.click(reset_state, inputs=[system], outputs=[chatbot, history], show_api=False) \
+        .then(reset_user_input, outputs=[input_text_3], show_api=False)
 
     slider_max_new_tokens.change(set_max_new_tokens, inputs=[slider_max_new_tokens])
     slider_temperature.change(set_temperature, inputs=[slider_temperature])
    slider_top_p.change(set_top_p, inputs=[slider_top_p])
     slider_top_k.change(set_top_k, inputs=[slider_top_k])
 
-
-    demo.load(lambda: gr.update(value=random.choice(system_list)), None, system)
+    demo.load(lambda: gr.update(value=random.choice(system_list)), None, system, show_api=False)
 
     # demo.queue().launch(share=False, server_name="0.0.0.0", debug=True)
     # demo.queue().launch(concurrency_count=1, max_size=5)
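
Because most handlers are now registered with `show_api=False` (and the TODO notes that `gr.State` cannot be passed through the API), only the `chat` handler and the slider setters remain in the auto-generated API. One way to confirm what stays exposed is `gradio_client`; the Space id below is a placeholder:

```python
from gradio_client import Client

# Placeholder Space id -- replace with the Space this app is actually deployed to.
client = Client("owner/space-name")

# Lists the endpoints that were not hidden with show_api=False.
client.view_api()
```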
app_util.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import gradio as gr
+from typing import List, Dict
 from utils.logging_util import logger
 from models.cpp_qwen2 import Qwen2Simulator as Bot
 # from models.hf_qwen2 import Qwen2Simulator as Bot
@@ -68,7 +69,13 @@ def generate_assistant_message(chatbot, history):
     yield chatbot, history
 
 
-def generate(chatbot, history):
+def chat(chatbot: List[str], history: List[Dict]):
+    """
+    Self-chat: the bot generates both sides of the conversation.
+    :param chatbot: chatbot messages rendered in the UI
+    :param history: list of {"role": ..., "content": ...} dicts, starting with the system prompt
+    :return: yields updated (chatbot, history)
+    """
     request_param = json.dumps({'chatbot': chatbot, 'history': history}, ensure_ascii=False)
     logger.info(f"request_param: {request_param}")
     streamer = None
@@ -83,7 +90,14 @@ def generate(chatbot, history):
     yield out
 
 
-def append_user(input_content, chatbot, history):
+def append_user_to_history(input_content, chatbot, history):
+    """
+    Append a typed user turn to the conversation.
+    :param input_content: the user message text
+    :param chatbot: chatbot messages rendered in the UI
+    :param history: list of {"role": ..., "content": ...} dicts
+    :return: updated (chatbot, history)
+    """
     if history[-1]["role"] == "user":
         gr.Warning('You should generate assistant-response.')
         return chatbot, history
@@ -93,7 +107,7 @@ def append_user(input_content, chatbot, history):
     return chatbot, history
 
 
-def append_assistant(input_content, chatbot, history):
+def append_assistant_to_history(input_content, chatbot, history):
     if history[-1]["role"] != "user":
         gr.Warning('You should generate or type user-input first.')
         return chatbot, history
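
The renamed helpers enforce strict role alternation in `history`: `append_user_to_history` rejects a second consecutive user turn, and `append_assistant_to_history` requires a pending user turn. A standalone illustration of that invariant (plain Python, independent of the app's Gradio objects):

```python
# Role-alternation guards mirroring append_user_to_history / append_assistant_to_history.
history = [{"role": "system", "content": "You are a helpful assistant."}]

def user_turn_allowed(history):
    # Mirrors: if history[-1]["role"] == "user": warn and return unchanged
    return history[-1]["role"] != "user"

def assistant_turn_allowed(history):
    # Mirrors: if history[-1]["role"] != "user": warn and return unchanged
    return history[-1]["role"] == "user"

assert user_turn_allowed(history)        # only the system prompt so far -> user may speak
history.append({"role": "user", "content": "hi"})
assert not user_turn_allowed(history)    # the app would raise gr.Warning here
assert assistant_turn_allowed(history)   # an assistant response is expected next
```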
config.py CHANGED
@@ -5,5 +5,5 @@ MAX_SEQUENCE_LENGTH = 8192 #
 
 DEFAULT_MAX_NEW_TOKENS = 128
 DEFAULT_TOP_K = 100
-DEFAULT_TOP_P = 0.95
-DEFAULT_TEMPERATURE = 5
+DEFAULT_TOP_P = 0.98
+DEFAULT_TEMPERATURE = 2
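
These defaults feed the llama-cpp-python backend in `models/cpp_qwen2.py`. A hedged sketch of how they would typically be wired into a `llama_cpp.Llama` chat call; the GGUF path and the call site are assumptions, not part of this commit:

```python
from llama_cpp import Llama

from config import (DEFAULT_MAX_NEW_TOKENS, DEFAULT_TEMPERATURE,
                    DEFAULT_TOP_K, DEFAULT_TOP_P, MAX_SEQUENCE_LENGTH)

# Placeholder GGUF path; the real model is loaded inside models/cpp_qwen2.py.
llm = Llama(model_path="qwen2-instruct.gguf", n_ctx=MAX_SEQUENCE_LENGTH)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=DEFAULT_MAX_NEW_TOKENS,  # 128
    temperature=DEFAULT_TEMPERATURE,    # 2 after this commit (was 5)
    top_p=DEFAULT_TOP_P,                # 0.98 after this commit (was 0.95)
    top_k=DEFAULT_TOP_K,                # 100
)
print(out["choices"][0]["message"]["content"])
```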