Spaces:
Sleeping
Sleeping
Fishead_East
committed on
Commit
•
d35b9cf
1
Parent(s):
733d7c5
跑通Prompt模块
Browse files
- README.md +19 -10
- chat_poets/chat.py +88 -42
- chat_poets/prompts.json +2 -2
- gradio_ui/gr_chat.py +11 -2
README.md
CHANGED
@@ -17,25 +17,39 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
17 |
|
18 |
---
|
19 |
|
|
|
|
|
|
|
|
|
20 |
## 结构说明
|
21 |
|
22 |
### 开发模块
|
23 |
-
|
24 |
-
-
|
25 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
### 其他
|
28 |
- LLM: 与星火交互的功能封装
|
29 |
- requirement.txt:依赖包列表
|
30 |
- .gitattributes:hf配置
|
31 |
-
- .env:环境变量文件,存储星火api
|
32 |
|
33 |
---
|
34 |
|
35 |
## 本地依赖
|
36 |
- Python-3.9
|
37 |
|
38 |
-
###
|
39 |
*注意新的依赖要同步更新requirement.txt文件*
|
40 |
- websocket-client
|
41 |
- langchain
|
@@ -43,8 +57,3 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
43 |
- bs4
|
44 |
- python-dotenv
|
45 |
|
46 |
-
### (2) Conda
|
47 |
-
- websocket-client-0.58.0
|
48 |
-
#### conda-forge
|
49 |
-
- langchain-0.0.239
|
50 |
-
- gradio-3.23.0
|
|
|
17 |
|
18 |
---
|
19 |
|
20 |
+
## 调试说明
|
21 |
+
|
22 |
+
在**app.py**文件下,运行main函数,待终端输出本地网址后双击打开,在弹出的窗口进行交互调试。
|
23 |
+
|
24 |
## 结构说明
|
25 |
|
26 |
### 开发模块
|
27 |
+
🌟表示核心模块,🌛表示尚未使用或计划优化的模块
|
28 |
+
- chat_poets
|
29 |
+
- prompts.json:🌟所有的Prompts提示词
|
30 |
+
- get_path.py:🌟根据系统环境,获取json文件的绝对路径以供访问
|
31 |
+
- poet_search:实时检索古诗信息(古诗文网)
|
32 |
+
- gradio_ui
|
33 |
+
- gr_chat:🌟使用gradio搭建demo的模块
|
34 |
+
- gushiwen_vector_database:🌛向量知识库【已跑通验证,需要进一步处理】
|
35 |
+
- gushiwen.json 古诗文数据
|
36 |
+
- search_vectors.py:计算向量并获得相似文本
|
37 |
+
- local_vectors:向量化的数据文件
|
38 |
+
- embedding_model:下载的模型【未附上】
|
39 |
+
- txt2img:🌛文生图的模块
|
40 |
|
41 |
### 其他
|
42 |
- LLM: 与星火交互的功能封装
|
43 |
- requirement.txt:依赖包列表
|
44 |
- .gitattributes:hf配置
|
45 |
+
- .env:环境变量文件,存储星火api访问信息⚠️注意不要上传具体值
|
46 |
|
47 |
---
|
48 |
|
49 |
## 本地依赖
|
50 |
- Python-3.9
|
51 |
|
52 |
+
### Pip ✅
|
53 |
*注意新的依赖要同步更新requirement.txt文件*
|
54 |
- websocket-client
|
55 |
- langchain
|
|
|
57 |
- bs4
|
58 |
- python-dotenv
|
59 |
|
|
|
|
|
|
|
|
|
|
chat_poets/chat.py
CHANGED
@@ -16,23 +16,46 @@ class ChatPoet:
|
|
16 |
# 记录所有Prompts的文件
|
17 |
with open(get_prompts_path("prompts.json")) as f:
|
18 |
prompts = json.load(f)
|
|
|
|
|
19 |
|
20 |
@classmethod
|
21 |
-
def allow_chat(cls, user_message: str)
|
22 |
"""
|
23 |
-
对话开始时判断用户输入是否合规:提及诗人或古诗
|
|
|
24 |
"""
|
25 |
-
prompt_allow =
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
while True:
|
31 |
try:
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
34 |
except ValueError:
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
@classmethod
|
38 |
def get_question_type(cls, user_message: str) -> str:
|
@@ -41,12 +64,10 @@ class ChatPoet:
|
|
41 |
:param user_message:
|
42 |
:return:
|
43 |
"""
|
44 |
-
|
45 |
prompt_question_type = cls.prompts["get_question_type"].format(user_message=user_message)
|
46 |
-
|
47 |
-
print(f"response: {response}")
|
48 |
|
49 |
-
return
|
50 |
|
51 |
@classmethod
|
52 |
def gen_response(cls, pattern: str, history: list[list]) -> str:
|
@@ -55,16 +76,23 @@ class ChatPoet:
|
|
55 |
pattern = "adult" | "teen" | "child",模式
|
56 |
注:对话历史的最后一项是需要填充的内容,即history[-1] = [Question, ];Question为用户刚提出的问题,尚未回答
|
57 |
"""
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
if pattern == "adult":
|
60 |
-
cls.chat_adult(question_type=question_type, history=history)
|
61 |
else:
|
62 |
-
cls.chat_teen_or_child(pattern=pattern, question_type=question_type, history=history)
|
63 |
-
|
64 |
-
|
65 |
-
#
|
66 |
-
# elif pattern == "child":
|
67 |
-
# return cls.chat_child(question_type, history)
|
68 |
|
69 |
@classmethod
|
70 |
def get_str_history(cls, history: list[list]) -> str:
|
@@ -89,17 +117,17 @@ class ChatPoet:
|
|
89 |
:param history: 对话记录,最后一项是[Question, ],即答案待给出
|
90 |
:return: 给出答案
|
91 |
"""
|
92 |
-
if question_type == "
|
93 |
return cls.get_str_response_origin(history)
|
94 |
-
elif question_type == "
|
95 |
return cls.get_str_response_vernacular(history)
|
96 |
-
elif question_type == "
|
97 |
return cls.get_str_response_appreciate(history)
|
98 |
-
elif question_type == "
|
99 |
return cls.get_str_response_vocab(history)
|
100 |
-
elif question_type == "
|
101 |
return cls.get_str_response_author(history)
|
102 |
-
elif question_type == "
|
103 |
return cls.get_str_response_background(history)
|
104 |
|
105 |
@classmethod
|
@@ -108,7 +136,7 @@ class ChatPoet:
|
|
108 |
青少年模式或儿童模式
|
109 |
:param pattern: 交互模式 "teen" | "child"
|
110 |
:param history: 对话记录,最后一项是[Question, ],即答案待给出
|
111 |
-
:param question_type:
|
112 |
:return: 给出答案
|
113 |
"""
|
114 |
prompt_mode = cls.prompts["pattern"][pattern]
|
@@ -134,29 +162,43 @@ class ChatPoet:
|
|
134 |
@classmethod
|
135 |
def get_str_response_origin(cls, history: list[list]) -> str:
|
136 |
"""古诗原文"""
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
139 |
return str_response
|
140 |
|
141 |
@classmethod
|
142 |
def get_str_response_vernacular(cls, history: list[list]) -> str:
|
143 |
"""古诗白话文翻译"""
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
146 |
return str_response
|
147 |
|
148 |
@classmethod
|
149 |
def get_str_response_appreciate(cls, history: list[list]) -> str:
|
150 |
"""古诗鉴赏"""
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
153 |
return str_response
|
154 |
|
155 |
@classmethod
|
156 |
def get_str_response_vocab(cls, history: list[list]) -> str:
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
160 |
response_origin=cls.get_str_response_origin(
|
161 |
history=history))
|
162 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
@@ -164,19 +206,23 @@ class ChatPoet:
|
|
164 |
|
165 |
@classmethod
|
166 |
def get_str_response_author(cls, history: list[list]) -> str:
|
167 |
-
|
|
|
|
|
168 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
169 |
return str_response
|
170 |
|
171 |
@classmethod
|
172 |
def get_str_response_background(cls, history: list[list]) -> str:
|
173 |
-
|
174 |
-
|
|
|
|
|
175 |
response_origin=cls.get_str_response_origin(
|
176 |
history=history))
|
177 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
178 |
return str_response
|
179 |
|
180 |
|
181 |
-
if __name__ == '__main__':
|
182 |
-
ChatPoet.
|
|
|
16 |
# 记录所有Prompts的文件
|
17 |
with open(get_prompts_path("prompts.json")) as f:
|
18 |
prompts = json.load(f)
|
19 |
+
# 记录单轮有效对话的关键内容(是否有效、诗人、古诗)——{"exist": int, "author": str, "poem": str}
|
20 |
+
res_dict = dict()
|
21 |
|
22 |
@classmethod
|
23 |
+
def allow_chat(cls, user_message: str):
|
24 |
"""
|
25 |
+
对话开始时判断用户输入是否合规:提及诗人或古诗
|
26 |
+
:param user_message:用户的问题
|
27 |
"""
|
28 |
+
prompt_allow = cls.prompts["allow_chat"].format(user_message=user_message)
|
29 |
+
print(f"prompt_allow:{prompt_allow}")
|
30 |
+
while True:
|
31 |
+
try:
|
32 |
+
res_json_str = cls.llm(prompt_allow)
|
33 |
+
print(f"allow_chat: {res_json_str}")
|
34 |
+
cls.res_dict = json.loads(res_json_str)
|
35 |
+
break
|
36 |
+
except:
|
37 |
+
continue
|
38 |
|
39 |
while True:
|
40 |
try:
|
41 |
+
cls.res_dict["exist"] = int(cls.res_dict["exist"])
|
42 |
+
if cls.res_dict["author"] is None:
|
43 |
+
cls.res_dict["author"] = ""
|
44 |
+
if cls.res_dict["poem"] is None:
|
45 |
+
cls.res_dict["poem"] = ""
|
46 |
+
print(f"res_dict:{cls.res_dict}")
|
47 |
+
return
|
48 |
except ValueError:
|
49 |
+
prompt_allow = cls.prompts["allow_chat"].format(user_message=user_message)
|
50 |
+
print(prompt_allow)
|
51 |
+
res_json_str = cls.llm(prompt_allow)
|
52 |
+
print(f"allow_chat: {res_json_str}")
|
53 |
+
cls.res_dict = json.loads(res_json_str)
|
54 |
+
|
55 |
+
@classmethod
|
56 |
+
def stop_chat(cls):
|
57 |
+
"""结束本次对话,清空对话关键内容"""
|
58 |
+
cls.res_dict.clear()
|
59 |
|
60 |
@classmethod
|
61 |
def get_question_type(cls, user_message: str) -> str:
|
|
|
64 |
:param user_message:
|
65 |
:return:
|
66 |
"""
|
|
|
67 |
prompt_question_type = cls.prompts["get_question_type"].format(user_message=user_message)
|
68 |
+
question_type = cls.llm(prompt_question_type)
|
|
|
69 |
|
70 |
+
return question_type
|
71 |
|
72 |
@classmethod
|
73 |
def gen_response(cls, pattern: str, history: list[list]) -> str:
|
|
|
76 |
pattern = "adult" | "teen" | "child",模式
|
77 |
注:对话历史的最后一项是需要填充的内容,即history[-1] = [Question, ];Question为用户刚提出的问题,尚未回答
|
78 |
"""
|
79 |
+
limit_list = ["诗词原文", "诗词白话文翻译", "诗词鉴赏", "词语解释", "写作背景", "作者简介"]
|
80 |
+
while True:
|
81 |
+
# 循环纠错,保证输出的问题类型在给定范围内
|
82 |
+
question_type = cls.get_question_type(history[-1][0])
|
83 |
+
if limit_list.count(question_type) > 0:
|
84 |
+
print(f"question_type:{question_type}")
|
85 |
+
break
|
86 |
+
else:
|
87 |
+
print(f"错误的question_type:{question_type}")
|
88 |
+
|
89 |
if pattern == "adult":
|
90 |
+
response = cls.chat_adult(question_type=question_type, history=history)
|
91 |
else:
|
92 |
+
response = cls.chat_teen_or_child(pattern=pattern, question_type=question_type, history=history)
|
93 |
+
print(f"response:{response}")
|
94 |
+
return response
|
95 |
+
# return "wow"
|
|
|
|
|
96 |
|
97 |
@classmethod
|
98 |
def get_str_history(cls, history: list[list]) -> str:
|
|
|
117 |
:param history: 对话记录,最后一项是[Question, ],即答案待给出
|
118 |
:return: 给出答案
|
119 |
"""
|
120 |
+
if question_type == "诗词原文":
|
121 |
return cls.get_str_response_origin(history)
|
122 |
+
elif question_type == "诗词白话文翻译":
|
123 |
return cls.get_str_response_vernacular(history)
|
124 |
+
elif question_type == "诗词鉴赏":
|
125 |
return cls.get_str_response_appreciate(history)
|
126 |
+
elif question_type == "词语解释":
|
127 |
return cls.get_str_response_vocab(history)
|
128 |
+
elif question_type == "写作背景":
|
129 |
return cls.get_str_response_author(history)
|
130 |
+
elif question_type == "作者简介":
|
131 |
return cls.get_str_response_background(history)
|
132 |
|
133 |
@classmethod
|
|
|
136 |
青少年模式或儿童模式
|
137 |
:param pattern: 交互模式 "teen" | "child"
|
138 |
:param history: 对话记录,最后一项是[Question, ],即答案待给出
|
139 |
+
:param question_type:
|
140 |
:return: 给出答案
|
141 |
"""
|
142 |
prompt_mode = cls.prompts["pattern"][pattern]
|
|
|
162 |
@classmethod
|
163 |
def get_str_response_origin(cls, history: list[list]) -> str:
|
164 |
"""古诗原文"""
|
165 |
+
print(f"res_dict:{cls.res_dict}")
|
166 |
+
print("----古诗原文----")
|
167 |
+
str_prompt = cls.get_str_history(history=history) + \
|
168 |
+
cls.prompts["questions_type"]["origin"].format(author=cls.res_dict["author"],
|
169 |
+
poem=cls.res_dict["poem"])
|
170 |
+
str_response = cls.llm(str_prompt)
|
171 |
return str_response
|
172 |
|
173 |
@classmethod
|
174 |
def get_str_response_vernacular(cls, history: list[list]) -> str:
|
175 |
"""古诗白话文翻译"""
|
176 |
+
print(" ----古诗白话文翻译 ----")
|
177 |
+
str_prompt = cls.prompts["questions_type"]["vernacular"].format(author=cls.res_dict["author"],
|
178 |
+
poem=cls.res_dict["poem"]) + \
|
179 |
+
cls.get_str_response_origin(history=history)
|
180 |
+
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
181 |
return str_response
|
182 |
|
183 |
@classmethod
|
184 |
def get_str_response_appreciate(cls, history: list[list]) -> str:
|
185 |
"""古诗鉴赏"""
|
186 |
+
print(" ----古诗鉴赏 ----")
|
187 |
+
str_prompt = cls.prompts["questions_type"]["appreciate"].format(author=cls.res_dict["author"],
|
188 |
+
poem=cls.res_dict["poem"]) + \
|
189 |
+
cls.get_str_response_origin(history=history)
|
190 |
+
print(f"该问题的最终Prompt(除历史记录):{str_prompt}")
|
191 |
+
|
192 |
+
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
193 |
return str_response
|
194 |
|
195 |
@classmethod
|
196 |
def get_str_response_vocab(cls, history: list[list]) -> str:
|
197 |
+
"""词语解释"""
|
198 |
+
print(" ----词语解释 ----")
|
199 |
+
str_prompt = cls.prompts["questions_type"]["vocab"].format(author=cls.res_dict["author"],
|
200 |
+
poem=cls.res_dict["poem"],
|
201 |
+
word="需要解释的词语", # todo 这是个啥
|
202 |
response_origin=cls.get_str_response_origin(
|
203 |
history=history))
|
204 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
|
|
206 |
|
207 |
@classmethod
|
208 |
def get_str_response_author(cls, history: list[list]) -> str:
|
209 |
+
"""作者简介"""
|
210 |
+
print(" ----作者简介 ----")
|
211 |
+
str_prompt = cls.prompts["questions_type"]["author"].format(author=cls.res_dict["poem"])
|
212 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
213 |
return str_response
|
214 |
|
215 |
@classmethod
|
216 |
def get_str_response_background(cls, history: list[list]) -> str:
|
217 |
+
"""写作背景"""
|
218 |
+
print(" ----写作背景 ----")
|
219 |
+
str_prompt = cls.prompts["questions_type"]["background"].format(author=cls.res_dict["author"],
|
220 |
+
poem=cls.res_dict["poem"],
|
221 |
response_origin=cls.get_str_response_origin(
|
222 |
history=history))
|
223 |
str_response = cls.llm(cls.get_str_history(history=history) + str_prompt)
|
224 |
return str_response
|
225 |
|
226 |
|
227 |
+
# if __name__ == '__main__':
|
228 |
+
# ChatPoet.allow_chat("你知道《静夜思》这首诗吗?")
|
chat_poets/prompts.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"allow_chat": "
|
3 |
|
4 |
-
"get_question_type": "你是一个古诗老师,需要对问题进行分类。问题的类型有:1.诗词原文
|
5 |
|
6 |
"history": "{history}是你和用户之前的对话记录,Q:代表了用户的输入,A:代表了你的回复,根据历史对话记录和下面的要求继续回答用户。",
|
7 |
|
|
|
1 |
{
|
2 |
+
"allow_chat": "[任务]判断给出的问题中是否提到了诗人或古诗。[要求]输出JSON格式的字符串,其中包含三个键:“exist”、“author”、“poem”;当问题没有提到任何诗人或古诗时,“exist”的取值为0,“author”和“poem”的值都为\" \",千万注意不是None也不是null,而是一个空格字符 ;当问题提到了诗人或古诗,“exist”的值为1,并将识别到的诗人姓名、古诗标题填入“author”和“poem”中。不输出除结果外的任何内容,不要填入问题中没有的内容。问题如下->Q:{user_message}",
|
3 |
|
4 |
+
"get_question_type": "你是一个古诗老师,需要对问题进行分类。问题的类型有:1.诗词原文 2.诗词白话文翻译 3.诗词鉴赏 4.词语解释 5.写作背景 6.作者简介。请分析问题属于哪一类型,挑选出来后返回对应的类型,如“诗词原文”,不要自己翻译,不要输出类型的中文,不要输出其他内容。问题如下->{user_message}",
|
5 |
|
6 |
"history": "{history}是你和用户之前的对话记录,Q:代表了用户的输入,A:代表了你的回复,根据历史对话记录和下面的要求继续回答用户。",
|
7 |
|
gradio_ui/gr_chat.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
"""
|
2 |
助手交互所需函数
|
3 |
"""
|
|
|
4 |
import time
|
5 |
import json
|
6 |
import gradio as gr
|
@@ -54,6 +55,8 @@ def newchat(chat_history: list[list]):
|
|
54 |
print("#########")
|
55 |
for chat_list in chat_history:
|
56 |
print(f"##用户:{chat_list[0]} ## Bot:{chat_list[1]}")
|
|
|
|
|
57 |
# todo 将已有聊天记录存入历史记录
|
58 |
|
59 |
# 刷新聊天记录
|
@@ -64,7 +67,11 @@ def chat_user(user_message: str, history: list[list], action: bool):
|
|
64 |
"""
|
65 |
单次对话中,首先调用的函数
|
66 |
"""
|
67 |
-
|
|
|
|
|
|
|
|
|
68 |
allowed = True
|
69 |
else:
|
70 |
allowed = False
|
@@ -78,11 +85,13 @@ def chat_respond(history: list[list], pattern: str, action: bool):
|
|
78 |
"""
|
79 |
单次对话中,在调用chat_user后调用,实现流式输出
|
80 |
"""
|
|
|
81 |
if action is False:
|
82 |
bot_message = "您似乎没有提到诗人或古诗,请再试试~"
|
83 |
else:
|
84 |
# 调用功能函数,获取助手的回答
|
85 |
-
|
|
|
86 |
|
87 |
history[-1][1] = "" # 下标-1表示最后一个
|
88 |
for char in bot_message:
|
|
|
1 |
"""
|
2 |
助手交互所需函数
|
3 |
"""
|
4 |
+
import re
|
5 |
import time
|
6 |
import json
|
7 |
import gradio as gr
|
|
|
55 |
print("#########")
|
56 |
for chat_list in chat_history:
|
57 |
print(f"##用户:{chat_list[0]} ## Bot:{chat_list[1]}")
|
58 |
+
|
59 |
+
ChatPoet.stop_chat()
|
60 |
# todo 将已有聊天记录存入历史记录
|
61 |
|
62 |
# 刷新聊天记录
|
|
|
67 |
"""
|
68 |
单次对话中,首先调用的函数
|
69 |
"""
|
70 |
+
print("调用chat_user")
|
71 |
+
ChatPoet.allow_chat(user_message=history[-1][0])
|
72 |
+
if (action is True) or (ChatPoet.res_dict != {}) and (ChatPoet.res_dict["exist"]):
|
73 |
+
print(f"action is True: {action is True}")
|
74 |
+
print(f"ChatPoet.res_dict: {ChatPoet.res_dict == {}}")
|
75 |
allowed = True
|
76 |
else:
|
77 |
allowed = False
|
|
|
85 |
"""
|
86 |
单次对话中,在调用chat_user后调用,实现流式输出
|
87 |
"""
|
88 |
+
print("调用chat_respond")
|
89 |
if action is False:
|
90 |
bot_message = "您似乎没有提到诗人或古诗,请再试试~"
|
91 |
else:
|
92 |
# 调用功能函数,获取助手的回答
|
93 |
+
pattern_str = re.sub(r'\n', "", re.sub(r'<[^>]+>', "", pattern))
|
94 |
+
bot_message = ChatPoet.gen_response(pattern=pattern_str, history=history)
|
95 |
|
96 |
history[-1][1] = "" # 下标-1表示最后一个
|
97 |
for char in bot_message:
|