Upload free_ask_internet.py
Browse files- free_ask_internet.py +38 -59
free_ask_internet.py
CHANGED
@@ -116,58 +116,25 @@ def gen_prompt(question,content_list, lang="zh-CN", context_length_limit=11000,d
|
|
116 |
if lang == "en-US":
|
117 |
answer_language = ' English '
|
118 |
if len(ref_content) > 0:
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
# 生成上标数字引用标记
|
133 |
-
for idx, ref_text in enumerate(ref_content):
|
134 |
-
symbol = f"^{idx+1}"
|
135 |
-
prompts = prompts + f"\n\n{symbol} {ref_text}"
|
136 |
-
|
137 |
-
if len(prompts) >= limit_len:
|
138 |
-
prompts = prompts[0:limit_len]
|
139 |
-
prompts = prompts + '''
|
140 |
-
```
|
141 |
-
记住:
|
142 |
-
1. 不要一字不差的重复上下文内容。回答必须使用简体中文
|
143 |
-
2. 如果回答很长,请尽量结构化、分段落总结
|
144 |
-
3. 严格使用符号引用(如 ^1、^2)的格式在答案中对应部分引用上下文
|
145 |
-
4. 如果一句话源自多个上下文,请列出所有相关的引用编号(如 ^3^5)
|
146 |
-
5. 不要将引用集中在最后返回,而是在答案对应部分列出,并检查编号的有效性
|
147 |
-
下面是用户问题:
|
148 |
-
''' + question
|
149 |
-
else:
|
150 |
-
prompts = '''
|
151 |
-
You are a large language AI assistant. You are given a user question, and please write clean, concise and accurate answer to the question. You will be given a set of related contexts to the question, each starting with a reference symbol like ^1. Please use the context and cite the context at the end of each sentence if applicable.
|
152 |
-
Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information.
|
153 |
-
Please cite the contexts with the reference symbols. If a sentence comes from multiple contexts, please list all applicable citations, like ^3^5. Other than code and specific names and citations, your answer must be written in the same language as the question.
|
154 |
-
Here are the set of contexts:
|
155 |
-
''' + "\n\n" + "```"
|
156 |
-
|
157 |
-
# 修改引用标记为上标格式
|
158 |
-
for idx, ref_text in enumerate(ref_content):
|
159 |
-
symbol = f"^{idx+1}"
|
160 |
-
prompts = prompts + f"\n\n{symbol} {ref_text}"
|
161 |
-
|
162 |
-
if len(prompts) >= limit_len:
|
163 |
-
prompts = prompts[0:limit_len]
|
164 |
-
prompts = prompts + '''
|
165 |
-
```
|
166 |
-
Above is the reference contexts. Remember, don't repeat the context word for word. Answer in ''' + answer_language + '''. If the response is lengthy, structure it in paragraphs and summarize where possible. Cite the context using symbols like ^1 where they correspond. If a sentence originates from multiple contexts, list all relevant symbols, like ^3^5. Don't cluster the citations at the end but include them in the answer where they correspond.
|
167 |
-
Remember, don't blindly repeat the contexts verbatim. And here is the user question:
|
168 |
-
''' + question
|
169 |
-
|
170 |
-
|
171 |
else:
|
172 |
prompts = question
|
173 |
if debug:
|
@@ -268,22 +235,34 @@ def ask_internet(query:str, model:str, debug=False):
|
|
268 |
total_token += token
|
269 |
response_content += token
|
270 |
|
271 |
-
# 处理引用链接 -
|
272 |
if content_list:
|
273 |
-
#
|
274 |
symbol_url_map = {
|
275 |
-
f"^{i+1}": content_list[i].get('url')
|
276 |
for i in range(len(content_list))
|
277 |
}
|
278 |
|
279 |
modified_content = response_content
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
yield modified_content
|
|
|
|
|
|
|
287 |
yield "\n\n"
|
288 |
# 参考资料部分
|
289 |
if True:
|
|
|
116 |
if lang == "en-US":
|
117 |
answer_language = ' English '
|
118 |
if len(ref_content) > 0:
|
119 |
+
prompts = '''
|
120 |
+
You are a large language AI assistant. You are given a user question, and please write clean, concise and accurate answer to the question. You will be given a set of related contexts to the question, each starting with a reference symbol like [^1]. Please use the context and cite the context at the end of each sentence if applicable.
|
121 |
+
Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information.
|
122 |
+
Please cite the contexts with the reference symbols. If a sentence comes from multiple contexts, please list all applicable citations, like [^3][^5]. Other than code and specific names and citations, your answer must be written in the same language as the question.
|
123 |
+
Here are the set of contexts:
|
124 |
+
''' + "\n\n" + "```"
|
125 |
|
126 |
+
# 修改引用标记为脚注格式
|
127 |
+
for idx, ref_text in enumerate(ref_content):
|
128 |
+
symbol = f"[^{idx+1}]"
|
129 |
+
prompts = prompts + f"\n\n{symbol} {ref_text}"
|
130 |
|
131 |
+
if len(prompts) >= limit_len:
|
132 |
+
prompts = prompts[0:limit_len]
|
133 |
+
prompts = prompts + '''
|
134 |
+
```
|
135 |
+
Above is the reference contexts. Remember, don't repeat the context word for word. Answer in ''' + answer_language + '''. If the response is lengthy, structure it in paragraphs and summarize where possible. Cite the context using symbols like [^1] where they correspond. If a sentence originates from multiple contexts, list all relevant symbols, like [^3][^5]. Don't cluster the citations at the end but include them in the answer where they correspond.
|
136 |
+
Remember, don't blindly repeat the contexts verbatim. And here is the user question:
|
137 |
+
''' + question
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
else:
|
139 |
prompts = question
|
140 |
if debug:
|
|
|
235 |
total_token += token
|
236 |
response_content += token
|
237 |
|
238 |
+
# 处理引用链接 - 使用脚注格式
|
239 |
if content_list:
|
240 |
+
# 创建脚注符号到URL的映射
|
241 |
symbol_url_map = {
|
242 |
+
f"[^{i+1}]": content_list[i].get('url')
|
243 |
for i in range(len(content_list))
|
244 |
}
|
245 |
|
246 |
modified_content = response_content
|
247 |
+
|
248 |
+
# 检查是否已经包含脚注定义
|
249 |
+
has_footnote_definitions = any(f"[^{i+1}]:" in modified_content for i in range(len(content_list)))
|
250 |
+
|
251 |
+
# 如果没有脚注定义,添加它们
|
252 |
+
if not has_footnote_definitions:
|
253 |
+
# 添加一个空行作为分隔
|
254 |
+
modified_content += "\n\n"
|
255 |
+
|
256 |
+
# 添加脚注定义
|
257 |
+
for i in range(len(content_list)):
|
258 |
+
url = content_list[i].get('url')
|
259 |
+
if url:
|
260 |
+
modified_content += f"[^{i+1}]: {url}\n"
|
261 |
+
|
262 |
yield modified_content
|
263 |
+
else:
|
264 |
+
yield response_content
|
265 |
+
|
266 |
yield "\n\n"
|
267 |
# 参考资料部分
|
268 |
if True:
|