Upload 65 files
Browse files- .github/ISSUE_TEMPLATE/bug_report.md +19 -0
- .github/ISSUE_TEMPLATE/feature_request.md +10 -0
- .gitignore +3 -1
- Dockerfile +1 -1
- check_proxy.py +110 -3
- config.py +13 -6
- core_functional.py +71 -0
- crazy_functional.py +135 -0
- crazy_functions/__init__.py +0 -0
- crazy_functions/crazy_utils.py +159 -0
- crazy_functions/下载arxiv论文翻译摘要.py +186 -0
- crazy_functions/代码重写为全英文_多线程.py +86 -22
- crazy_functions/总结word文档.py +1 -1
- crazy_functions/批量总结PDF文档.py +1 -1
- crazy_functions/批量总结PDF文档pdfminer.py +1 -1
- crazy_functions/批量翻译PDF文档_多线程.py +203 -0
- crazy_functions/理解PDF文档内容.py +185 -0
- crazy_functions/生成函数注释.py +1 -1
- crazy_functions/解析项目源代码.py +67 -3
- crazy_functions/读文章写摘要.py +1 -1
- crazy_functions/谷歌检索小助手.py +106 -0
- crazy_functions/高级功能函数模板.py +9 -14
- main.py +82 -43
- request_llm/README.md +36 -0
- request_llm/bridge_chatgpt.py +271 -0
- request_llm/bridge_tgui.py +167 -0
- requirements.txt +9 -1
- self_analysis.md +262 -0
- theme.py +147 -12
- toolbox.py +213 -46
- version +5 -0
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Bug report
|
3 |
+
about: Create a report to help us improve
|
4 |
+
title: ''
|
5 |
+
labels: ''
|
6 |
+
assignees: ''
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
**Describe the bug 简述**
|
11 |
+
|
12 |
+
**Screen Shot 截图**
|
13 |
+
|
14 |
+
**Terminal Traceback 终端traceback(如果有)**
|
15 |
+
|
16 |
+
|
17 |
+
Before submitting an issue 提交issue之前:
|
18 |
+
- Please try to upgrade your code. 如果您的代码不是最新的,建议您先尝试更新代码
|
19 |
+
- Please check project wiki for common problem solutions.项目[wiki](https://github.com/binary-husky/chatgpt_academic/wiki)有一些常见问题的解决方法
|
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Feature request
|
3 |
+
about: Suggest an idea for this project
|
4 |
+
title: ''
|
5 |
+
labels: ''
|
6 |
+
assignees: ''
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
|
.gitignore
CHANGED
@@ -139,4 +139,6 @@ config_private.py
|
|
139 |
gpt_log
|
140 |
private.md
|
141 |
private_upload
|
142 |
-
other_llms
|
|
|
|
|
|
139 |
gpt_log
|
140 |
private.md
|
141 |
private_upload
|
142 |
+
other_llms
|
143 |
+
cradle*
|
144 |
+
debug*
|
Dockerfile
CHANGED
@@ -4,10 +4,10 @@ RUN echo '[global]' > /etc/pip.conf && \
|
|
4 |
echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
|
5 |
echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
|
6 |
|
7 |
-
RUN pip3 install gradio requests[socks] mdtex2html
|
8 |
|
9 |
COPY . /gpt
|
10 |
WORKDIR /gpt
|
|
|
11 |
|
12 |
|
13 |
CMD ["python3", "main.py"]
|
|
|
4 |
echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
|
5 |
echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
|
6 |
|
|
|
7 |
|
8 |
COPY . /gpt
|
9 |
WORKDIR /gpt
|
10 |
+
RUN pip3 install -r requirements.txt
|
11 |
|
12 |
|
13 |
CMD ["python3", "main.py"]
|
check_proxy.py
CHANGED
@@ -3,7 +3,8 @@ def check_proxy(proxies):
|
|
3 |
import requests
|
4 |
proxies_https = proxies['https'] if proxies is not None else '无'
|
5 |
try:
|
6 |
-
response = requests.get("https://ipapi.co/json/",
|
|
|
7 |
data = response.json()
|
8 |
print(f'查询代理的地理位置,返回的结果是{data}')
|
9 |
if 'country_name' in data:
|
@@ -19,9 +20,115 @@ def check_proxy(proxies):
|
|
19 |
return result
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
if __name__ == '__main__':
|
23 |
-
import os
|
|
|
24 |
from toolbox import get_conf
|
25 |
proxies, = get_conf('proxies')
|
26 |
check_proxy(proxies)
|
27 |
-
|
|
|
3 |
import requests
|
4 |
proxies_https = proxies['https'] if proxies is not None else '无'
|
5 |
try:
|
6 |
+
response = requests.get("https://ipapi.co/json/",
|
7 |
+
proxies=proxies, timeout=4)
|
8 |
data = response.json()
|
9 |
print(f'查询代理的地理位置,返回的结果是{data}')
|
10 |
if 'country_name' in data:
|
|
|
20 |
return result
|
21 |
|
22 |
|
23 |
+
def backup_and_download(current_version, remote_version):
    """
    One-click update protocol: back up the working tree and download the new release.

    Snapshots the current directory into ./history/backup-<current_version>/,
    then downloads and unpacks the latest master branch into
    ./history/new-version-<remote_version>/. If that directory already exists
    it is reused without re-downloading. Returns the new-version directory.
    """
    from toolbox import get_conf
    import shutil
    import os
    import requests
    import zipfile
    os.makedirs('./history', exist_ok=True)
    backup_dir = f'./history/backup-{current_version}/'
    new_version_dir = f'./history/new-version-{remote_version}/'
    if os.path.exists(new_version_dir):
        # already fetched earlier -> reuse it
        return new_version_dir
    os.makedirs(new_version_dir)
    # snapshot the current tree, skipping ./history itself to avoid recursion
    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
    proxies, = get_conf('proxies')
    response = requests.get(
        'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip',
        proxies=proxies, stream=True)
    zip_file_path = backup_dir + '/master.zip'
    with open(zip_file_path, 'wb+') as f:
        f.write(response.content)
    # unpack, removing any file that already exists at the destination first
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        for member in zip_ref.infolist():
            target_path = os.path.join(new_version_dir, member.filename)
            if os.path.exists(target_path):
                os.remove(target_path)
            zip_ref.extract(member, new_version_dir)
    return new_version_dir
|
53 |
+
|
54 |
+
|
55 |
+
def patch_and_restart(path):
    """
    One-click update protocol: overwrite the working tree with the new
    release and restart the program.

    If no config_private.py exists, the current config.py is first copied to
    config_private.py so user settings survive the overwrite. Does not return:
    ends by exec-ing `python main.py` in place of the current process.
    """
    # Fix: `import distutils` alone does not guarantee the dir_util submodule
    # is loaded; import it explicitly to avoid an AttributeError below.
    from distutils import dir_util
    import shutil
    import os
    import sys
    import time
    # if not using config_private, move origin config.py as config_private.py
    if not os.path.exists('config_private.py'):
        print('由于您没有设置config_private.py私密配置,现将您的现有配置移动至config_private.py以防止配置丢失,',
              '另外您可以随时在history子文件夹下找回旧版的程序。')
        shutil.copyfile('config.py', 'config_private.py')
    # copy_tree merges the extracted release over the current tree
    dir_util.copy_tree(path + '/chatgpt_academic-master', './')
    print('更新完成,您可以随时在history子文件夹下找回旧版的程序,5s之后重启')
    for i in reversed(range(5)):
        time.sleep(1)
        print(i)
    print(' ------------------------------ -----------------------------------')
    # replace the current process with a fresh interpreter running main.py
    os.execl(sys.executable, 'python', 'main.py')
|
76 |
+
|
77 |
+
|
78 |
+
def get_current_version():
    """Read the locally installed version from the ./version JSON file.

    Returns the value of the "version" field, or an empty string when the
    file is missing or cannot be parsed.
    """
    import json
    try:
        with open('./version', 'r', encoding='utf8') as f:
            return json.loads(f.read())['version']
    except:
        return ""
|
86 |
+
|
87 |
+
|
88 |
+
def auto_update():
    """
    One-click update protocol: query the remote version and ask the user.

    Compares the remote version file on GitHub against ./version; when the
    remote is ahead by >= 0.05, offers an interactive one-click update.
    Any failure (network, missing file, parse error) silently disables the
    updater after printing a notice.
    """
    try:
        from toolbox import get_conf
        import requests
        import time
        import json
        proxies, = get_conf('proxies')
        response = requests.get(
            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version",
            proxies=proxies, timeout=1)
        remote_info = json.loads(response.text)
        remote_version = remote_info['version']
        new_feature = "新功能:" + remote_info["new_feature"] if remote_info["show_feature"] else ""
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.loads(f.read())['version']
        if (remote_version - current_version) < 0.05:
            # already close enough to the remote version — nothing to do
            return
        print(
            f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
        print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
        user_instruction = input('(2)是否一键更新代码(Y/y+回车=确认,输入其他/无输入+回车=不更新)?')
        if user_instruction in ['Y', 'y']:
            path = backup_and_download(current_version, remote_version)
            try:
                patch_and_restart(path)
            except:
                print('更新失败。')
        else:
            print('自动更新程序:已禁用')
            return
    except:
        print('自动更新程序:已禁用')
|
127 |
+
|
128 |
+
|
129 |
if __name__ == '__main__':
|
130 |
+
import os
|
131 |
+
os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
132 |
from toolbox import get_conf
|
133 |
proxies, = get_conf('proxies')
|
134 |
check_proxy(proxies)
|
|
config.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
|
3 |
# [step 1]>> 例如: API_KEY = "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" (此key无效)
|
4 |
-
API_KEY = os.environ.get("GPT_KEY
|
5 |
|
6 |
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
|
7 |
USE_PROXY = False
|
@@ -13,10 +13,10 @@ if USE_PROXY:
|
|
13 |
# [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
|
14 |
|
15 |
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
|
16 |
-
proxies = {
|
17 |
# [协议]:// [地址] :[端口]
|
18 |
-
"http": "socks5h://localhost:11284",
|
19 |
-
"https": "socks5h://localhost:11284",
|
20 |
}
|
21 |
else:
|
22 |
proxies = None
|
@@ -26,6 +26,12 @@ else:
|
|
26 |
# 对话窗的高度
|
27 |
CHATBOT_HEIGHT = 600
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# 发送请求到OpenAI后,等待多久判定为超时
|
30 |
TIMEOUT_SECONDS = 25
|
31 |
|
@@ -44,5 +50,6 @@ API_URL = "https://api.openai.com/v1/chat/completions"
|
|
44 |
# 设置并行使用的线程数
|
45 |
CONCURRENT_COUNT = 100
|
46 |
|
47 |
-
#
|
48 |
-
|
|
|
|
1 |
import os

# [step 1]>> e.g. API_KEY = "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" (example, invalid)
# Read the OpenAI API key from the environment; None when the variable is unset.
# Fix: the original literal was missing its closing quote ("GPT_KEY) — a SyntaxError.
API_KEY = os.environ.get("GPT_KEY")

# [step 2]>> Set to True to route requests through a proxy; when deploying
# directly on an overseas server, leave this unchanged.
USE_PROXY = False
|
|
|
13 |
# [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
|
14 |
|
15 |
# 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
|
16 |
+
proxies = {
|
17 |
# [协议]:// [地址] :[端口]
|
18 |
+
"http": "socks5h://localhost:11284",
|
19 |
+
"https": "socks5h://localhost:11284",
|
20 |
}
|
21 |
else:
|
22 |
proxies = None
|
|
|
26 |
# 对话窗的高度
|
27 |
CHATBOT_HEIGHT = 600
|
28 |
|
29 |
+
# 代码高亮
|
30 |
+
CODE_HIGHLIGHT = True
|
31 |
+
|
32 |
+
# 窗口布局
|
33 |
+
LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
|
34 |
+
|
35 |
# 发送请求到OpenAI后,等待多久判定为超时
|
36 |
TIMEOUT_SECONDS = 25
|
37 |
|
|
|
50 |
# 设置并行使用的线程数
|
51 |
CONCURRENT_COUNT = 100
|
52 |
|
53 |
+
# 设置用户名和密码(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
|
54 |
+
# [("username", "password"), ("username2", "password2"), ...]
|
55 |
+
AUTHENTICATION = []
|
core_functional.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Button color mapping (see theme.py):
#   'primary'   corresponds to primary_hue
#   'secondary' corresponds to neutral_hue (the default button color)
#   'stop'      corresponds to color_er
from toolbox import clear_line_break


def get_core_functions():
    """Return the built-in "core function" buttons as {label: config}.

    "Prefix" is prepended to the user's text and "Suffix" appended; optional
    keys are "Color" (button color, default secondary) and "PreProcess"
    (a transform applied to the input first).
    """
    grammar_check_prefix = (
        r"Can you help me ensure that the grammar and the spelling is correct? "
        r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good."
        r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, "
        r"put the original text the first column, "
        r"put the corrected text in the second column and highlight the key words you fixed." "\n"
        r"Example:" "\n"
        r"Paragraph: How is you? Do you knows what is it?" "\n"
        r"| Original sentence | Corrected sentence |" "\n"
        r"| :--- | :--- |" "\n"
        r"| How **is** you? | How **are** you? |" "\n"
        r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |" "\n"
        r"Below is a paragraph from an academic paper. "
        r"You need to report all grammar and spelling mistakes as the example before."
        "\n\n"
    )
    return {
        "英语学术润色": {
            # preamble placed before the user's paragraph
            "Prefix": (r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
                       r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
                       r"Furthermore, list all modification and explain the reasons to do so in markdown table." "\n\n"),
            # text appended after the user's paragraph
            "Suffix": r"",
            "Color": r"secondary",  # button color
        },
        "中文学术润色": {
            "Prefix": (r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
                       r"同时分解长句,减少重复,并提供改进建议。请只提供文本的更正版本,避免包括解释。请编辑以下文本" "\n\n"),
            "Suffix": r"",
        },
        "查找语法错误": {
            "Prefix": grammar_check_prefix,
            "Suffix": r"",
            "PreProcess": clear_line_break,  # pre-processing: strip line breaks from the input
        },
        "中译英": {
            "Prefix": r"Please translate following sentence to English:" "\n\n",
            "Suffix": r"",
        },
        "学术中英互译": {
            "Prefix": (r"I want you to act as a scientific English-Chinese translator, "
                       r"I will provide you with some paragraphs in one language "
                       r"and your task is to accurately and academically translate the paragraphs only into the other language. "
                       r"Do not repeat the original provided paragraphs after translation. "
                       r"You should use artificial intelligence tools, "
                       r"such as natural language processing, and rhetorical knowledge "
                       r"and experience about effective writing techniques to reply. "
                       r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:" "\n\n"),
            "Suffix": "",
            "Color": "secondary",
        },
        "英译中": {
            "Prefix": r"请翻译成中文:" "\n\n",
            "Suffix": r"",
        },
        "找图片": {
            "Prefix": (r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
                       r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" "\n\n"),
            "Suffix": r"",
        },
        "解释代码": {
            "Prefix": r"请解释以下代码:" "\n```\n",
            "Suffix": "\n```\n",
        },
    }
|
crazy_functional.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from toolbox import HotReload  # HotReload: hot reload — edited plugin code takes effect without restarting the program


def get_crazy_functions():
    """Assemble all function plugins into a {button label: plugin config} dict.

    Config keys: "Function" (the callable, wrapped in HotReload), optional
    "Color" (button color) and optional "AsButton" (False = dropdown only).
    """
    ###################### group 1 ###########################
    # [group 1]: the earliest project plugins plus a few demos
    from crazy_functions.读文章写摘要 import 读文章写摘要
    from crazy_functions.生成函数注释 import 批量生成函数注释
    from crazy_functions.解析项目源代码 import 解析项目本身
    from crazy_functions.解析项目源代码 import 解析一个Python项目
    from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
    from crazy_functions.解析项目源代码 import 解析一个C项目
    from crazy_functions.解析项目源代码 import 解析一个Golang项目
    from crazy_functions.解析项目源代码 import 解析一个Java项目
    from crazy_functions.解析项目源代码 import 解析一个Rect项目
    from crazy_functions.高级功能函数模板 import 高阶功能模板函数
    from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文

    function_plugins = {
        "请解析并解构此项目本身(源码自译解)": {
            "AsButton": False,  # dropdown menu only
            "Function": HotReload(解析项目本身)
        },
        "解析整个Python项目": {
            "Color": "stop",  # button color
            "Function": HotReload(解析一个Python项目)
        },
        "解析整个C++项目头文件": {
            "Color": "stop",
            "Function": HotReload(解析一个C项目的头文件)
        },
        "解析整个C++项目(.cpp/.h)": {
            "Color": "stop",
            "AsButton": False,  # dropdown menu only
            "Function": HotReload(解析一个C项目)
        },
        "解析整个Go项目": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(解析一个Golang项目)
        },
        "解析整个Java项目": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(解析一个Java项目)
        },
        "解析整个React项目": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(解析一个Rect项目)
        },
        "读Tex论文写摘要": {
            "Color": "stop",
            "Function": HotReload(读文章写摘要)
        },
        "批量生成函数注释": {
            "Color": "stop",
            "Function": HotReload(批量生成函数注释)
        },
        "[多线程demo] 把本项目源代码切换成全英文": {
            "Function": HotReload(全项目切换英文)
        },
        "[函数插件模板demo] 历史上的今天": {
            "Function": HotReload(高阶功能模板函数)
        },
    }

    ###################### group 2 ###########################
    # [group 2]: well tested, but still a little short of perfect
    from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
    from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
    from crazy_functions.总结word文档 import 总结word文档
    from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
    from crazy_functions.理解PDF文档内容 import 理解PDF文档内容
    from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入

    function_plugins.update({
        "批量翻译PDF文档(多线程)": {
            "Color": "stop",
            "AsButton": True,
            "Function": HotReload(批量翻译PDF文档)
        },
        "[仅供开发调试] 批量总结PDF文档": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(批量总结PDF文档)
        },
        "[仅供开发调试] 批量总结PDF文档pdfminer": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(批量总结PDF文档pdfminer)
        },
        "谷歌学术检索助手(输入谷歌学术搜索页url)": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(谷歌检索小助手)
        },
        "批量总结Word文档": {
            "Color": "stop",
            "Function": HotReload(总结word文档)
        },
        "理解PDF文档内容(Tk文件选择接口,仅本地)": {
            "AsButton": False,
            "Function": HotReload(理解PDF文档内容)
        },
        "理解PDF文档内容(通用接口,读取文件输入区)": {
            "Color": "stop",
            "AsButton": False,
            "Function": HotReload(理解PDF文档内容标准文件输入)
        },
    })

    ###################### group 3 ###########################
    # [group 3]: plugins that are not yet fully tested live here
    try:
        from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
        function_plugins.update({
            "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": {
                "Color": "stop",
                "AsButton": False,
                "Function": HotReload(下载arxiv论文并翻译摘要)
            }
        })
    except Exception as err:
        print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')

    ###################### group n ###########################
    return function_plugins
|
crazy_functions/__init__.py
ADDED
File without changes
|
crazy_functions/crazy_utils.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import traceback

def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
    """Run one GPT request on a background thread while keeping the UI alive.

    Yields (chatbot, history, status) tuples every `refresh_interval` seconds
    so the gradio front-end keeps refreshing; the model's final answer is the
    generator's return value (collected by the caller via `yield from`).
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
    # user feedback: show the question immediately with an empty answer
    chatbot.append([inputs_show_user, ""])
    yield chatbot, [], '正常'
    worker_pool = ThreadPoolExecutor(max_workers=16)
    # observe_window[0]: streaming partial answer; observe_window[1]: watchdog timestamp
    observe_window = ["", time.time()]
    task = worker_pool.submit(
        lambda: predict_no_ui_long_connection(
            inputs=inputs, top_p=top_p, temperature=temperature,
            history=history, sys_prompt=sys_prompt, observe_window=observe_window))
    while True:
        # sleep one refresh tick before pushing the latest partial answer to the UI
        time.sleep(refresh_interval)
        # feed the watchdog so the worker knows the UI is still attached
        observe_window[1] = time.time()
        if task.done():
            break
        chatbot[-1] = [chatbot[-1][0], observe_window[0]]
        yield chatbot, [], "正常"
    return task.result()
|
28 |
+
|
29 |
+
|
30 |
+
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inputs_array, inputs_show_user_array, top_p, temperature, chatbot, history_array, sys_prompt_array, refresh_interval=0.2, max_workers=10, scroller_max_len=30):
    """Fan out many GPT requests over a thread pool with a live progress UI.

    Yields (chatbot, history, status) tuples while the workers run; the
    generator's return value is a flat list alternating
    [shown_input_0, answer_0, shown_input_1, answer_1, ...].
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    pool = ThreadPoolExecutor(max_workers=max_workers)
    n_frag = len(inputs_array)
    # user feedback
    chatbot.append(["请开始多线程操作。", ""])
    yield chatbot, [], '正常'
    # per-thread shared cells: [0] partial answer text, [1] watchdog timestamp
    observe_windows = [["", time.time()] for _ in range(n_frag)]

    def _req_gpt(index, inputs, history, sys_prompt):
        # worker body: run one request; on failure, report the traceback as the answer
        try:
            return predict_no_ui_long_connection(
                inputs=inputs, top_p=top_p, temperature=temperature,
                history=history, sys_prompt=sys_prompt, observe_window=observe_windows[index])
        except:
            tb_str = '```\n' + traceback.format_exc() + '```'
            gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
            if len(observe_windows[index][0]) > 0:
                gpt_say += "此线程失败前收到的回答:" + observe_windows[index][0]
            return gpt_say

    # launch all workers
    tasks = [pool.submit(_req_gpt, index, inputs, history, sys_prompt)
             for index, (inputs, history, sys_prompt)
             in enumerate(zip(inputs_array, history_array, sys_prompt_array))]
    tick = 0
    while True:
        # yield once per tick to refresh the front-end
        time.sleep(refresh_interval)
        tick += 1
        done_flags = [t.done() for t in tasks]
        if all(done_flags):
            pool.shutdown()
            break
        # feed every thread's watchdog
        for thread_index in range(len(done_flags)):
            observe_windows[thread_index][1] = time.time()
        # build a small scrolling view of each thread's latest output for the UI
        scroll_views = []
        for thread_index in range(len(done_flags)):
            tail = observe_windows[thread_index][0][-scroller_max_len:]
            scroller = "[ ...`" + tail.replace('\n', '').replace('```', '...').replace(
                ' ', '.').replace('<br/>', '.....').replace('$', '.') + "`... ]"
            scroll_views.append(scroller)
        stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n'
                            for done, obs in zip(done_flags, scroll_views)])
        chatbot[-1] = [chatbot[-1][0],
                       f'多线程操作已经开始,完成情况: \n\n{stat_str}' + '.' * (tick % 10 + 1)]
        yield chatbot, [], "正常"
    # collect the results in input order
    gpt_response_collection = []
    for inputs_show_user, task in zip(inputs_show_user_array, tasks):
        gpt_response_collection.extend([inputs_show_user, task.result()])
    return gpt_response_collection
|
92 |
+
|
93 |
+
|
94 |
+
def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
    """Split `txt` into pieces of at most `limit` tokens (per `get_token_fn`).

    Cuts happen at line boundaries, preferring empty lines first; when no
    empty-line cut is possible the whole text is re-cut at arbitrary line
    boundaries. Joining the returned pieces with a newline reconstructs the
    original text. Raises RuntimeError when a single line alone exceeds the
    limit.

    Fixes vs. the original: `cnt` is now initialized before the scan loop —
    previously, when the estimated cut position was 0 the loop never ran and
    the `if cnt == 0` check raised NameError (which escaped the
    `except RuntimeError` fallback); the debug `print(cnt)` is removed.
    """
    def cut(txt_tocut, must_break_at_empty_line):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        lines = txt_tocut.split('\n')
        # estimate where the cut should land, then scan backwards for a valid spot
        estimated_line_cut = int(limit / get_token_fn(txt_tocut) * len(lines))
        cnt = 0  # guard: the loop below may not execute at all
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break
        if cnt == 0:
            raise RuntimeError("存在一行极长的文本!")
        # recurse on the remainder and chain the pieces together
        result = [prev]
        result.extend(cut(post, must_break_at_empty_line))
        return result
    try:
        return cut(txt, must_break_at_empty_line=True)
    except RuntimeError:
        return cut(txt, must_break_at_empty_line=False)
|
123 |
+
|
124 |
+
|
125 |
+
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
    """Split PDF-extracted text into pieces of at most `limit` tokens.

    Like breakdown_txt_to_satisfy_token_limit, but with a last-resort pass
    for text with overlong lines: periods are temporarily rewritten to a
    fullwidth period followed by a newline (a deliberate marker), the text
    is cut at the new line boundaries, and the marker is restored to '.'
    in each returned piece.

    Fix vs. the original: the stray debug `print(cnt)` that leaked cut
    positions to stdout is removed.
    """
    def cut(txt_tocut, must_break_at_empty_line):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        lines = txt_tocut.split('\n')
        # estimate where the cut should land, then scan backwards for a valid spot
        estimated_line_cut = int(limit / get_token_fn(txt_tocut) * len(lines))
        cnt = 0
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break
        if cnt == 0:
            raise RuntimeError("存在一行极长的文本!")
        # recurse on the remainder and chain the pieces together
        result = [prev]
        result.extend(cut(post, must_break_at_empty_line))
        return result
    try:
        return cut(txt, must_break_at_empty_line=True)
    except RuntimeError:
        try:
            return cut(txt, must_break_at_empty_line=False)
        except RuntimeError:
            # last resort: cut inside overlong lines at sentence periods
            res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)
            return [r.replace('。\n', '.') for r in res]
|
crazy_functions/下载arxiv论文翻译摘要.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
+
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down, get_conf
|
3 |
+
import re, requests, unicodedata, os
|
4 |
+
|
5 |
+
def download_arxiv_(url_pdf):
    """Download an arxiv paper's PDF into ./gpt_log/arxiv/.

    Accepts a full arxiv URL (abs or pdf form) or a bare paper id such as
    "1812.10695". Returns (local_file_path, other_info), where other_info is
    the metadata dict scraped by get_name (year / abstract / authors /
    comment), or None when the input cannot be recognized.

    Fix vs. the original: removed dead code — `paper_id` and the unused
    `.bib` filename string were computed but never used.
    """
    if 'arxiv.org' not in url_pdf:
        if ('.' in url_pdf) and ('/' not in url_pdf):
            # looks like a bare paper id -> build the abs URL and retry
            new_url = 'https://arxiv.org/abs/'+url_pdf
            print('下载编号:', url_pdf, '自动定位:', new_url)
            return download_arxiv_(new_url)
        else:
            print('不能识别的URL!')
            return None
    if 'abs' in url_pdf:
        # convert an abs page URL into the matching pdf URL
        url_pdf = url_pdf.replace('abs', 'pdf')
        url_pdf = url_pdf + '.pdf'

    url_abs = url_pdf.replace('.pdf', '').replace('pdf', 'abs')
    title, other_info = get_name(_url_=url_abs)

    # prefix the title with the year and any recognized venue, when available
    if '2' in other_info['year']:
        title = other_info['year'] + ' ' + title

    known_conf = ['NeurIPS', 'NIPS', 'Nature', 'Science', 'ICLR', 'AAAI']
    for k in known_conf:
        if k in other_info['comment']:
            title = k + ' ' + title

    download_dir = './gpt_log/arxiv/'
    os.makedirs(download_dir, exist_ok=True)

    # sanitize characters that are illegal in file names (use fullwidth forms)
    title_str = title.replace('?', '?')\
        .replace(':', ':')\
        .replace('\"', '“')\
        .replace('\n', '')\
        .replace('  ', ' ')\
        .replace('  ', ' ')

    requests_pdf_url = url_pdf
    file_path = download_dir+title_str

    print('下载中')
    proxies, = get_conf('proxies')
    r = requests.get(requests_pdf_url, proxies=proxies)
    with open(file_path, 'wb+') as f:
        f.write(r.content)
    print('下载完成')

    return './gpt_log/arxiv/'+title_str, other_info
|
65 |
+
|
66 |
+
|
67 |
+
def get_name(_url_):
    """Scrape the title and metadata of an arxiv paper from its abs page.

    Returns (title + '.pdf', other_details), where other_details may contain
    'year', 'abstract', 'authors' and 'comment'; any field that cannot be
    scraped defaults to '' (abstract is simply absent on failure).

    Fixes vs. the original: the comment-extraction failure message wrongly
    printed '年份获取失败' (year failure) — a copy-paste error, now reporting
    the comment field; removed an unused `import os` and dead commented-out
    cache code.
    """
    from bs4 import BeautifulSoup
    print('正在获取文献名!')
    print(_url_)

    proxies, = get_conf('proxies')
    res = requests.get(_url_, proxies=proxies)

    bs = BeautifulSoup(res.text, 'html.parser')
    other_details = {}

    # get year (the abstract lives in the same try-block: if the dateline is
    # missing we skip the abstract as well, matching the original behavior)
    try:
        year = bs.find_all(class_='dateline')[0].text
        year = re.search(r'(\d{4})', year, re.M | re.I).group(1)
        other_details['year'] = year
        abstract = bs.find_all(class_='abstract mathjax')[0].text
        other_details['abstract'] = abstract
    except:
        other_details['year'] = ''
        print('年份获取失败')

    # get author
    try:
        authors = bs.find_all(class_='authors')[0].text
        authors = authors.split('Authors:')[1]
        other_details['authors'] = authors
    except:
        other_details['authors'] = ''
        print('authors获取失败')

    # get comment
    try:
        comment = bs.find_all(class_='metatable')[0].text
        real_comment = None
        for item in comment.replace('\n', ' ').split(' '):
            if 'Comments' in item:
                real_comment = item
        if real_comment is not None:
            other_details['comment'] = real_comment
        else:
            other_details['comment'] = ''
    except:
        other_details['comment'] = ''
        print('comment获取失败')

    title_str = BeautifulSoup(
        res.text, 'html.parser').find('title').contents[0]
    print('获取成功:', title_str)

    return title_str+'.pdf', other_details
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
@CatchException
|
135 |
+
def 下载arxiv论文并翻译摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
136 |
+
|
137 |
+
CRAZY_FUNCTION_INFO = "下载arxiv论文并翻译摘要,函数插件作者[binary-husky]。正在提取摘要并下载PDF文档……"
|
138 |
+
import glob
|
139 |
+
import os
|
140 |
+
|
141 |
+
# 基本信息:功能、贡献者
|
142 |
+
chatbot.append(["函数插件功能?", CRAZY_FUNCTION_INFO])
|
143 |
+
yield chatbot, history, '正常'
|
144 |
+
|
145 |
+
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
146 |
+
try:
|
147 |
+
import pdfminer, bs4
|
148 |
+
except:
|
149 |
+
report_execption(chatbot, history,
|
150 |
+
a = f"解析项目: {txt}",
|
151 |
+
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer beautifulsoup4```。")
|
152 |
+
yield chatbot, history, '正常'
|
153 |
+
return
|
154 |
+
|
155 |
+
# 清空历史,以免输入溢出
|
156 |
+
history = []
|
157 |
+
|
158 |
+
# 提取摘要,下载PDF文档
|
159 |
+
try:
|
160 |
+
pdf_path, info = download_arxiv_(txt)
|
161 |
+
except:
|
162 |
+
report_execption(chatbot, history,
|
163 |
+
a = f"解析项目: {txt}",
|
164 |
+
b = f"下载pdf文件未成功")
|
165 |
+
yield chatbot, history, '正常'
|
166 |
+
return
|
167 |
+
|
168 |
+
# 翻译摘要等
|
169 |
+
i_say = f"请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。材料如下:{str(info)}"
|
170 |
+
i_say_show_user = f'请你阅读以下学术论文相关的材料,提取摘要,翻译为中文。论文:{pdf_path}'
|
171 |
+
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
|
172 |
+
yield chatbot, history, '正常'
|
173 |
+
msg = '正常'
|
174 |
+
# ** gpt request **
|
175 |
+
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
|
176 |
+
chatbot[-1] = (i_say_show_user, gpt_say)
|
177 |
+
history.append(i_say_show_user); history.append(gpt_say)
|
178 |
+
yield chatbot, history, msg
|
179 |
+
# 写入文件
|
180 |
+
import shutil
|
181 |
+
# 重置文件的创建时间
|
182 |
+
shutil.copyfile(pdf_path, f'./gpt_log/{os.path.basename(pdf_path)}'); os.remove(pdf_path)
|
183 |
+
res = write_results_to_file(history)
|
184 |
+
chatbot.append(("完成了吗?", res + "\n\nPDF文件也已经下载"))
|
185 |
+
yield chatbot, history, msg
|
186 |
+
|
crazy_functions/代码重写为全英文_多线程.py
CHANGED
@@ -1,41 +1,97 @@
|
|
1 |
import threading
|
2 |
-
from
|
3 |
-
from toolbox import CatchException, write_results_to_file
|
|
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
@CatchException
|
8 |
def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
os.makedirs('gpt_log/generated_english_version', exist_ok=True)
|
13 |
os.makedirs('gpt_log/generated_english_version/crazy_functions', exist_ok=True)
|
14 |
file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
|
15 |
[f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
|
|
|
16 |
i_say_show_user_buffer = []
|
17 |
|
18 |
-
#
|
19 |
for index, fp in enumerate(file_manifest):
|
20 |
# if 'test_project' in fp: continue
|
21 |
with open(fp, 'r', encoding='utf-8') as f:
|
22 |
file_content = f.read()
|
23 |
-
i_say_show_user =f'[{index}/{len(file_manifest)}]
|
24 |
i_say_show_user_buffer.append(i_say_show_user)
|
25 |
chatbot.append((i_say_show_user, "[Local Message] 等待多线程操作,中间过程不予显示."))
|
26 |
yield chatbot, history, '正常'
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
mutable_return = [None for _ in file_manifest]
|
|
|
30 |
def thread_worker(fp,index):
|
|
|
|
|
|
|
31 |
with open(fp, 'r', encoding='utf-8') as f:
|
32 |
file_content = f.read()
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
#
|
39 |
handles = [threading.Thread(target=thread_worker, args=(fp,index)) for index, fp in enumerate(file_manifest)]
|
40 |
for h in handles:
|
41 |
h.daemon = True
|
@@ -43,19 +99,23 @@ def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt,
|
|
43 |
chatbot.append(('开始了吗?', f'多线程操作已经开始'))
|
44 |
yield chatbot, history, '正常'
|
45 |
|
46 |
-
#
|
47 |
cnt = 0
|
48 |
while True:
|
49 |
-
|
|
|
50 |
th_alive = [h.is_alive() for h in handles]
|
51 |
if not any(th_alive): break
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
56 |
yield chatbot, history, '正常'
|
57 |
|
58 |
-
#
|
59 |
for index, h in enumerate(handles):
|
60 |
h.join() # 这里其实不需要join了,肯定已经都结束了
|
61 |
fp = file_manifest[index]
|
@@ -63,13 +123,17 @@ def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt,
|
|
63 |
i_say_show_user = i_say_show_user_buffer[index]
|
64 |
|
65 |
where_to_relocate = f'gpt_log/generated_english_version/{fp}'
|
66 |
-
|
|
|
|
|
|
|
|
|
67 |
chatbot.append((i_say_show_user, f'[Local Message] 已完成{os.path.abspath(fp)}的转化,\n\n存入{os.path.abspath(where_to_relocate)}'))
|
68 |
history.append(i_say_show_user); history.append(gpt_say)
|
69 |
yield chatbot, history, '正常'
|
70 |
time.sleep(1)
|
71 |
|
72 |
-
#
|
73 |
res = write_results_to_file(history)
|
74 |
chatbot.append(("生成一份任务执行报告", res))
|
75 |
yield chatbot, history, '正常'
|
|
|
1 |
import threading
|
2 |
+
from request_llm.bridge_chatgpt import predict_no_ui_long_connection
|
3 |
+
from toolbox import CatchException, write_results_to_file, report_execption
|
4 |
+
from .crazy_utils import breakdown_txt_to_satisfy_token_limit
|
5 |
|
6 |
+
def extract_code_block_carefully(txt):
|
7 |
+
splitted = txt.split('```')
|
8 |
+
n_code_block_seg = len(splitted) - 1
|
9 |
+
if n_code_block_seg <= 1: return txt
|
10 |
+
# 剩下的情况都开头除去 ``` 结尾除去一次 ```
|
11 |
+
txt_out = '```'.join(splitted[1:-1])
|
12 |
+
return txt_out
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
def break_txt_into_half_at_some_linebreak(txt):
|
17 |
+
lines = txt.split('\n')
|
18 |
+
n_lines = len(lines)
|
19 |
+
pre = lines[:(n_lines//2)]
|
20 |
+
post = lines[(n_lines//2):]
|
21 |
+
return "\n".join(pre), "\n".join(post)
|
22 |
|
23 |
|
24 |
@CatchException
|
25 |
def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
|
26 |
+
# 第1步:清空历史,以免输入溢出
|
27 |
+
history = []
|
28 |
+
|
29 |
+
# 第2步:尝试导入依赖,如果缺少依赖,则给出安装建议
|
30 |
+
try:
|
31 |
+
import openai, transformers
|
32 |
+
except:
|
33 |
+
report_execption(chatbot, history,
|
34 |
+
a = f"解析项目: {txt}",
|
35 |
+
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade openai transformers```。")
|
36 |
+
yield chatbot, history, '正常'
|
37 |
+
return
|
38 |
+
|
39 |
+
# 第3步:集合文件
|
40 |
+
import time, glob, os, shutil, re, openai
|
41 |
os.makedirs('gpt_log/generated_english_version', exist_ok=True)
|
42 |
os.makedirs('gpt_log/generated_english_version/crazy_functions', exist_ok=True)
|
43 |
file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
|
44 |
[f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
|
45 |
+
# file_manifest = ['./toolbox.py']
|
46 |
i_say_show_user_buffer = []
|
47 |
|
48 |
+
# 第4步:随便显示点什么防止卡顿的感觉
|
49 |
for index, fp in enumerate(file_manifest):
|
50 |
# if 'test_project' in fp: continue
|
51 |
with open(fp, 'r', encoding='utf-8') as f:
|
52 |
file_content = f.read()
|
53 |
+
i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}'
|
54 |
i_say_show_user_buffer.append(i_say_show_user)
|
55 |
chatbot.append((i_say_show_user, "[Local Message] 等待多线程操作,中间过程不予显示."))
|
56 |
yield chatbot, history, '正常'
|
57 |
|
58 |
+
|
59 |
+
# 第5步:Token限制下的截断与处理
|
60 |
+
MAX_TOKEN = 3000
|
61 |
+
from transformers import GPT2TokenizerFast
|
62 |
+
print('加载tokenizer中')
|
63 |
+
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
|
64 |
+
get_token_fn = lambda txt: len(tokenizer(txt)["input_ids"])
|
65 |
+
print('加载tokenizer结束')
|
66 |
+
|
67 |
+
|
68 |
+
# 第6步:任务函数
|
69 |
mutable_return = [None for _ in file_manifest]
|
70 |
+
observe_window = [[""] for _ in file_manifest]
|
71 |
def thread_worker(fp,index):
|
72 |
+
if index > 10:
|
73 |
+
time.sleep(60)
|
74 |
+
print('Openai 限制免费用户每分钟20次请求,降低请求频率中。')
|
75 |
with open(fp, 'r', encoding='utf-8') as f:
|
76 |
file_content = f.read()
|
77 |
+
i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```'
|
78 |
+
try:
|
79 |
+
gpt_say = ""
|
80 |
+
# 分解代码文件
|
81 |
+
file_content_breakdown = breakdown_txt_to_satisfy_token_limit(file_content, get_token_fn, MAX_TOKEN)
|
82 |
+
for file_content_partial in file_content_breakdown:
|
83 |
+
i_say = i_say_template(fp, file_content_partial)
|
84 |
+
# # ** gpt request **
|
85 |
+
gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, top_p=top_p, temperature=temperature, history=[], sys_prompt=sys_prompt, observe_window=observe_window[index])
|
86 |
+
gpt_say_partial = extract_code_block_carefully(gpt_say_partial)
|
87 |
+
gpt_say += gpt_say_partial
|
88 |
+
mutable_return[index] = gpt_say
|
89 |
+
except ConnectionAbortedError as token_exceed_err:
|
90 |
+
print('至少一个线程任务Token溢出而失败', e)
|
91 |
+
except Exception as e:
|
92 |
+
print('至少一个线程任务意外失败', e)
|
93 |
|
94 |
+
# 第7步:所有线程同时开始执行任务函数
|
95 |
handles = [threading.Thread(target=thread_worker, args=(fp,index)) for index, fp in enumerate(file_manifest)]
|
96 |
for h in handles:
|
97 |
h.daemon = True
|
|
|
99 |
chatbot.append(('开始了吗?', f'多线程操作已经开始'))
|
100 |
yield chatbot, history, '正常'
|
101 |
|
102 |
+
# 第8步:循环轮询各个线程是否执行完毕
|
103 |
cnt = 0
|
104 |
while True:
|
105 |
+
cnt += 1
|
106 |
+
time.sleep(0.2)
|
107 |
th_alive = [h.is_alive() for h in handles]
|
108 |
if not any(th_alive): break
|
109 |
+
# 更好的UI视觉效果
|
110 |
+
observe_win = []
|
111 |
+
for thread_index, alive in enumerate(th_alive):
|
112 |
+
observe_win.append("[ ..."+observe_window[thread_index][0][-60:].replace('\n','').replace('```','...').replace(' ','.').replace('<br/>','.....').replace('$','.')+"... ]")
|
113 |
+
stat = [f'执行中: {obs}\n\n' if alive else '已完成\n\n' for alive, obs in zip(th_alive, observe_win)]
|
114 |
+
stat_str = ''.join(stat)
|
115 |
+
chatbot[-1] = (chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt%10+1)))
|
116 |
yield chatbot, history, '正常'
|
117 |
|
118 |
+
# 第9步:把结果写入文件
|
119 |
for index, h in enumerate(handles):
|
120 |
h.join() # 这里其实不需要join了,肯定已经都结束了
|
121 |
fp = file_manifest[index]
|
|
|
123 |
i_say_show_user = i_say_show_user_buffer[index]
|
124 |
|
125 |
where_to_relocate = f'gpt_log/generated_english_version/{fp}'
|
126 |
+
if gpt_say is not None:
|
127 |
+
with open(where_to_relocate, 'w+', encoding='utf-8') as f:
|
128 |
+
f.write(gpt_say)
|
129 |
+
else: # 失败
|
130 |
+
shutil.copyfile(file_manifest[index], where_to_relocate)
|
131 |
chatbot.append((i_say_show_user, f'[Local Message] 已完成{os.path.abspath(fp)}的转化,\n\n存入{os.path.abspath(where_to_relocate)}'))
|
132 |
history.append(i_say_show_user); history.append(gpt_say)
|
133 |
yield chatbot, history, '正常'
|
134 |
time.sleep(1)
|
135 |
|
136 |
+
# 第10步:备份一个文件
|
137 |
res = write_results_to_file(history)
|
138 |
chatbot.append(("生成一份任务执行报告", res))
|
139 |
yield chatbot, history, '正常'
|
crazy_functions/总结word文档.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
crazy_functions/批量总结PDF文档.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
import re
|
4 |
import unicodedata
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
import re
|
4 |
import unicodedata
|
crazy_functions/批量总结PDF文档pdfminer.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
|
4 |
fast_debug = False
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
|
4 |
fast_debug = False
|
crazy_functions/批量翻译PDF文档_多线程.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from toolbox import CatchException, report_execption, write_results_to_file
|
2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
3 |
+
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
4 |
+
|
5 |
+
|
6 |
+
def read_and_clean_pdf_text(fp):
|
7 |
+
"""
|
8 |
+
**输入参数说明**
|
9 |
+
- `fp`:需要读取和清理文本的pdf文件路径
|
10 |
+
|
11 |
+
**输出参数说明**
|
12 |
+
- `meta_txt`:清理后的文本内容字符串
|
13 |
+
- `page_one_meta`:第一页清理后的文本内容列表
|
14 |
+
|
15 |
+
**函数功能**
|
16 |
+
读取pdf文件并清理其中的文本内容,清理规则包括:
|
17 |
+
- 提取所有块元的文本信息,并合并为一个字符串
|
18 |
+
- 去除短块(字符数小于100)并替换为回车符
|
19 |
+
- 清理多余的空行
|
20 |
+
- 合并小写字母开头的段落块并替换为空格
|
21 |
+
- 清除重复的换行
|
22 |
+
- 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
|
23 |
+
"""
|
24 |
+
import fitz
|
25 |
+
import re
|
26 |
+
import numpy as np
|
27 |
+
# file_content = ""
|
28 |
+
with fitz.open(fp) as doc:
|
29 |
+
meta_txt = []
|
30 |
+
meta_font = []
|
31 |
+
for index, page in enumerate(doc):
|
32 |
+
# file_content += page.get_text()
|
33 |
+
text_areas = page.get_text("dict") # 获取页面上的文本信息
|
34 |
+
|
35 |
+
# 块元提取 for each word segment with in line for each line cross-line words for each block
|
36 |
+
meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
|
37 |
+
'- ', '') for t in text_areas['blocks'] if 'lines' in t])
|
38 |
+
meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
|
39 |
+
for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
|
40 |
+
if index == 0:
|
41 |
+
page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
|
42 |
+
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
|
43 |
+
|
44 |
+
def 把字符太少的块清除为回车(meta_txt):
|
45 |
+
for index, block_txt in enumerate(meta_txt):
|
46 |
+
if len(block_txt) < 100:
|
47 |
+
meta_txt[index] = '\n'
|
48 |
+
return meta_txt
|
49 |
+
meta_txt = 把字符太少的块清除为回车(meta_txt)
|
50 |
+
|
51 |
+
def 清理多余的空行(meta_txt):
|
52 |
+
for index in reversed(range(1, len(meta_txt))):
|
53 |
+
if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
|
54 |
+
meta_txt.pop(index)
|
55 |
+
return meta_txt
|
56 |
+
meta_txt = 清理多余的空行(meta_txt)
|
57 |
+
|
58 |
+
def 合并小写开头的段落块(meta_txt):
|
59 |
+
def starts_with_lowercase_word(s):
|
60 |
+
pattern = r"^[a-z]+"
|
61 |
+
match = re.match(pattern, s)
|
62 |
+
if match:
|
63 |
+
return True
|
64 |
+
else:
|
65 |
+
return False
|
66 |
+
for _ in range(100):
|
67 |
+
for index, block_txt in enumerate(meta_txt):
|
68 |
+
if starts_with_lowercase_word(block_txt):
|
69 |
+
if meta_txt[index-1] != '\n':
|
70 |
+
meta_txt[index-1] += ' '
|
71 |
+
else:
|
72 |
+
meta_txt[index-1] = ''
|
73 |
+
meta_txt[index-1] += meta_txt[index]
|
74 |
+
meta_txt[index] = '\n'
|
75 |
+
return meta_txt
|
76 |
+
meta_txt = 合并小写开头的段落块(meta_txt)
|
77 |
+
meta_txt = 清理多余的空行(meta_txt)
|
78 |
+
|
79 |
+
meta_txt = '\n'.join(meta_txt)
|
80 |
+
# 清除重复的换行
|
81 |
+
for _ in range(5):
|
82 |
+
meta_txt = meta_txt.replace('\n\n', '\n')
|
83 |
+
|
84 |
+
# 换行 -> 双换行
|
85 |
+
meta_txt = meta_txt.replace('\n', '\n\n')
|
86 |
+
|
87 |
+
return meta_txt, page_one_meta
|
88 |
+
|
89 |
+
|
90 |
+
@CatchException
|
91 |
+
def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, sys_prompt, WEB_PORT):
|
92 |
+
import glob
|
93 |
+
import os
|
94 |
+
|
95 |
+
# 基本信息:功能、贡献者
|
96 |
+
chatbot.append([
|
97 |
+
"函数插件功能?",
|
98 |
+
"批量总结PDF文档。函数插件贡献者: Binary-Husky(二进制哈士奇)"])
|
99 |
+
yield chatbot, history, '正常'
|
100 |
+
|
101 |
+
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
102 |
+
try:
|
103 |
+
import fitz
|
104 |
+
import tiktoken
|
105 |
+
except:
|
106 |
+
report_execption(chatbot, history,
|
107 |
+
a=f"解析项目: {txt}",
|
108 |
+
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf tiktoken```。")
|
109 |
+
yield chatbot, history, '正常'
|
110 |
+
return
|
111 |
+
|
112 |
+
# 清空历史,以免输入溢出
|
113 |
+
history = []
|
114 |
+
|
115 |
+
# 检测输入参数,如没有给定输入参数,直接退出
|
116 |
+
if os.path.exists(txt):
|
117 |
+
project_folder = txt
|
118 |
+
else:
|
119 |
+
if txt == "":
|
120 |
+
txt = '空空如也的输入栏'
|
121 |
+
report_execption(chatbot, history,
|
122 |
+
a=f"解��项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
123 |
+
yield chatbot, history, '正常'
|
124 |
+
return
|
125 |
+
|
126 |
+
# 搜索需要处理的文件清单
|
127 |
+
file_manifest = [f for f in glob.glob(
|
128 |
+
f'{project_folder}/**/*.pdf', recursive=True)]
|
129 |
+
|
130 |
+
# 如果没找到任何文件
|
131 |
+
if len(file_manifest) == 0:
|
132 |
+
report_execption(chatbot, history,
|
133 |
+
a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}")
|
134 |
+
yield chatbot, history, '正常'
|
135 |
+
return
|
136 |
+
|
137 |
+
# 开始正式执行任务
|
138 |
+
yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt)
|
139 |
+
|
140 |
+
|
141 |
+
def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, sys_prompt):
|
142 |
+
import os
|
143 |
+
import tiktoken
|
144 |
+
TOKEN_LIMIT_PER_FRAGMENT = 1600
|
145 |
+
generated_conclusion_files = []
|
146 |
+
for index, fp in enumerate(file_manifest):
|
147 |
+
# 读取PDF文件
|
148 |
+
file_content, page_one = read_and_clean_pdf_text(fp)
|
149 |
+
# 递归地切割PDF文件
|
150 |
+
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
151 |
+
enc = tiktoken.get_encoding("gpt2")
|
152 |
+
def get_token_num(txt): return len(enc.encode(txt))
|
153 |
+
# 分解文本
|
154 |
+
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
155 |
+
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
|
156 |
+
page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
157 |
+
txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
|
158 |
+
# 为了更好的效果,我们剥离Introduction之后的部分
|
159 |
+
paper_meta = page_one_fragments[0].split('introduction')[0].split(
|
160 |
+
'Introduction')[0].split('INTRODUCTION')[0]
|
161 |
+
# 单线,获取文章meta信息
|
162 |
+
paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
163 |
+
inputs=f"以下是一篇学术论文的基础信息,请从中提取出“标题”、“收录会议或期刊”、“作者”、“摘要”、“编号”、“作者邮箱”这六个部分。请用markdown格式输出,最后用中文翻译摘要部分。请提取:{paper_meta}",
|
164 |
+
inputs_show_user=f"请从{fp}中提取出“标题”、“收录会议或期刊”等基本信息。",
|
165 |
+
top_p=top_p, temperature=temperature,
|
166 |
+
chatbot=chatbot, history=[],
|
167 |
+
sys_prompt="Your job is to collect information from materials。",
|
168 |
+
)
|
169 |
+
# 多线,翻译
|
170 |
+
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
171 |
+
inputs_array=[
|
172 |
+
f"以下是你需要翻译的文章段落:\n{frag}" for frag in paper_fragments],
|
173 |
+
inputs_show_user_array=[f"" for _ in paper_fragments],
|
174 |
+
top_p=top_p, temperature=temperature,
|
175 |
+
chatbot=chatbot,
|
176 |
+
history_array=[[paper_meta] for _ in paper_fragments],
|
177 |
+
sys_prompt_array=[
|
178 |
+
"请你作为一个学术翻译,把整个段落翻译成中文,要求语言简洁,禁止重复输出原文。" for _ in paper_fragments],
|
179 |
+
max_workers=16 # OpenAI所允许的最大并行过载
|
180 |
+
)
|
181 |
+
|
182 |
+
final = ["", paper_meta_info + '\n\n---\n\n---\n\n---\n\n']
|
183 |
+
final.extend(gpt_response_collection)
|
184 |
+
create_report_file_name = f"{os.path.basename(fp)}.trans.md"
|
185 |
+
res = write_results_to_file(final, file_name=create_report_file_name)
|
186 |
+
generated_conclusion_files.append(
|
187 |
+
f'./gpt_log/{create_report_file_name}')
|
188 |
+
chatbot.append((f"{fp}完成了吗?", res))
|
189 |
+
msg = "完成"
|
190 |
+
yield chatbot, history, msg
|
191 |
+
|
192 |
+
# 准备文件的下载
|
193 |
+
import shutil
|
194 |
+
for pdf_path in generated_conclusion_files:
|
195 |
+
# 重命名文件
|
196 |
+
rename_file = f'./gpt_log/总结论文-{os.path.basename(pdf_path)}'
|
197 |
+
if os.path.exists(rename_file):
|
198 |
+
os.remove(rename_file)
|
199 |
+
shutil.copyfile(pdf_path, rename_file)
|
200 |
+
if os.path.exists(pdf_path):
|
201 |
+
os.remove(pdf_path)
|
202 |
+
chatbot.append(("给出输出文件清单", str(generated_conclusion_files)))
|
203 |
+
yield chatbot, history, msg
|
crazy_functions/理解PDF文档内容.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
+
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
+
import re
|
4 |
+
import unicodedata
|
5 |
+
fast_debug = False
|
6 |
+
|
7 |
+
def is_paragraph_break(match):
|
8 |
+
"""
|
9 |
+
根据给定的匹配结果来判断换行符是否表示段落分隔。
|
10 |
+
如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。
|
11 |
+
也可以根据之前的内容长度来判断段落是否已经足够长。
|
12 |
+
"""
|
13 |
+
prev_char, next_char = match.groups()
|
14 |
+
|
15 |
+
# 句子结束标志
|
16 |
+
sentence_endings = ".!?"
|
17 |
+
|
18 |
+
# 设定一个最小段落长度阈值
|
19 |
+
min_paragraph_length = 140
|
20 |
+
|
21 |
+
if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length:
|
22 |
+
return "\n\n"
|
23 |
+
else:
|
24 |
+
return " "
|
25 |
+
|
26 |
+
def normalize_text(text):
|
27 |
+
"""
|
28 |
+
通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。
|
29 |
+
例如,将连字 "fi" 转换为 "f" 和 "i"。
|
30 |
+
"""
|
31 |
+
# 对文本进行归一化处理,分解连字
|
32 |
+
normalized_text = unicodedata.normalize("NFKD", text)
|
33 |
+
|
34 |
+
# 替换其他特殊字符
|
35 |
+
cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text)
|
36 |
+
|
37 |
+
return cleaned_text
|
38 |
+
|
39 |
+
def clean_text(raw_text):
|
40 |
+
"""
|
41 |
+
对从 PDF 提取出的原始文本进行清洗和格式化处理。
|
42 |
+
1. 对原始文本进行归一化处理。
|
43 |
+
2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。
|
44 |
+
3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。
|
45 |
+
"""
|
46 |
+
# 对文本进行归一化处理
|
47 |
+
normalized_text = normalize_text(raw_text)
|
48 |
+
|
49 |
+
# 替换跨行的连词
|
50 |
+
text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text)
|
51 |
+
|
52 |
+
# 根据前后相邻字符的特点,找到原文本中的换行符
|
53 |
+
newlines = re.compile(r'(\S)\n(\S)')
|
54 |
+
|
55 |
+
# 根据 heuristic 规则,用空格或段落分隔符替换原换行符
|
56 |
+
final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text)
|
57 |
+
|
58 |
+
return final_text.strip()
|
59 |
+
|
60 |
+
def 解析PDF(file_name, top_p, temperature, chatbot, history, systemPromptTxt):
|
61 |
+
import time, glob, os, fitz
|
62 |
+
print('begin analysis on:', file_name)
|
63 |
+
|
64 |
+
with fitz.open(file_name) as doc:
|
65 |
+
file_content = ""
|
66 |
+
for page in doc:
|
67 |
+
file_content += page.get_text()
|
68 |
+
file_content = clean_text(file_content)
|
69 |
+
# print(file_content)
|
70 |
+
split_number = 10000
|
71 |
+
split_group = (len(file_content)//split_number)+1
|
72 |
+
for i in range(0,split_group):
|
73 |
+
if i==0:
|
74 |
+
prefix = "接下来请你仔细分析下面的论文,学习里面的内容(专业术语、公式、数学概念).并且注意:由于论文内容较多,将分批次发送,每次发送完之后,你只需要回答“接受完成”"
|
75 |
+
i_say = prefix + f'文件名是{file_name},文章内容第{i+1}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
76 |
+
i_say_show_user = f'文件名是:\n{file_name},\n由于论文内容过长,将分批请求(共{len(file_content)}字符,将分为{split_group}批,每批{split_number}字符)。\n当前发送{i+1}/{split_group}部分'
|
77 |
+
elif i==split_group-1:
|
78 |
+
i_say = f'你只需要回答“所有论文接受完成,请进行下一步”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:]}```'
|
79 |
+
i_say_show_user = f'当前发送{i+1}/{split_group}部分'
|
80 |
+
else:
|
81 |
+
i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
82 |
+
i_say_show_user = f'当前发送{i+1}/{split_group}部分'
|
83 |
+
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
|
84 |
+
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
|
85 |
+
while "完成" not in gpt_say:
|
86 |
+
i_say = f'你只需要回答“接受完成”。文章内容第{i+1}/{split_group}部分是 ```{file_content[i*split_number:(i+1)*split_number]}```'
|
87 |
+
i_say_show_user = f'出现error,重新发送{i+1}/{split_group}部分'
|
88 |
+
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
|
89 |
+
time.sleep(1)
|
90 |
+
chatbot[-1] = (i_say_show_user, gpt_say)
|
91 |
+
history.append(i_say_show_user); history.append(gpt_say)
|
92 |
+
yield chatbot, history, '正常'
|
93 |
+
time.sleep(2)
|
94 |
+
|
95 |
+
i_say = f'接下来,请你扮演一名专业的学术教授,利用你的所有知识并且结合这篇文章,回答我的问题。(请牢记:1.直到我说“退出”,你才能结束任务;2.所有问���需要紧密围绕文章内容;3.如果有公式,请使用tex渲染)'
|
96 |
+
chatbot.append((i_say, "[Local Message] waiting gpt response."))
|
97 |
+
yield chatbot, history, '正常'
|
98 |
+
|
99 |
+
# ** gpt request **
|
100 |
+
gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
|
101 |
+
chatbot[-1] = (i_say, gpt_say)
|
102 |
+
history.append(i_say); history.append(gpt_say)
|
103 |
+
yield chatbot, history, '正常'
|
104 |
+
|
105 |
+
|
106 |
+
@CatchException
|
107 |
+
def 理解PDF文档内容(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
108 |
+
import glob, os
|
109 |
+
|
110 |
+
# 基本信息:功能、贡献者
|
111 |
+
chatbot.append([
|
112 |
+
"函数插件功能?",
|
113 |
+
"理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
|
114 |
+
yield chatbot, history, '正常'
|
115 |
+
|
116 |
+
import tkinter as tk
|
117 |
+
from tkinter import filedialog
|
118 |
+
|
119 |
+
root = tk.Tk()
|
120 |
+
root.withdraw()
|
121 |
+
txt = filedialog.askopenfilename()
|
122 |
+
|
123 |
+
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
124 |
+
try:
|
125 |
+
import fitz
|
126 |
+
except:
|
127 |
+
report_execption(chatbot, history,
|
128 |
+
a = f"解析项目: {txt}",
|
129 |
+
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
|
130 |
+
yield chatbot, history, '正常'
|
131 |
+
return
|
132 |
+
|
133 |
+
# 清空历史,以免输入溢出
|
134 |
+
history = []
|
135 |
+
|
136 |
+
# 开始正式执行任务
|
137 |
+
yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
@CatchException
|
142 |
+
def 理解PDF文档内容标准文件输入(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
143 |
+
import glob, os
|
144 |
+
|
145 |
+
# 基本信息:功能、贡献者
|
146 |
+
chatbot.append([
|
147 |
+
"函数插件功能?",
|
148 |
+
"理解PDF论文内容,并且将结合上下文内容,进行学术解答。函数插件贡献者: Hanzoe。"])
|
149 |
+
yield chatbot, history, '正常'
|
150 |
+
|
151 |
+
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
152 |
+
try:
|
153 |
+
import fitz
|
154 |
+
except:
|
155 |
+
report_execption(chatbot, history,
|
156 |
+
a = f"解析项目: {txt}",
|
157 |
+
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
|
158 |
+
yield chatbot, history, '正常'
|
159 |
+
return
|
160 |
+
|
161 |
+
# 清空历史,以免输入溢出
|
162 |
+
history = []
|
163 |
+
|
164 |
+
# 检测输入参数,如没有给定输入参数,直接退出
|
165 |
+
if os.path.exists(txt):
|
166 |
+
project_folder = txt
|
167 |
+
else:
|
168 |
+
if txt == "":
|
169 |
+
txt = '空空如也的输入栏'
|
170 |
+
report_execption(chatbot, history,
|
171 |
+
a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
172 |
+
yield chatbot, history, '正常'
|
173 |
+
return
|
174 |
+
|
175 |
+
# 搜索需要处理的文件清单
|
176 |
+
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
|
177 |
+
# 如果没找到任何文件
|
178 |
+
if len(file_manifest) == 0:
|
179 |
+
report_execption(chatbot, history,
|
180 |
+
a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}")
|
181 |
+
yield chatbot, history, '正常'
|
182 |
+
return
|
183 |
+
txt = file_manifest[0]
|
184 |
+
# 开始正式执行任务
|
185 |
+
yield from 解析PDF(txt, top_p, temperature, chatbot, history, systemPromptTxt)
|
crazy_functions/生成函数注释.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
crazy_functions/解析项目源代码.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
@@ -119,8 +119,8 @@ def 解析一个C项目的头文件(txt, top_p, temperature, chatbot, history, s
|
|
119 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
120 |
yield chatbot, history, '正常'
|
121 |
return
|
122 |
-
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)]
|
123 |
-
|
124 |
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
|
125 |
if len(file_manifest) == 0:
|
126 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
|
@@ -141,6 +141,7 @@ def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptT
|
|
141 |
return
|
142 |
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
|
143 |
[f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
|
|
|
144 |
[f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
|
145 |
if len(file_manifest) == 0:
|
146 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
|
@@ -148,3 +149,66 @@ def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptT
|
|
148 |
return
|
149 |
yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
|
|
119 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
120 |
yield chatbot, history, '正常'
|
121 |
return
|
122 |
+
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
|
123 |
+
[f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] #+ \
|
124 |
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
|
125 |
if len(file_manifest) == 0:
|
126 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
|
|
|
141 |
return
|
142 |
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
|
143 |
[f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
|
144 |
+
[f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] + \
|
145 |
[f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
|
146 |
if len(file_manifest) == 0:
|
147 |
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
|
|
|
149 |
return
|
150 |
yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
151 |
|
152 |
+
|
153 |
+
@CatchException
|
154 |
+
def 解析一个Java项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
155 |
+
history = [] # 清空历史,以免输入溢出
|
156 |
+
import glob, os
|
157 |
+
if os.path.exists(txt):
|
158 |
+
project_folder = txt
|
159 |
+
else:
|
160 |
+
if txt == "": txt = '空空如也的输入栏'
|
161 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
162 |
+
yield chatbot, history, '正常'
|
163 |
+
return
|
164 |
+
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.java', recursive=True)] + \
|
165 |
+
[f for f in glob.glob(f'{project_folder}/**/*.jar', recursive=True)] + \
|
166 |
+
[f for f in glob.glob(f'{project_folder}/**/*.xml', recursive=True)] + \
|
167 |
+
[f for f in glob.glob(f'{project_folder}/**/*.sh', recursive=True)]
|
168 |
+
if len(file_manifest) == 0:
|
169 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
|
170 |
+
yield chatbot, history, '正常'
|
171 |
+
return
|
172 |
+
yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
173 |
+
|
174 |
+
|
175 |
+
@CatchException
|
176 |
+
def 解析一个Rect项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
177 |
+
history = [] # 清空历史,以免输入溢出
|
178 |
+
import glob, os
|
179 |
+
if os.path.exists(txt):
|
180 |
+
project_folder = txt
|
181 |
+
else:
|
182 |
+
if txt == "": txt = '空空如也的输入栏'
|
183 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
184 |
+
yield chatbot, history, '正常'
|
185 |
+
return
|
186 |
+
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.ts', recursive=True)] + \
|
187 |
+
[f for f in glob.glob(f'{project_folder}/**/*.tsx', recursive=True)] + \
|
188 |
+
[f for f in glob.glob(f'{project_folder}/**/*.json', recursive=True)] + \
|
189 |
+
[f for f in glob.glob(f'{project_folder}/**/*.js', recursive=True)] + \
|
190 |
+
[f for f in glob.glob(f'{project_folder}/**/*.jsx', recursive=True)]
|
191 |
+
if len(file_manifest) == 0:
|
192 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rect文件: {txt}")
|
193 |
+
yield chatbot, history, '正常'
|
194 |
+
return
|
195 |
+
yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
196 |
+
|
197 |
+
|
198 |
+
@CatchException
|
199 |
+
def 解析一个Golang项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
200 |
+
history = [] # 清空历史,以免输入溢出
|
201 |
+
import glob, os
|
202 |
+
if os.path.exists(txt):
|
203 |
+
project_folder = txt
|
204 |
+
else:
|
205 |
+
if txt == "": txt = '空空如也的输入栏'
|
206 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
207 |
+
yield chatbot, history, '正常'
|
208 |
+
return
|
209 |
+
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.go', recursive=True)]
|
210 |
+
if len(file_manifest) == 0:
|
211 |
+
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
|
212 |
+
yield chatbot, history, '正常'
|
213 |
+
return
|
214 |
+
yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
|
crazy_functions/读文章写摘要.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
|
|
1 |
+
from request_llm.bridge_chatgpt import predict_no_ui
|
2 |
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
|
3 |
fast_debug = False
|
4 |
|
crazy_functions/谷歌检索小助手.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
2 |
+
from toolbox import CatchException, report_execption, write_results_to_file
|
3 |
+
|
4 |
+
def get_meta_information(url, chatbot, history):
|
5 |
+
import requests
|
6 |
+
import arxiv
|
7 |
+
import difflib
|
8 |
+
from bs4 import BeautifulSoup
|
9 |
+
from toolbox import get_conf
|
10 |
+
proxies, = get_conf('proxies')
|
11 |
+
headers = {
|
12 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
|
13 |
+
}
|
14 |
+
# 发送 GET 请求
|
15 |
+
response = requests.get(url, proxies=proxies, headers=headers)
|
16 |
+
|
17 |
+
# 解析网页内容
|
18 |
+
soup = BeautifulSoup(response.text, "html.parser")
|
19 |
+
|
20 |
+
def string_similar(s1, s2):
|
21 |
+
return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
|
22 |
+
|
23 |
+
profile = []
|
24 |
+
# 获取所有文章的标题和作者
|
25 |
+
for result in soup.select(".gs_ri"):
|
26 |
+
title = result.a.text.replace('\n', ' ').replace(' ', ' ')
|
27 |
+
author = result.select_one(".gs_a").text
|
28 |
+
try:
|
29 |
+
citation = result.select_one(".gs_fl > a[href*='cites']").text # 引用次数是链接中的文本,直接取出来
|
30 |
+
except:
|
31 |
+
citation = 'cited by 0'
|
32 |
+
abstract = result.select_one(".gs_rs").text.strip() # 摘要在 .gs_rs 中的文本,需要清除首尾空格
|
33 |
+
search = arxiv.Search(
|
34 |
+
query = title,
|
35 |
+
max_results = 1,
|
36 |
+
sort_by = arxiv.SortCriterion.Relevance,
|
37 |
+
)
|
38 |
+
paper = next(search.results())
|
39 |
+
if string_similar(title, paper.title) > 0.90: # same paper
|
40 |
+
abstract = paper.summary.replace('\n', ' ')
|
41 |
+
is_paper_in_arxiv = True
|
42 |
+
else: # different paper
|
43 |
+
abstract = abstract
|
44 |
+
is_paper_in_arxiv = False
|
45 |
+
paper = next(search.results())
|
46 |
+
print(title)
|
47 |
+
print(author)
|
48 |
+
print(citation)
|
49 |
+
profile.append({
|
50 |
+
'title':title,
|
51 |
+
'author':author,
|
52 |
+
'citation':citation,
|
53 |
+
'abstract':abstract,
|
54 |
+
'is_paper_in_arxiv':is_paper_in_arxiv,
|
55 |
+
})
|
56 |
+
|
57 |
+
chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
|
58 |
+
msg = "正常"
|
59 |
+
yield chatbot, [], msg
|
60 |
+
return profile
|
61 |
+
|
62 |
+
@CatchException
|
63 |
+
def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
64 |
+
# 基本信息:功能、贡献者
|
65 |
+
chatbot.append([
|
66 |
+
"函数插件功能?",
|
67 |
+
"分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
|
68 |
+
yield chatbot, history, '正常'
|
69 |
+
|
70 |
+
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
71 |
+
try:
|
72 |
+
import arxiv
|
73 |
+
from bs4 import BeautifulSoup
|
74 |
+
except:
|
75 |
+
report_execption(chatbot, history,
|
76 |
+
a = f"解析项目: {txt}",
|
77 |
+
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
|
78 |
+
yield chatbot, history, '正常'
|
79 |
+
return
|
80 |
+
|
81 |
+
# 清空历史,以免输入溢出
|
82 |
+
history = []
|
83 |
+
|
84 |
+
meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
|
85 |
+
|
86 |
+
if len(meta_paper_info_list[:10]) > 0:
|
87 |
+
i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
|
88 |
+
"1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);4、引用数量(cite);5、中文摘要翻译。" + \
|
89 |
+
f"以下是信息源:{str(meta_paper_info_list[:10])}"
|
90 |
+
|
91 |
+
inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
|
92 |
+
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
93 |
+
inputs=i_say, inputs_show_user=inputs_show_user,
|
94 |
+
top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
|
95 |
+
sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
|
96 |
+
)
|
97 |
+
|
98 |
+
history.extend([ "第一批", gpt_say ])
|
99 |
+
meta_paper_info_list = meta_paper_info_list[10:]
|
100 |
+
|
101 |
+
chatbot.append(["状态?", "已经全部完成"])
|
102 |
+
msg = '正常'
|
103 |
+
yield chatbot, history, msg
|
104 |
+
res = write_results_to_file(history)
|
105 |
+
chatbot.append(("完成了吗?", res));
|
106 |
+
yield chatbot, history, msg
|
crazy_functions/高级功能函数模板.py
CHANGED
@@ -1,25 +1,20 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
import datetime
|
4 |
-
|
5 |
@CatchException
|
6 |
def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
7 |
history = [] # 清空历史,以免输入溢出
|
8 |
-
chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]
|
9 |
yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
|
10 |
-
|
11 |
for i in range(5):
|
12 |
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
|
13 |
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
|
14 |
i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
inputs=i_say, top_p=top_p, temperature=temperature, history=[],
|
21 |
-
sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。") # 请求gpt,需要一段时间
|
22 |
-
|
23 |
chatbot[-1] = (i_say, gpt_say)
|
24 |
history.append(i_say);history.append(gpt_say)
|
25 |
-
yield chatbot, history, '正常'
|
|
|
1 |
+
from toolbox import CatchException
|
2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
3 |
import datetime
|
|
|
4 |
@CatchException
|
5 |
def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
|
6 |
history = [] # 清空历史,以免输入溢出
|
7 |
+
chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板(该函数只有20行代码)。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组,请不吝PR!"))
|
8 |
yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
|
|
|
9 |
for i in range(5):
|
10 |
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
|
11 |
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
|
12 |
i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
|
13 |
+
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
14 |
+
inputs=i_say, inputs_show_user=i_say,
|
15 |
+
top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
|
16 |
+
sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
|
17 |
+
)
|
|
|
|
|
|
|
18 |
chatbot[-1] = (i_say, gpt_say)
|
19 |
history.append(i_say);history.append(gpt_say)
|
20 |
+
yield chatbot, history, '正常'
|
main.py
CHANGED
@@ -1,26 +1,35 @@
|
|
1 |
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
2 |
import gradio as gr
|
3 |
-
from
|
4 |
-
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf
|
5 |
|
6 |
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
7 |
-
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT = \
|
8 |
-
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT')
|
9 |
|
10 |
# 如果WEB_PORT是-1, 则随机选取WEB端口
|
11 |
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
|
12 |
if not AUTHENTICATION: AUTHENTICATION = None
|
13 |
|
|
|
14 |
initial_prompt = "Serve me as a writing and programming assistant."
|
15 |
-
title_html = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# 一些普通功能模块
|
18 |
-
from
|
19 |
-
functional =
|
20 |
|
21 |
# 高级函数插件
|
22 |
-
from
|
23 |
-
crazy_fns =
|
24 |
|
25 |
# 处理markdown文本格式的转变
|
26 |
gr.Chatbot.postprocess = format_io
|
@@ -29,40 +38,52 @@ gr.Chatbot.postprocess = format_io
|
|
29 |
from theme import adjust_theme, advanced_css
|
30 |
set_theme = adjust_theme()
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
cancel_handles = []
|
33 |
-
with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
34 |
gr.HTML(title_html)
|
35 |
-
with
|
36 |
-
with
|
37 |
chatbot = gr.Chatbot()
|
38 |
chatbot.style(height=CHATBOT_HEIGHT)
|
39 |
history = gr.State([])
|
40 |
-
with
|
41 |
-
with gr.
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
with gr.Accordion("基础功能区", open=True) as area_basic_fn:
|
52 |
with gr.Row():
|
53 |
for k in functional:
|
54 |
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
|
55 |
functional[k]["Button"] = gr.Button(k, variant=variant)
|
56 |
-
with gr.Accordion("函数插件区", open=
|
57 |
with gr.Row():
|
58 |
-
gr.Markdown("
|
59 |
with gr.Row():
|
60 |
for k in crazy_fns:
|
61 |
if not crazy_fns[k].get("AsButton", True): continue
|
62 |
variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
|
63 |
crazy_fns[k]["Button"] = gr.Button(k, variant=variant)
|
|
|
64 |
with gr.Row():
|
65 |
-
with gr.Accordion("更多函数插件", open=
|
66 |
dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)]
|
67 |
with gr.Column(scale=1):
|
68 |
dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False)
|
@@ -71,38 +92,51 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
|
|
71 |
with gr.Row():
|
72 |
with gr.Accordion("点击展开“文件上传区”。上传本地文件可供红色函数插件调用。", open=False) as area_file_up:
|
73 |
file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
|
74 |
-
with gr.Accordion("展开SysPrompt &
|
75 |
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
|
76 |
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
|
77 |
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
|
78 |
-
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
# 功能区显示开关与功能区的互动
|
81 |
def fn_area_visibility(a):
|
82 |
ret = {}
|
83 |
ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
|
84 |
ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
|
|
|
|
|
|
|
85 |
return ret
|
86 |
-
checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn] )
|
87 |
# 整理反复出现的控件句柄组合
|
88 |
-
input_combo = [txt, top_p, temperature, chatbot, history, system_prompt]
|
89 |
output_combo = [chatbot, history, status]
|
90 |
-
predict_args = dict(fn=predict, inputs=input_combo, outputs=output_combo)
|
91 |
-
empty_txt_args = dict(fn=lambda: "", inputs=[], outputs=[txt]) # 用于在提交后清空输入栏
|
92 |
# 提交按钮、重置按钮
|
93 |
-
cancel_handles.append(txt.submit(**predict_args))
|
94 |
-
cancel_handles.append(
|
|
|
|
|
95 |
resetBtn.click(lambda: ([], [], "已重置"), None, output_combo)
|
|
|
96 |
# 基础功能区的回调函数注册
|
97 |
for k in functional:
|
98 |
-
click_handle = functional[k]["Button"].click(predict, [*input_combo, gr.State(True), gr.State(k)], output_combo)
|
99 |
cancel_handles.append(click_handle)
|
100 |
# 文件上传区,接收文件后与chatbot的互动
|
101 |
file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt])
|
102 |
# 函数插件-固定按钮区
|
103 |
for k in crazy_fns:
|
104 |
if not crazy_fns[k].get("AsButton", True): continue
|
105 |
-
click_handle = crazy_fns[k]["Button"].click(crazy_fns[k]["Function"], [*input_combo, gr.State(PORT)], output_combo)
|
106 |
click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
|
107 |
cancel_handles.append(click_handle)
|
108 |
# 函数插件-下拉菜单与随变按钮的互动
|
@@ -112,8 +146,8 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
|
|
112 |
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
|
113 |
# 随变按钮的回调函数注册
|
114 |
def route(k, *args, **kwargs):
|
115 |
-
if k in [r"打开插件列表", r"
|
116 |
-
yield from crazy_fns[k]["Function"](*args, **kwargs)
|
117 |
click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
|
118 |
click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
|
119 |
# def expand_file_area(file_upload, area_file_up):
|
@@ -122,14 +156,19 @@ with gr.Blocks(theme=set_theme, analytics_enabled=False, css=advanced_css) as de
|
|
122 |
cancel_handles.append(click_handle)
|
123 |
# 终止按钮的回调函数注册
|
124 |
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
|
125 |
-
|
126 |
# gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
|
127 |
def auto_opentab_delay():
|
128 |
import threading, webbrowser, time
|
129 |
-
print(f"如果浏览器没有自动打开,请复制并转到以下URL
|
|
|
|
|
130 |
def open():
|
131 |
-
time.sleep(2)
|
132 |
-
webbrowser.open_new_tab(f"http://localhost:{PORT}")
|
133 |
threading.Thread(target=open, name="open-browser", daemon=True).start()
|
|
|
134 |
|
135 |
demo.launch(auth=(os.environ.get("USER"), os.environ.get("PASSWD")))
|
|
|
|
|
|
1 |
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
2 |
import gradio as gr
|
3 |
+
from request_llm.bridge_chatgpt import predict
|
4 |
+
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
|
5 |
|
6 |
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
7 |
+
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT = \
|
8 |
+
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT')
|
9 |
|
10 |
# 如果WEB_PORT是-1, 则随机选取WEB端口
|
11 |
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
|
12 |
if not AUTHENTICATION: AUTHENTICATION = None
|
13 |
|
14 |
+
from check_proxy import get_current_version
|
15 |
initial_prompt = "Serve me as a writing and programming assistant."
|
16 |
+
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
|
17 |
+
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
|
18 |
+
|
19 |
+
# 问询记录, python 版本建议3.9+(越新越好)
|
20 |
+
import logging
|
21 |
+
os.makedirs("gpt_log", exist_ok=True)
|
22 |
+
try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
|
23 |
+
except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
|
24 |
+
print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
|
25 |
|
26 |
# 一些普通功能模块
|
27 |
+
from core_functional import get_core_functions
|
28 |
+
functional = get_core_functions()
|
29 |
|
30 |
# 高级函数插件
|
31 |
+
from crazy_functional import get_crazy_functions
|
32 |
+
crazy_fns = get_crazy_functions()
|
33 |
|
34 |
# 处理markdown文本格式的转变
|
35 |
gr.Chatbot.postprocess = format_io
|
|
|
38 |
from theme import adjust_theme, advanced_css
|
39 |
set_theme = adjust_theme()
|
40 |
|
41 |
+
# 代理与自动更新
|
42 |
+
from check_proxy import check_proxy, auto_update
|
43 |
+
proxy_info = check_proxy(proxies)
|
44 |
+
|
45 |
+
gr_L1 = lambda: gr.Row().style()
|
46 |
+
gr_L2 = lambda scale: gr.Column(scale=scale)
|
47 |
+
if LAYOUT == "TOP-DOWN":
|
48 |
+
gr_L1 = lambda: DummyWith()
|
49 |
+
gr_L2 = lambda scale: gr.Row()
|
50 |
+
CHATBOT_HEIGHT /= 2
|
51 |
+
|
52 |
cancel_handles = []
|
53 |
+
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
54 |
gr.HTML(title_html)
|
55 |
+
with gr_L1():
|
56 |
+
with gr_L2(scale=2):
|
57 |
chatbot = gr.Chatbot()
|
58 |
chatbot.style(height=CHATBOT_HEIGHT)
|
59 |
history = gr.State([])
|
60 |
+
with gr_L2(scale=1):
|
61 |
+
with gr.Accordion("输入区", open=True) as area_input_primary:
|
62 |
+
with gr.Row():
|
63 |
+
txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
|
64 |
+
with gr.Row():
|
65 |
+
submitBtn = gr.Button("提交", variant="primary")
|
66 |
+
with gr.Row():
|
67 |
+
resetBtn = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm")
|
68 |
+
stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
|
69 |
+
with gr.Row():
|
70 |
+
status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}")
|
71 |
with gr.Accordion("基础功能区", open=True) as area_basic_fn:
|
72 |
with gr.Row():
|
73 |
for k in functional:
|
74 |
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
|
75 |
functional[k]["Button"] = gr.Button(k, variant=variant)
|
76 |
+
with gr.Accordion("函数插件区", open=True) as area_crazy_fn:
|
77 |
with gr.Row():
|
78 |
+
gr.Markdown("注意:以下“红颜色”标识的函数插件需从输入区读取路径作为参数.")
|
79 |
with gr.Row():
|
80 |
for k in crazy_fns:
|
81 |
if not crazy_fns[k].get("AsButton", True): continue
|
82 |
variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
|
83 |
crazy_fns[k]["Button"] = gr.Button(k, variant=variant)
|
84 |
+
crazy_fns[k]["Button"].style(size="sm")
|
85 |
with gr.Row():
|
86 |
+
with gr.Accordion("更多函数插件", open=True):
|
87 |
dropdown_fn_list = [k for k in crazy_fns.keys() if not crazy_fns[k].get("AsButton", True)]
|
88 |
with gr.Column(scale=1):
|
89 |
dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="").style(container=False)
|
|
|
92 |
with gr.Row():
|
93 |
with gr.Accordion("点击展开“文件上传区”。上传本地文件可供红色函数插件调用。", open=False) as area_file_up:
|
94 |
file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
|
95 |
+
with gr.Accordion("展开SysPrompt & 交互界面布局 & Github地址", open=(LAYOUT == "TOP-DOWN")):
|
96 |
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
|
97 |
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
|
98 |
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
|
99 |
+
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
|
100 |
+
gr.Markdown(description)
|
101 |
+
with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
|
102 |
+
with gr.Row():
|
103 |
+
txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False)
|
104 |
+
with gr.Row():
|
105 |
+
submitBtn2 = gr.Button("提交", variant="primary")
|
106 |
+
with gr.Row():
|
107 |
+
resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm")
|
108 |
+
stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
|
109 |
# 功能区显示开关与功能区的互动
|
110 |
def fn_area_visibility(a):
|
111 |
ret = {}
|
112 |
ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
|
113 |
ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
|
114 |
+
ret.update({area_input_primary: gr.update(visible=("底部输入区" not in a))})
|
115 |
+
ret.update({area_input_secondary: gr.update(visible=("底部输入区" in a))})
|
116 |
+
if "底部输入区" in a: ret.update({txt: gr.update(value="")})
|
117 |
return ret
|
118 |
+
checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
|
119 |
# 整理反复出现的控件句柄组合
|
120 |
+
input_combo = [txt, txt2, top_p, temperature, chatbot, history, system_prompt]
|
121 |
output_combo = [chatbot, history, status]
|
122 |
+
predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
|
|
|
123 |
# 提交按钮、重置按钮
|
124 |
+
cancel_handles.append(txt.submit(**predict_args))
|
125 |
+
cancel_handles.append(txt2.submit(**predict_args))
|
126 |
+
cancel_handles.append(submitBtn.click(**predict_args))
|
127 |
+
cancel_handles.append(submitBtn2.click(**predict_args))
|
128 |
resetBtn.click(lambda: ([], [], "已重置"), None, output_combo)
|
129 |
+
resetBtn2.click(lambda: ([], [], "已重置"), None, output_combo)
|
130 |
# 基础功能区的回调函数注册
|
131 |
for k in functional:
|
132 |
+
click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
|
133 |
cancel_handles.append(click_handle)
|
134 |
# 文件上传区,接收文件后与chatbot的互动
|
135 |
file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt])
|
136 |
# 函数插件-固定按钮区
|
137 |
for k in crazy_fns:
|
138 |
if not crazy_fns[k].get("AsButton", True): continue
|
139 |
+
click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
|
140 |
click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
|
141 |
cancel_handles.append(click_handle)
|
142 |
# 函数插件-下拉菜单与随变按钮的互动
|
|
|
146 |
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt] )
|
147 |
# 随变按钮的回调函数注册
|
148 |
def route(k, *args, **kwargs):
|
149 |
+
if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
|
150 |
+
yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs)
|
151 |
click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
|
152 |
click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
|
153 |
# def expand_file_area(file_upload, area_file_up):
|
|
|
156 |
cancel_handles.append(click_handle)
|
157 |
# 终止按钮的回调函数注册
|
158 |
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
|
159 |
+
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
|
160 |
# gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
|
161 |
def auto_opentab_delay():
|
162 |
import threading, webbrowser, time
|
163 |
+
print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
|
164 |
+
print(f"\t(亮色主题): http://localhost:{PORT}")
|
165 |
+
print(f"\t(暗色主题): http://localhost:{PORT}/?__dark-theme=true")
|
166 |
def open():
|
167 |
+
time.sleep(2) # 打开浏览器
|
168 |
+
webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
|
169 |
threading.Thread(target=open, name="open-browser", daemon=True).start()
|
170 |
+
threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
|
171 |
|
172 |
demo.launch(auth=(os.environ.get("USER"), os.environ.get("PASSWD")))
|
173 |
+
#auto_opentab_delay()
|
174 |
+
#demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=True, server_port=PORT, auth=AUTHENTICATION)
|
request_llm/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 如何使用其他大语言模型(dev分支测试中)
|
2 |
+
|
3 |
+
## 1. 先运行text-generation
|
4 |
+
``` sh
|
5 |
+
# 下载模型( text-generation 这么牛的项目,别忘了给人家star )
|
6 |
+
git clone https://github.com/oobabooga/text-generation-webui.git
|
7 |
+
|
8 |
+
# 安装text-generation的额外依赖
|
9 |
+
pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
|
10 |
+
|
11 |
+
# 切换路径
|
12 |
+
cd text-generation-webui
|
13 |
+
|
14 |
+
# 下载模型
|
15 |
+
python download-model.py facebook/galactica-1.3b
|
16 |
+
# 其他可选如 facebook/opt-1.3b
|
17 |
+
# facebook/galactica-6.7b
|
18 |
+
# facebook/galactica-120b
|
19 |
+
# facebook/pygmalion-1.3b 等
|
20 |
+
# 详情见 https://github.com/oobabooga/text-generation-webui
|
21 |
+
|
22 |
+
# 启动text-generation,注意把模型的斜杠改成下划线
|
23 |
+
python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
|
24 |
+
```
|
25 |
+
|
26 |
+
## 2. 修改config.py
|
27 |
+
``` sh
|
28 |
+
# LLM_MODEL格式较复杂 TGUI:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
|
29 |
+
LLM_MODEL = "TGUI:galactica-1.3b@localhost:7860"
|
30 |
+
```
|
31 |
+
|
32 |
+
## 3. 运行!
|
33 |
+
``` sh
|
34 |
+
cd chatgpt-academic
|
35 |
+
python main.py
|
36 |
+
```
|
request_llm/bridge_chatgpt.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
|
2 |
+
|
3 |
+
"""
|
4 |
+
该文件中主要包含三个函数
|
5 |
+
|
6 |
+
不具备多线程能力的函数:
|
7 |
+
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
|
8 |
+
|
9 |
+
具备多线程调用能力的函数
|
10 |
+
2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑
|
11 |
+
3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
|
12 |
+
"""
|
13 |
+
|
14 |
+
import json
|
15 |
+
import time
|
16 |
+
import gradio as gr
|
17 |
+
import logging
|
18 |
+
import traceback
|
19 |
+
import requests
|
20 |
+
import importlib
|
21 |
+
|
22 |
+
# config_private.py放自己的秘密如API和代理网址
|
23 |
+
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
24 |
+
from toolbox import get_conf
|
25 |
+
proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY, LLM_MODEL = \
|
26 |
+
get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'LLM_MODEL')
|
27 |
+
|
28 |
+
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
|
29 |
+
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
|
30 |
+
|
31 |
+
def get_full_error(chunk, stream_response):
|
32 |
+
"""
|
33 |
+
获取完整的从Openai返回的报错
|
34 |
+
"""
|
35 |
+
while True:
|
36 |
+
try:
|
37 |
+
chunk += next(stream_response)
|
38 |
+
except:
|
39 |
+
break
|
40 |
+
return chunk
|
41 |
+
|
42 |
+
def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
|
43 |
+
"""
|
44 |
+
发送至chatGPT,等待回复,一次性完成,不显示中间过程。
|
45 |
+
predict函数的简化版。
|
46 |
+
用于payload比较大的情况,或者用于实现多线、带嵌套的复杂功能。
|
47 |
+
|
48 |
+
inputs 是本次问询的输入
|
49 |
+
top_p, temperature是chatGPT的内部调优参数
|
50 |
+
history 是之前的对话列表
|
51 |
+
(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误,然后raise ConnectionAbortedError)
|
52 |
+
"""
|
53 |
+
headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt=sys_prompt, stream=False)
|
54 |
+
|
55 |
+
retry = 0
|
56 |
+
while True:
|
57 |
+
try:
|
58 |
+
# make a POST request to the API endpoint, stream=False
|
59 |
+
response = requests.post(API_URL, headers=headers, proxies=proxies,
|
60 |
+
json=payload, stream=False, timeout=TIMEOUT_SECONDS*2); break
|
61 |
+
except requests.exceptions.ReadTimeout as e:
|
62 |
+
retry += 1
|
63 |
+
traceback.print_exc()
|
64 |
+
if retry > MAX_RETRY: raise TimeoutError
|
65 |
+
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
66 |
+
|
67 |
+
try:
|
68 |
+
result = json.loads(response.text)["choices"][0]["message"]["content"]
|
69 |
+
return result
|
70 |
+
except Exception as e:
|
71 |
+
if "choices" not in response.text: print(response.text)
|
72 |
+
raise ConnectionAbortedError("Json解析不合常规,可能是文本过长" + response.text)
|
73 |
+
|
74 |
+
|
75 |
+
def predict_no_ui_long_connection(inputs, top_p, temperature, history=None, sys_prompt="", observe_window=None):
    """
    Send a request to chatGPT and wait for the full reply in one shot, without
    showing intermediate progress in the UI. Internally the request is streamed
    to avoid the connection being dropped mid-way on flaky networks.

    Args:
        inputs: the question for this call.
        sys_prompt: silent system prompt.
        top_p, temperature: chatGPT sampling parameters.
        history: previous conversation turns. Defaults to an empty list.
        observe_window: optional cross-thread channel, mostly for visual
            effect. observe_window[0] accumulates the partial output;
            observe_window[1] is a watchdog timestamp — if it is not
            refreshed within ``watch_dog_patience`` seconds, the request
            is aborted with RuntimeError.

    Returns:
        str: the assistant's reply.

    Raises:
        TimeoutError: after more than MAX_RETRY consecutive read timeouts.
        ConnectionAbortedError: request refused for length, or output
            truncated because of the token limit.
        RuntimeError: request refused for other reasons, unexpected JSON
            structure, or watchdog expiry.
    """
    # BUG FIX: ``history=[]`` was a shared mutable default argument.
    if history is None:
        history = []
    watch_dog_patience = 5  # watchdog patience; 5 seconds is enough
    headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            response = requests.post(API_URL, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
            break
        except requests.exceptions.ReadTimeout:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY != 0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    while True:
        try:
            chunk = next(stream_response).decode()
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            # Connection hiccup: retry the read once; a second failure is fatal.
            chunk = next(stream_response).decode()
        if len(chunk) == 0: continue
        if not chunk.startswith('data:'):
            # Non-SSE frame means OpenAI returned an error body.
            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break      # empty delta marks the end of the stream
        if "role" in delta: continue   # the first frame only announces the role
        if "content" in delta:
            result += delta["content"]
            print(delta["content"], end='')
            if observe_window is not None:
                # Observation window: publish what has been received so far.
                if len(observe_window) >= 1: observe_window[0] += delta["content"]
                # Watchdog: abort if nobody refreshed the timestamp in time.
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("程序终止。")
        else:
            # BUG FIX: ``delta`` is a dict — concatenating it directly to a
            # str raised TypeError and masked the real problem.
            raise RuntimeError("意外Json结构:" + str(delta))
    if json_data['finish_reason'] == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
|
136 |
+
|
137 |
+
|
138 |
+
def predict(inputs, top_p, temperature, chatbot=None, history=None, system_prompt='',
            stream=True, additional_fn=None):
    """
    Send a request to chatGPT and stream the reply back, yielding UI updates.
    Used for the basic chat feature.

    Args:
        inputs: the question for this call.
        top_p, temperature: chatGPT sampling parameters.
        chatbot: list of (question, answer) pairs shown in the WebUI; it is
            mutated and yielded so the chat view refreshes live.
        history: previous conversation turns (too much content in either
            inputs or history triggers a token-overflow error).
        system_prompt: silent system prompt.
        stream: whether to stream the response.
        additional_fn: which core-function button was clicked (see
            core_functional.py); None for a plain chat message.

    Yields:
        (chatbot, history, status_text) tuples for the UI.
    """
    # BUG FIX: ``chatbot=[]`` / ``history=[]`` were shared mutable defaults.
    if chatbot is None: chatbot = []
    if history is None: history = []
    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # hot-reload the prompt definitions
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]:
            inputs = core_functional[additional_fn]["PreProcess"](inputs)  # optional pre-processing hook
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    if stream:
        raw_input = inputs
        logging.info(f'[raw_input] {raw_input}')
        chatbot.append((inputs, ""))
        yield chatbot, history, "等待响应"

    headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt, stream)
    history.append(inputs); history.append(" ")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            response = requests.post(API_URL, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
            break
        except Exception:
            # Narrowed from a bare ``except:`` (which also caught Ctrl-C).
            retry += 1
            chatbot[-1] = (chatbot[-1][0], timeout_bot_msg)
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield chatbot, history, "请求超时" + retry_msg
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            chunk = next(stream_response)
            if is_head_of_the_stream:
                # The first frame of the stream carries no content.
                is_head_of_the_stream = False
                continue

            if chunk:
                try:
                    # Strip the "data: " SSE prefix and parse ONCE per frame
                    # (the original parsed the same chunk up to three times).
                    chunkjson = json.loads(chunk.decode()[6:])
                    if len(chunkjson['choices'][0]["delta"]) == 0:
                        # Empty delta marks the end of the stream; the reply
                        # buffer is now complete.
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # Main body of the stream.
                    status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
                    # A failure here usually means the input was too long;
                    # see get_full_error's output for details.
                    gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield chatbot, history, status_text

                except Exception:
                    traceback.print_exc()
                    yield chatbot, history, "Json解析不合常规"
                    chunk = get_full_error(chunk, stream_response)
                    error_msg = chunk.decode()
                    if "reduce the length" in error_msg:
                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长,或历史数据过长. 历史缓存数据现已释放,您可以请再次尝试.")
                        history = []  # clear the history so the user can recover
                    elif "Incorrect API key" in error_msg:
                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由,拒绝服务.")
                    elif "exceeded your current quota" in error_msg:
                        chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由,拒绝服务.")
                    else:
                        from toolbox import regular_txt_to_markdown
                        tb_str = '```\n' + traceback.format_exc() + '```'
                        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk.decode()[4:])}")
                    yield chatbot, history, "Json异常" + error_msg
                    return
|
223 |
+
|
224 |
+
def generate_payload(inputs, top_p, temperature, history, system_prompt, stream):
    """
    Assemble the HTTP headers and JSON payload for a chat-completion request:
    select the LLM model, fold the flat conversation history into OpenAI's
    message format, and attach the sampling parameters.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2 * conversation_cnt, 2):
            user_turn = {"role": "user", "content": history[index]}
            assistant_turn = {"role": "assistant", "content": history[index + 1]}
            if user_turn["content"] != "":
                # Skip pairs whose answer is empty or was a timeout notice.
                if assistant_turn["content"] == "": continue
                if assistant_turn["content"] == timeout_bot_msg: continue
                messages.append(user_turn)
                messages.append(assistant_turn)
            else:
                # Empty question: fold the answer into the previous message.
                messages[-1]['content'] = assistant_turn['content']

    messages.append({"role": "user", "content": inputs})

    payload = {
        "model": LLM_MODEL,
        "messages": messages,
        "temperature": temperature,  # 1.0,
        "top_p": top_p,  # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    print(f" {LLM_MODEL} : {conversation_cnt} : {inputs}")
    return headers, payload
|
270 |
+
|
271 |
+
|
request_llm/bridge_tgui.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
Contributed by SagsMug. Modified by binary-husky
https://github.com/oobabooga/text-generation-webui/pull/175
'''

import asyncio
import json
import random
import string
import websockets
import logging
import time
import threading
import importlib
from toolbox import get_conf

# Read the configured model string from the project config.
LLM_MODEL, = get_conf('LLM_MODEL')

# "TGUI:galactica-1.3b@localhost:7860"
# Expected format: "<model_name>@<host>:<port>"; split into the pieces used
# by run() to open the websocket connection.
model_name, addr_port = LLM_MODEL.split('@')
assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
addr, port = addr_port.split(':')
|
22 |
+
|
23 |
+
def random_hash():
    """Return a random 9-character session hash of lowercase letters and digits."""
    pool = string.ascii_lowercase + string.digits
    return ''.join(random.choice(pool) for _ in range(9))
|
26 |
+
|
27 |
+
async def run(context, max_token=512):
    """
    Stream one generation from a text-generation-webui (TGUI) server over its
    Gradio websocket queue, yielding the accumulated output after each step.

    Args:
        context: the prompt string sent to the model.
        max_token: maximum number of new tokens to generate.

    Yields:
        str: the output text so far (grows with every generation step).
    """
    # Sampling parameters; forwarded positionally in the "data" list below,
    # so the ordering there must match the TGUI endpoint's signature.
    params = {
        'max_new_tokens': max_token,
        'do_sample': True,
        'temperature': 0.5,
        'top_p': 0.9,
        'typical_p': 1,
        'repetition_penalty': 1.05,
        'encoder_repetition_penalty': 1.0,
        'top_k': 0,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': True,
        'seed': -1,
    }
    session = random_hash()

    async with websockets.connect(f"ws://{addr}:{port}/queue/join") as websocket:
        while content := json.loads(await websocket.recv()):
            # Python 3.10 syntax, replace with if elif on older
            if content["msg"] == "send_hash":
                # Join the Gradio queue. NOTE(review): fn_index 12 is
                # presumably the generate endpoint of this TGUI build —
                # verify when upgrading the server.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12
                }))
            elif content["msg"] == "estimation":
                pass
            elif content["msg"] == "send_data":
                # Our turn in the queue: submit the prompt and parameters.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12,
                    "data": [
                        context,
                        params['max_new_tokens'],
                        params['do_sample'],
                        params['temperature'],
                        params['top_p'],
                        params['typical_p'],
                        params['repetition_penalty'],
                        params['encoder_repetition_penalty'],
                        params['top_k'],
                        params['min_length'],
                        params['no_repeat_ngram_size'],
                        params['num_beams'],
                        params['penalty_alpha'],
                        params['length_penalty'],
                        params['early_stopping'],
                        params['seed'],
                    ]
                }))
            elif content["msg"] == "process_starts":
                pass
            elif content["msg"] in ["process_generating", "process_completed"]:
                yield content["output"]["data"][0]
                # You can search for your desired end indicator and
                # stop generation by closing the websocket here
                if (content["msg"] == "process_completed"):
                    break
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
|
92 |
+
|
93 |
+
def predict_tgui(inputs, top_p, temperature, chatbot=None, history=None, system_prompt='', stream=True, additional_fn=None):
    """
    Stream a reply from the text-generation-webui (TGUI) backend, yielding
    UI updates. Used for the basic chat feature.

    Args:
        inputs: the question for this call.
        top_p, temperature: sampling parameters. NOTE(review): they are not
            forwarded here — run() uses its own defaults; confirm intended.
        chatbot: list of (question, answer) pairs shown in the WebUI; mutated
            and yielded so the chat view refreshes live.
        history: previous conversation turns.
        system_prompt: silent system prompt (not forwarded to TGUI here).
        stream: accepted for interface compatibility with predict().
        additional_fn: which core-function button was clicked (see
            core_functional.py); None for a plain chat message.

    Yields:
        (chatbot, history, status_text) tuples for the UI.
    """
    # BUG FIX: ``chatbot=[]`` / ``history=[]`` were shared mutable defaults.
    if chatbot is None: chatbot = []
    if history is None: history = []
    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # hot-reload the prompt definitions
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]:
            inputs = core_functional[additional_fn]["PreProcess"](inputs)  # optional pre-processing hook
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    raw_input = "What I would like to say is the following: " + inputs
    logging.info(f'[raw_input] {raw_input}')
    history.extend([inputs, ""])
    chatbot.append([inputs, ""])
    yield chatbot, history, "等待响应"

    prompt = inputs
    tgui_say = ""

    mutable = ["", time.time()]  # [accumulated output, watchdog timestamp]
    def run_coorotine(mutable):
        async def get_result(mutable):
            async for response in run(prompt):
                print(response[len(mutable[0]):])  # print only the newly generated part
                mutable[0] = response
                if (time.time() - mutable[1]) > 3:
                    # Watchdog: the main loop stopped refreshing the timestamp.
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,), daemon=True)
    thread_listen.start()

    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()  # feed the watchdog while we are listening
        # Publish intermediate output to the UI.
        if tgui_say != mutable[0]:
            tgui_say = mutable[0]
            history[-1] = tgui_say
            chatbot[-1] = (history[-2], history[-1])
            # NOTE(review): the literal "status_text" looks like it was meant
            # to be a variable — confirm before changing the visible status.
            yield chatbot, history, "status_text"

    logging.info(f'[response] {tgui_say}')
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
def predict_tgui_no_ui(inputs, top_p, temperature, history=None, sys_prompt=""):
    """
    Query the TGUI backend and block until the (short) reply is complete,
    with no UI streaming. Mirrors predict_no_ui for the TGUI bridge.

    Args:
        inputs: the question for this call.
        top_p, temperature, history, sys_prompt: accepted for interface
            compatibility with predict_no_ui but not forwarded to the
            backend here (run() uses its own sampling defaults).

    Returns:
        str: the generated reply (capped at 20 new tokens).
    """
    # BUG FIX: ``history=[]`` was a shared mutable default argument.
    if history is None:
        history = []
    prompt = inputs
    mutable = ["", time.time()]  # [accumulated output, watchdog timestamp]

    def run_coorotine(mutable):
        async def get_result(mutable):
            async for response in run(prompt, max_token=20):
                print(response[len(mutable[0]):])  # print only the newly generated part
                mutable[0] = response
                if (time.time() - mutable[1]) > 3:
                    # Watchdog: the listener stopped refreshing the timestamp.
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    # BUG FIX: daemon=True so a hung websocket cannot keep the whole process
    # alive at shutdown (consistent with predict_tgui).
    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,), daemon=True)
    thread_listen.start()
    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()  # feed the watchdog while we are listening
    tgui_say = mutable[0]
    return tgui_say
|
requirements.txt
CHANGED
@@ -1,5 +1,13 @@
|
|
1 |
gradio>=3.23
|
2 |
requests[socks]
|
|
|
|
|
|
|
|
|
3 |
mdtex2html
|
|
|
4 |
Markdown
|
5 |
-
|
|
|
|
|
|
|
|
1 |
gradio>=3.23
|
2 |
requests[socks]
|
3 |
+
transformers
|
4 |
+
python-markdown-math
|
5 |
+
beautifulsoup4
|
6 |
+
latex2mathml
|
7 |
mdtex2html
|
8 |
+
tiktoken
|
9 |
Markdown
|
10 |
+
pygments
|
11 |
+
pymupdf
|
12 |
+
openai
|
13 |
+
numpy
|
self_analysis.md
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# chatgpt-academic项目自译解报告
|
2 |
+
(Author补充:以下分析均由本项目调用ChatGPT一键生成,如果有不准确的地方,全怪GPT😄)
|
3 |
+
|
4 |
+
## 对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括'check_proxy.py', 'config.py'等)。
|
5 |
+
|
6 |
+
整体概括:
|
7 |
+
|
8 |
+
该程序是一个基于自然语言处理和机器学习的科学论文辅助工具,主要功能包括聊天机器人、批量总结PDF文档、批量翻译PDF文档、生成函数注释、解析项目源代码等。程序基于 Gradio 构建 Web 服务,并集成了代理和自动更新功能,提高了用户的使用体验。
|
9 |
+
|
10 |
+
文件功能表格:
|
11 |
+
|
12 |
+
| 文件名称 | 功能 |
|
13 |
+
| ------------------------------------------------------------ | ------------------------------------------------------------ |
|
14 |
+
| .\check_proxy.py | 检查代理设置功能。 |
|
15 |
+
| .\config.py | 配置文件,存储程序的基本设置。 |
|
16 |
+
| .\config_private.py | 存储代理网络地址的文件。 |
|
17 |
+
| .\core_functional.py | 主要的程序逻辑,包括聊天机器人和文件处理。 |
|
18 |
+
| .\cradle.py | 程序入口,初始化程序和启动 Web 服务。 |
|
19 |
+
| .\crazy_functional.py | 辅助程序功能,包括PDF文档处理、代码处理、函数注释生成等。 |
|
20 |
+
| .\main.py | 包含聊天机器人的具体实现。 |
|
21 |
+
| .\show_math.py | 处理 LaTeX 公式的函数。 |
|
22 |
+
| .\theme.py | 存储 Gradio Web 服务的 CSS 样式文件。 |
|
23 |
+
| .\toolbox.py | 提供了一系列工具函数,包括文件读写、网页抓取、解析函数参数、生成 HTML 等。 |
|
24 |
+
| ./crazy_functions/crazy_utils.py | 提供各种工具函数,如解析字符串、清洗文本、清理目录结构等。 |
|
25 |
+
| ./crazy_functions/\_\_init\_\_.py | crazy_functions 模块的入口文件。 |
|
26 |
+
| ./crazy_functions/下载arxiv论文翻译摘要.py | 对 arxiv.org 上的 PDF 论文进行下载和翻译。 |
|
27 |
+
| ./crazy_functions/代码重写为全英文_多线程.py | 将代码文件中的中文注释和字符串替换为英文。 |
|
28 |
+
| ./crazy_functions/总结word文档.py | 读取 Word 文档并生成摘要。 |
|
29 |
+
| ./crazy_functions/批量总结PDF文档.py | 批量读取 PDF 文件并生成摘要。 |
|
30 |
+
| ./crazy_functions/批量总结PDF文档pdfminer.py | 使用 pdfminer 库进行 PDF 文件处理。 |
|
31 |
+
| ./crazy_functions/批量翻译PDF文档_多线程.py | 使用多线程技术批量翻译 PDF 文件。 |
|
32 |
+
| ./crazy_functions/生成函数注释.py | 给 Python 函数自动生成说明文档。 |
|
33 |
+
| ./crazy_functions/解析项目源代码.py | 解析项目中的源代码,提取注释和函数名等信息。 |
|
34 |
+
| ./crazy_functions/读文章写摘要.py | 读取多个文本文件并生成对应的摘要。 |
|
35 |
+
| ./crazy_functions/高级功能函数模板.py | 使用 GPT 模型进行文本处理。 |
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
## [0/22] 程序概述: check_proxy.py
|
40 |
+
|
41 |
+
该程序的文件名是check_proxy.py,主要有两个函数:check_proxy和auto_update。
|
42 |
+
|
43 |
+
check_proxy函数中会借助requests库向一个IP查询API发送请求,并返回该IP的地理位置信息。同时根据返回的数据来判断代理是否有效。
|
44 |
+
|
45 |
+
auto_update函数主要用于检查程序更新,会从Github获取程序最新的版本信息,如果当前版本和最新版本相差较大,则会提示用户进行更新。该函数中也会依赖requests库进行网络请求。
|
46 |
+
|
47 |
+
在程序的开头,还添加了一句防止代理网络影响的代码。程序使用了自己编写的toolbox模块中的get_conf函数来获取代理设置。
|
48 |
+
|
49 |
+
## [1/22] 程序概述: config.py
|
50 |
+
|
51 |
+
该程序文件是一个Python模块,文件名为config.py。该模块包含了一些变量和配置选项,用于配置一个OpenAI的聊天机器人。具体的配置选项如下:
|
52 |
+
|
53 |
+
- API_KEY: 密钥,用于连接OpenAI的API。需要填写有效的API密钥。
|
54 |
+
- USE_PROXY: 是否使用代理。如果需要使用代理,需要将其改为True。
|
55 |
+
- proxies: 代理的协议、地址和端口。
|
56 |
+
- CHATBOT_HEIGHT: 聊天机器人对话框的高度。
|
57 |
+
- LAYOUT: 聊天机器人对话框的布局,默认为左右布局。
|
58 |
+
- TIMEOUT_SECONDS: 发送请求到OpenAI后,等待多久判定为超时。
|
59 |
+
- WEB_PORT: 网页的端口,-1代表随机端口。
|
60 |
+
- MAX_RETRY: 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制。
|
61 |
+
- LLM_MODEL: OpenAI模型选择,目前只对某些用户开放的gpt4。
|
62 |
+
- API_URL: OpenAI的API地址。
|
63 |
+
- CONCURRENT_COUNT: 使用的线程数。
|
64 |
+
- AUTHENTICATION: 用户名和密码,如果需要。
|
65 |
+
|
66 |
+
## [2/22] 程序概述: config_private.py
|
67 |
+
|
68 |
+
该程序文件名为config_private.py,包含了API_KEY的设置和代理的配置。使用了一个名为API_KEY的常量来存储私人的API密钥。此外,还有一个名为USE_PROXY的常量来标记是否需要使用代理。如果需要代理,则使用了一个名为proxies的字典来存储代理网络的地址,其中包括协议类型、地址和端口。
|
69 |
+
|
70 |
+
## [3/22] 程序概述: core_functional.py
|
71 |
+
|
72 |
+
该程序文件名为`core_functional.py`,主要是定义了一些核心功能函数,包括英语和中文学术润色、查找语法错误、中译英、学术中英互译、英译中、找图片和解释代码等。每个功能都有一个`Prefix`属性和`Suffix`属性,`Prefix`是指在用户输入的任务前面要显示的文本,`Suffix`是指在任务后面要显示的文本。此外,还有一个`Color`属性指示按钮的颜色,以及一个`PreProcess`函数表示对输入进行预处理的函数。
|
73 |
+
|
74 |
+
## [4/22] 程序概述: cradle.py
|
75 |
+
|
76 |
+
该程序文件名为cradle.py,主要功能是检测当前版本与远程最新版本是否一致,如果不一致则输出新版本信息并提示更新。其流程大致如下:
|
77 |
+
|
78 |
+
1. 导入相关模块与自定义工具箱函数get_conf
|
79 |
+
2. 读取配置文件中的代理proxies
|
80 |
+
3. 使用requests模块请求远程版本信息(url为https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version)并加载为json格式
|
81 |
+
4. 获取远程版本号、是否显示新功能信息、新功能内容
|
82 |
+
5. 读取本地版本文件version并加载为json格式
|
83 |
+
6. 获取当前版本号
|
84 |
+
7. 比较当前版本与远程版本,如果远程版本号比当前版本号高0.05以上,则输出新版本信息并提示更新
|
85 |
+
8. 如果不需要更新,则直接返回
|
86 |
+
|
87 |
+
## [5/22] 程序概述: crazy_functional.py
|
88 |
+
|
89 |
+
该程序文件名为.\crazy_functional.py,主要定义了一个名为get_crazy_functions()的函数,该函数返回一个字典类型的变量function_plugins,其中包含了一些函数插件。
|
90 |
+
|
91 |
+
一些重要的函数插件包括:
|
92 |
+
|
93 |
+
- 读文章写摘要:可以自动读取Tex格式的论文,并生成其摘要。
|
94 |
+
|
95 |
+
- 批量生成函数注释:可以批量生成Python函数的文档注释。
|
96 |
+
|
97 |
+
- 解析项目源代码:可以解析Python、C++、Golang、Java及React项目的源代码。
|
98 |
+
|
99 |
+
- 批量总结PDF文档:可以对PDF文档进行批量总结,以提取其中的关键信息。
|
100 |
+
|
101 |
+
- 一键下载arxiv论文并翻译摘要:可以自动下载arxiv.org网站上的PDF论文,并翻译生成其摘要。
|
102 |
+
|
103 |
+
- 批量翻译PDF文档(多线程):可以对PDF文档进行批量翻译,并使用多线程方式提高翻译效率。
|
104 |
+
|
105 |
+
## [6/22] 程序概述: main.py
|
106 |
+
|
107 |
+
本程序为一个基于 Gradio 和 GPT-3 的交互式聊天机器人,文件名为 main.py。其中主要功能包括:
|
108 |
+
|
109 |
+
1. 使用 Gradio 建立 Web 界面,实现用户与聊天机器人的交互;
|
110 |
+
2. 通过 bridge_chatgpt 模块,利用 GPT-3 模型实现聊天机器人的逻辑;
|
111 |
+
3. 提供一些基础功能和高级函数插件,用户可以通过按钮选择使用;
|
112 |
+
4. 提供文档格式转变、外观调整以及代理和自动更新等功能。
|
113 |
+
|
114 |
+
程序的主要流程为:
|
115 |
+
|
116 |
+
1. 导入所需的库和模块,并通过 get_conf 函数获取配置信息;
|
117 |
+
2. 设置 Gradio 界面的各个组件,包括聊天窗口、输入区、功能区、函数插件区等;
|
118 |
+
3. 注册各个组件的回调函数,包括用户输入、信号按钮等,实现机器人逻辑的交互;
|
119 |
+
4. 通过 Gradio 的 queue 函数和 launch 函数启动 Web 服务,并提供聊天机器人的功能。
|
120 |
+
|
121 |
+
此外,程序还提供了代理和自动更新功能,可以确保用户的使用体验。
|
122 |
+
|
123 |
+
## [7/22] 程序概述: show_math.py
|
124 |
+
|
125 |
+
该程序是一个Python脚本,文件名为show_math.py。它转换Markdown和LaTeX混合语法到带MathML的HTML。程序使用latex2mathml模块来实现从LaTeX到MathML的转换,将符号转换为HTML实体以批量处理。程序利用正则表达式和递归函数的方法处理不同形式的LaTeX语法,支持以下四种情况:$$形式、$形式、\[..]形式和\(...\)形式。如果无法转换某个公式,则在该位置插入一条错误消息。最后,程序输出HTML字符串。
|
126 |
+
|
127 |
+
## [8/22] 程序概述: theme.py
|
128 |
+
|
129 |
+
该程序文件为一个Python脚本,其功能是调整Gradio应用的主题和样式,包括字体、颜色、阴影、背景等等。在程序中,使用了Gradio提供的默认颜色主题,并针对不同元素设置了相应的样式属性,以达到美化显示的效果。此外,程序中还包含了一段高级CSS样式代码,针对表格、列表、聊天气泡、行内代码等元素进行了样式设定。
|
130 |
+
|
131 |
+
## [9/22] 程序概述: toolbox.py
|
132 |
+
|
133 |
+
此程序文件主要包含了一系列用于聊天机器人开发的实用工具函数和装饰器函数。主要函数包括:
|
134 |
+
|
135 |
+
1. ArgsGeneralWrapper:一个装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
|
136 |
+
|
137 |
+
2. get_reduce_token_percent:一个函数,用于计算自然语言处理时会出现的token溢出比例。
|
138 |
+
|
139 |
+
3. predict_no_ui_but_counting_down:一个函数,调用聊天接口,并且保留了一定的界面心跳功能,即当对话太长时,会自动采用二分法截断。
|
140 |
+
|
141 |
+
4. write_results_to_file:一个函数,将对话记录history生成Markdown格式的文本,并写入文件中。
|
142 |
+
|
143 |
+
5. regular_txt_to_markdown:一个函数,将普通文本转换为Markdown格式的文本。
|
144 |
+
|
145 |
+
6. CatchException:一个装饰器函数,捕捉函数调度中的异常,并封装到一个生成器中返回,并显示到聊天当中。
|
146 |
+
|
147 |
+
7. HotReload:一个装饰器函数,实现函数插件的热更新。
|
148 |
+
|
149 |
+
8. report_execption:一个函数,向chatbot中添加错误信息。
|
150 |
+
|
151 |
+
9. text_divide_paragraph:一个函数,将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。
|
152 |
+
|
153 |
+
10. markdown_convertion:一个函数,将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
|
154 |
+
|
155 |
+
11. close_up_code_segment_during_stream:一个函数,用于在gpt输出代码的中途,即输出了前面的```,但还没输出完后面的```,补上后面的```。
|
156 |
+
|
157 |
+
12. format_io:一个函数,将输入和输出解析为HTML格式。将输出部分的Markdown和数学公式转换为HTML格式。
|
158 |
+
|
159 |
+
13. find_free_port:一个函数,返回当前系统中可用的未使用端口。
|
160 |
+
|
161 |
+
14. extract_archive:一个函数,解压缩文件。
|
162 |
+
|
163 |
+
15. find_recent_files:一个函数,查找目录下一分钟内创建的文件。
|
164 |
+
|
165 |
+
16. on_file_uploaded:一个函数,响应用户上传的文件。
|
166 |
+
|
167 |
+
## [10/22] 程序概述: crazy_functions\crazy_utils.py
|
168 |
+
|
169 |
+
这是一个名为"crazy_utils.py"的Python程序文件,包含了两个函数:
|
170 |
+
1. `breakdown_txt_to_satisfy_token_limit()`:接受文本字符串、计算文本单词数量的函数和单词数量限制作为输入参数,将长文本拆分成合适的长度,以满足单词数量限制。这个函数使用一个递归方法去拆分长文本。
|
171 |
+
2. `breakdown_txt_to_satisfy_token_limit_for_pdf()`:类似于`breakdown_txt_to_satisfy_token_limit()`,但是它使用一个不同的递归方法来拆分长文本,以满足PDF文档中的需求。当出现无法继续拆分的情况时,该函数将使用一个中文句号标记插入文本来截断长文本。如果还是无法拆分,则会引发运行时异常。
|
172 |
+
|
173 |
+
## [11/22] 程序概述: crazy_functions\__init__.py
|
174 |
+
|
175 |
+
这个程序文件是一个 Python 的包,包名为 "crazy_functions",并且是其中的一个子模块 "__init__.py"。该包中可能包含多个函数或类,用于实现各种疯狂的功能。由于该文件的具体代码没有给出,因此无法进一步确定该包中的功能。通常情况下,一个包应该具有 __init__.py、__main__.py 和其它相关的模块文件,用于实现该包的各种功能。
|
176 |
+
|
177 |
+
## [12/22] 程序概述: crazy_functions\下载arxiv论文翻译摘要.py
|
178 |
+
|
179 |
+
这个程序实现的功能是下载arxiv论文并翻译摘要,文件名为`下载arxiv论文翻译摘要.py`。这个程序引入了`requests`、`unicodedata`、`os`、`re`等Python标准库,以及`pdfminer`、`bs4`等第三方库。其中`download_arxiv_`函数主要实现了从arxiv网站下载论文的功能,包括解析链接、获取论文信息、下载论文和生成文件名等,`get_name`函数则是为了从arxiv网站中获取论文信息创建的辅助函数。`下载arxiv论文并翻译摘要`函数则是实现了从下载好的PDF文件中提取摘要,然后使用预先训练的GPT模型翻译为中文的功能。同时,该函数还会将历史记录写入文件中。函数还会通过`CatchException`函数来捕获程序中出现的异常信息。
|
180 |
+
|
181 |
+
## [13/22] 程序概述: crazy_functions\代码重写为全英文_多线程.py
|
182 |
+
|
183 |
+
该程序文件为一个Python多线程程序,文件名为"crazy_functions\代码重写为全英文_多线程.py"。该程序使用了多线程技术,将一个大任务拆成多个小任务,同时执行,提高运行效率。
|
184 |
+
|
185 |
+
程序的主要功能是将Python文件中的中文转换为英文,同时将转换后的代码输出。程序先清空历史记录,然后尝试导入openai和transformers等依赖库。程序接下来会读取当前路径下的.py文件和crazy_functions文件夹中的.py文件,并将其整合成一个文件清单。随后程序会使用GPT2模型进行中英文的翻译,并将结果保存在本地路径下的"gpt_log/generated_english_version"文件夹中。程序最终会生成一个任务执行报告。
|
186 |
+
|
187 |
+
需要注意的是,该程序依赖于"request_llm"和"toolbox"库以及本地的"crazy_utils"模块。
|
188 |
+
|
189 |
+
## [14/22] 程序概述: crazy_functions\总结word文档.py
|
190 |
+
|
191 |
+
该程序文件是一个 Python 脚本文件,文件名为 ./crazy_functions/总结word文档.py。该脚本是一个函数插件,提供了名为“总结word文档”的函数。该函数的主要功能是批量读取给定文件夹下的 Word 文档文件,并使用 GPT 模型生成对每个文件的概述和意见建议。其中涉及到了读取 Word 文档、使用 GPT 模型等操作,依赖于许多第三方库。该文件也提供了导入依赖的方法,使用该脚本需要安装依赖库 python-docx 和 pywin32。函数功能实现的过程中,使用了一些用于调试的变量(如 fast_debug),可在需要时设置为 True。该脚本文件也提供了对程序功能和贡献者的注释。
|
192 |
+
|
193 |
+
## [15/22] 程序概述: crazy_functions\批量总结PDF文档.py
|
194 |
+
|
195 |
+
该程序文件名为 `./crazy_functions\批量总结PDF文档.py`,主要实现了批量处理PDF文档的功能。具体实现了以下几个函数:
|
196 |
+
|
197 |
+
1. `is_paragraph_break(match)`:根据给定的匹配结果判断换行符是否表示段落分隔。
|
198 |
+
2. `normalize_text(text)`:通过将文本特殊符号转换为其基本形式来对文本进行归一化处理。
|
199 |
+
3. `clean_text(raw_text)`:对从 PDF 提取出的原始文本进行清洗和格式化处理。
|
200 |
+
4. `解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)`:对给定的PDF文件进行分析并生成相应的概述。
|
201 |
+
5. `批量总结PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)`:批量处理PDF文件,对其进行摘要生成。
|
202 |
+
|
203 |
+
其中,主要用到了第三方库`pymupdf`对PDF文件进行处理。程序通过调用`fitz.open`函数打开PDF文件,使用`page.get_text()`方法获取PDF文本内容。然后,使用`clean_text`函数对文本进行清洗和格式化处理,生成最终的摘要。最后,调用`write_results_to_file`函数将历史记录写入文件并输出。
|
204 |
+
|
205 |
+
## [16/22] 程序概述: crazy_functions\批量总结PDF文档pdfminer.py
|
206 |
+
|
207 |
+
这个程序文件名是./crazy_functions\批量总结PDF文档pdfminer.py,是一个用于批量读取PDF文件,解析其中的内容,并对其进行概括的程序。程序中引用了pdfminer和beautifulsoup4等Python库,读取PDF文件并将其转化为文本内容,然后利用GPT模型生成摘要语言,最终输出一个中文和英文的摘要。程序还有一些错误处理的代码,会输出错误信息。
|
208 |
+
|
209 |
+
## [17/22] 程序概述: crazy_functions\批量翻译PDF文档_多线程.py
|
210 |
+
|
211 |
+
这是一个 Python 程序文件,文件名为 `批量翻译PDF文档_多线程.py`,包含多个函数。主要功能是批量处理 PDF 文档,解析其中的文本,进行清洗和格式化处理,并使用 OpenAI 的 GPT 模型进行翻译。其中使用了多线程技术来提高程序的效率和并行度。
|
212 |
+
|
213 |
+
## [18/22] 程序概述: crazy_functions\生成函数注释.py
|
214 |
+
|
215 |
+
该程序文件名为./crazy_functions\生成函数注释.py。该文件包含两个函数,分别为`生成函数注释`和`批量生成函数注释`。
|
216 |
+
|
217 |
+
函数`生成函数注释`包含参数`file_manifest`、`project_folder`、`top_p`、`temperature`、`chatbot`、`history`和`systemPromptTxt`。其中,`file_manifest`为一个包含待处理文件路径的列表,`project_folder`表示项目文件夹路径,`top_p`和`temperature`是GPT模型参数,`chatbot`为与用户交互的聊天机器人,`history`记录聊天机器人与用户的历史记录,`systemPromptTxt`为聊天机器人发送信息前的提示语。`生成函数注释`通过读取文件内容,并调用GPT模型对文件中的所有函数生成注释,最后使用markdown表格输出结果。函数中还包含一些条件判断和计时器,以及调用其他自定义模块的函数。
|
218 |
+
|
219 |
+
函数`批量生成函数注释`包含参数`txt`、`top_p`、`temperature`、`chatbot`、`history`、`systemPromptTxt`和`WEB_PORT`。其中,`txt`表示用户输入的项目文件夹路径,其他参数含义与`生成函数注释`中相同。`批量生成函数注释`主要是通过解析项目文件夹,获取所有待处理文件的路径,并调用函数`生成函数注释`对每个文件进行处理,最终生成注释表格输出给用户。
|
220 |
+
|
221 |
+
## [19/22] 程序概述: crazy_functions\解析项目源代码.py
|
222 |
+
|
223 |
+
该程序文件包含了多个函数,用于解析不同类型的项目,如Python项目、C项目、Java项目等。其中,最核心的函��是`解析源代码()`,它会对给定的一组文件进行分析,并返回对应的结果。具体流程如下:
|
224 |
+
|
225 |
+
1. 遍历所有待分析的文件,对每个文件进行如下处理:
|
226 |
+
|
227 |
+
1.1 从文件中读取代码内容,构造成一个字符串。
|
228 |
+
|
229 |
+
1.2 构造一条GPT请求,向`predict_no_ui_but_counting_down()`函数发送请求,等待GPT回复。
|
230 |
+
|
231 |
+
1.3 将GPT回复添加到机器人会话列表中,更新历史记录。
|
232 |
+
|
233 |
+
1.4 如果不是快速调试模式,则等待2秒钟,继续分析下一个文件。
|
234 |
+
|
235 |
+
2. 如果所有文件都分析完成,则向机器人会话列表中添加一条新消息,提示用户整个分析过程已经结束。
|
236 |
+
|
237 |
+
3. 返回机器人会话列表和历史记录。
|
238 |
+
|
239 |
+
除此之外,该程序文件还定义了若干个函数,用于针对不同类型的项目进行解析。这些函数会按照不同的方式调用`解析源代码()`函数。例如,对于Python项目,只需要分析.py文件;对于C项目,需要同时分析.h和.cpp文件等。每个函数中都会首先根据给定的项目路径读取相应的文件,然后调用`解析源代码()`函数进行分析。
|
240 |
+
|
241 |
+
## [20/22] 程序概述: crazy_functions\读文章写摘要.py
|
242 |
+
|
243 |
+
该程序文件为一个名为“读文章写摘要”的Python函数,用于解析项目文件夹中的.tex文件,并使用GPT模型生成文章的中英文摘要。函数使用了request_llm.bridge_chatgpt和toolbox模块中的函数,并包含两个子函数:解析Paper和CatchException。函数参数包括txt,top_p,temperature,chatbot,history,systemPromptTxt和WEB_PORT。执行过程中函数首先清空历史,然后根据项目文件夹中的.tex文件列表,对每个文件调用解析Paper函数生成中文摘要,最后根据所有文件的中文摘要,调用GPT模型生成英文摘要。函数运行过程中会将结果写入文件并返回聊天机器人和历史记录。
|
244 |
+
|
245 |
+
## [21/22] 程序概述: crazy_functions\高级功能函数模板.py
|
246 |
+
|
247 |
+
该程序文件为一个高级功能函数模板,文件名为"./crazy_functions\高级功能函数模板.py"。
|
248 |
+
|
249 |
+
该文件导入了两个模块,分别是"request_llm.bridge_chatgpt"和"toolbox"。其中"request_llm.bridge_chatgpt"模块包含了一个函数"predict_no_ui_long_connection",该函数用于请求GPT模型进行对话生成。"toolbox"模块包含了三个函数,分别是"catchException"、"report_exception"和"write_results_to_file"函数,这三个函数主要用于异常处理和日志记录等。
|
250 |
+
|
251 |
+
该文件定义了一个名为"高阶功能模板函数"的函数,并通过"decorator"装饰器将该函数装饰为一个异常处理函数,可以处理函数执行过程中出现的错误。该函数的作用是生成历史事件查询的问题,并向用户询问历史中哪些事件发生在指定日期,并索要相关图片。在查询完所有日期后,该函数返回所有历史事件及其相关图片的列表。其中,该函数的输入参数包括:
|
252 |
+
|
253 |
+
1. txt: 一个字符串,表示当前消息的文本内容。
|
254 |
+
2. top_p: 一个浮点数,表示GPT模型生成文本时的"top_p"参数。
|
255 |
+
3. temperature: 一个浮点数,表示GPT模型生成文本时的"temperature"参数。
|
256 |
+
4. chatbot: 一个列表,表示当前对话的记录列表。
|
257 |
+
5. history: 一个列表,表示当前对话的历史记录列表。
|
258 |
+
6. systemPromptTxt: 一个字符串,表示当前对话的系统提示信息。
|
259 |
+
7. WEB_PORT: 一个整数,表示当前应用程序的WEB端口号。
|
260 |
+
|
261 |
+
该函数在执行过程中,会先清空历史记录,以免输入溢出。然后,它会循环5次,生成5个历史事件查询的问题,并向用户请求输入相关信息。每次询问不携带之前的询问历史。在生成每个问题时,该函数会向"chatbot"列表中添加一条消息记录,并设置该记录的初始状态为"[Local Message] waiting gpt response."。然后,该函数会调用"predict_no_ui_long_connection"函数向GPT模型请求生成一段文本,并将生成的文本作为回答。如果请求过程中出现异常,该函数会忽略异常。最后,该函数将问题和回答添加到"chatbot"列表和"history"列表中,并将"chatbot"和"history"列表作为函数的返回值返回。
|
262 |
+
|
theme.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
|
|
3 |
# gradio可用颜色列表
|
4 |
# gr.themes.utils.colors.slate (石板色)
|
5 |
# gr.themes.utils.colors.gray (灰色)
|
@@ -24,14 +25,16 @@ import gradio as gr
|
|
24 |
# gr.themes.utils.colors.pink (粉红色)
|
25 |
# gr.themes.utils.colors.rose (玫瑰色)
|
26 |
|
|
|
27 |
def adjust_theme():
|
28 |
-
try:
|
29 |
color_er = gr.themes.utils.colors.pink
|
30 |
-
set_theme = gr.themes.Default(
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
35 |
set_theme.set(
|
36 |
# Colors
|
37 |
input_background_fill_dark="*neutral_800",
|
@@ -77,18 +80,150 @@ def adjust_theme():
|
|
77 |
button_cancel_text_color=color_er.c600,
|
78 |
button_cancel_text_color_dark="white",
|
79 |
)
|
80 |
-
except:
|
81 |
-
set_theme = None
|
|
|
82 |
return set_theme
|
83 |
|
|
|
84 |
advanced_css = """
|
|
|
85 |
.markdown-body table {
|
86 |
-
|
87 |
border-collapse: collapse;
|
|
|
88 |
}
|
89 |
|
|
|
90 |
.markdown-body th, .markdown-body td {
|
91 |
-
border:
|
92 |
padding: 5px;
|
93 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
"""
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from toolbox import get_conf
|
3 |
+
CODE_HIGHLIGHT, = get_conf('CODE_HIGHLIGHT')
|
4 |
# gradio可用颜色列表
|
5 |
# gr.themes.utils.colors.slate (石板色)
|
6 |
# gr.themes.utils.colors.gray (灰色)
|
|
|
25 |
# gr.themes.utils.colors.pink (粉红色)
|
26 |
# gr.themes.utils.colors.rose (玫瑰色)
|
27 |
|
28 |
+
|
29 |
def adjust_theme():
|
30 |
+
try:
|
31 |
color_er = gr.themes.utils.colors.pink
|
32 |
+
set_theme = gr.themes.Default(
|
33 |
+
primary_hue=gr.themes.utils.colors.sky,
|
34 |
+
neutral_hue=gr.themes.utils.colors.fuchsia,
|
35 |
+
font=["sans-serif", "Microsoft YaHei", "ui-sans-serif", "system-ui",
|
36 |
+
"sans-serif", gr.themes.utils.fonts.GoogleFont("Source Sans Pro")],
|
37 |
+
font_mono=["ui-monospace", "Consolas", "monospace", gr.themes.utils.fonts.GoogleFont("IBM Plex Mono")])
|
38 |
set_theme.set(
|
39 |
# Colors
|
40 |
input_background_fill_dark="*neutral_800",
|
|
|
80 |
button_cancel_text_color=color_er.c600,
|
81 |
button_cancel_text_color_dark="white",
|
82 |
)
|
83 |
+
except:
|
84 |
+
set_theme = None
|
85 |
+
print('gradio版本较旧, 不能自定义字体和颜色')
|
86 |
return set_theme
|
87 |
|
88 |
+
|
89 |
advanced_css = """
|
90 |
+
/* 设置表格的外边距为1em,内部单元格之间边框合并,空单元格显示. */
|
91 |
.markdown-body table {
|
92 |
+
margin: 1em 0;
|
93 |
border-collapse: collapse;
|
94 |
+
empty-cells: show;
|
95 |
}
|
96 |
|
97 |
+
/* 设置表格单元格的内边距为5px,边框粗细为1.2px,颜色为--border-color-primary. */
|
98 |
.markdown-body th, .markdown-body td {
|
99 |
+
border: 1.2px solid var(--border-color-primary);
|
100 |
padding: 5px;
|
101 |
}
|
102 |
+
|
103 |
+
/* 设置表头背景颜色为rgba(175,184,193,0.2),透明度为0.2. */
|
104 |
+
.markdown-body thead {
|
105 |
+
background-color: rgba(175,184,193,0.2);
|
106 |
+
}
|
107 |
+
|
108 |
+
/* 设置表头单元格的内边距为0.5em和0.2em. */
|
109 |
+
.markdown-body thead th {
|
110 |
+
padding: .5em .2em;
|
111 |
+
}
|
112 |
+
|
113 |
+
/* 去掉列表前缀的默认间距,使其与文本线对齐. */
|
114 |
+
.markdown-body ol, .markdown-body ul {
|
115 |
+
padding-inline-start: 2em !important;
|
116 |
+
}
|
117 |
+
|
118 |
+
/* 设定聊天气泡的样式,包括圆角、最大宽度和阴影等. */
|
119 |
+
[class *= "message"] {
|
120 |
+
border-radius: var(--radius-xl) !important;
|
121 |
+
/* padding: var(--spacing-xl) !important; */
|
122 |
+
/* font-size: var(--text-md) !important; */
|
123 |
+
/* line-height: var(--line-md) !important; */
|
124 |
+
/* min-height: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl)); */
|
125 |
+
/* min-width: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl)); */
|
126 |
+
}
|
127 |
+
[data-testid = "bot"] {
|
128 |
+
max-width: 95%;
|
129 |
+
/* width: auto !important; */
|
130 |
+
border-bottom-left-radius: 0 !important;
|
131 |
+
}
|
132 |
+
[data-testid = "user"] {
|
133 |
+
max-width: 100%;
|
134 |
+
/* width: auto !important; */
|
135 |
+
border-bottom-right-radius: 0 !important;
|
136 |
+
}
|
137 |
+
|
138 |
+
/* 行内代码的背景设为淡灰色,设定圆角和间距. */
|
139 |
+
.markdown-body code {
|
140 |
+
display: inline;
|
141 |
+
white-space: break-spaces;
|
142 |
+
border-radius: 6px;
|
143 |
+
margin: 0 2px 0 2px;
|
144 |
+
padding: .2em .4em .1em .4em;
|
145 |
+
background-color: rgba(175,184,193,0.2);
|
146 |
+
}
|
147 |
+
/* 设定代码块的样式,包括背景颜色、内、外边距、圆角。 */
|
148 |
+
.markdown-body pre code {
|
149 |
+
display: block;
|
150 |
+
overflow: auto;
|
151 |
+
white-space: pre;
|
152 |
+
background-color: rgba(175,184,193,0.2);
|
153 |
+
border-radius: 10px;
|
154 |
+
padding: 1em;
|
155 |
+
margin: 1em 2em 1em 0.5em;
|
156 |
+
}
|
157 |
+
"""
|
158 |
+
|
159 |
+
if CODE_HIGHLIGHT:
|
160 |
+
advanced_css += """
|
161 |
+
.hll { background-color: #ffffcc }
|
162 |
+
.c { color: #3D7B7B; font-style: italic } /* Comment */
|
163 |
+
.err { border: 1px solid #FF0000 } /* Error */
|
164 |
+
.k { color: hsl(197, 94%, 51%); font-weight: bold } /* Keyword */
|
165 |
+
.o { color: #666666 } /* Operator */
|
166 |
+
.ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
|
167 |
+
.cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
|
168 |
+
.cp { color: #9C6500 } /* Comment.Preproc */
|
169 |
+
.cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
|
170 |
+
.c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
|
171 |
+
.cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
|
172 |
+
.gd { color: #A00000 } /* Generic.Deleted */
|
173 |
+
.ge { font-style: italic } /* Generic.Emph */
|
174 |
+
.gr { color: #E40000 } /* Generic.Error */
|
175 |
+
.gh { color: #000080; font-weight: bold } /* Generic.Heading */
|
176 |
+
.gi { color: #008400 } /* Generic.Inserted */
|
177 |
+
.go { color: #717171 } /* Generic.Output */
|
178 |
+
.gp { color: #000080; font-weight: bold } /* Generic.Prompt */
|
179 |
+
.gs { font-weight: bold } /* Generic.Strong */
|
180 |
+
.gu { color: #800080; font-weight: bold } /* Generic.Subheading */
|
181 |
+
.gt { color: #a9dd00 } /* Generic.Traceback */
|
182 |
+
.kc { color: #008000; font-weight: bold } /* Keyword.Constant */
|
183 |
+
.kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
|
184 |
+
.kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
|
185 |
+
.kp { color: #008000 } /* Keyword.Pseudo */
|
186 |
+
.kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
|
187 |
+
.kt { color: #B00040 } /* Keyword.Type */
|
188 |
+
.m { color: #666666 } /* Literal.Number */
|
189 |
+
.s { color: #BA2121 } /* Literal.String */
|
190 |
+
.na { color: #687822 } /* Name.Attribute */
|
191 |
+
.nb { color: #e5f8c3 } /* Name.Builtin */
|
192 |
+
.nc { color: #ffad65; font-weight: bold } /* Name.Class */
|
193 |
+
.no { color: #880000 } /* Name.Constant */
|
194 |
+
.nd { color: #AA22FF } /* Name.Decorator */
|
195 |
+
.ni { color: #717171; font-weight: bold } /* Name.Entity */
|
196 |
+
.ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
|
197 |
+
.nf { color: #f9f978 } /* Name.Function */
|
198 |
+
.nl { color: #767600 } /* Name.Label */
|
199 |
+
.nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
|
200 |
+
.nt { color: #008000; font-weight: bold } /* Name.Tag */
|
201 |
+
.nv { color: #19177C } /* Name.Variable */
|
202 |
+
.ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
|
203 |
+
.w { color: #bbbbbb } /* Text.Whitespace */
|
204 |
+
.mb { color: #666666 } /* Literal.Number.Bin */
|
205 |
+
.mf { color: #666666 } /* Literal.Number.Float */
|
206 |
+
.mh { color: #666666 } /* Literal.Number.Hex */
|
207 |
+
.mi { color: #666666 } /* Literal.Number.Integer */
|
208 |
+
.mo { color: #666666 } /* Literal.Number.Oct */
|
209 |
+
.sa { color: #BA2121 } /* Literal.String.Affix */
|
210 |
+
.sb { color: #BA2121 } /* Literal.String.Backtick */
|
211 |
+
.sc { color: #BA2121 } /* Literal.String.Char */
|
212 |
+
.dl { color: #BA2121 } /* Literal.String.Delimiter */
|
213 |
+
.sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
|
214 |
+
.s2 { color: #2bf840 } /* Literal.String.Double */
|
215 |
+
.se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
|
216 |
+
.sh { color: #BA2121 } /* Literal.String.Heredoc */
|
217 |
+
.si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
|
218 |
+
.sx { color: #008000 } /* Literal.String.Other */
|
219 |
+
.sr { color: #A45A77 } /* Literal.String.Regex */
|
220 |
+
.s1 { color: #BA2121 } /* Literal.String.Single */
|
221 |
+
.ss { color: #19177C } /* Literal.String.Symbol */
|
222 |
+
.bp { color: #008000 } /* Name.Builtin.Pseudo */
|
223 |
+
.fm { color: #0000FF } /* Name.Function.Magic */
|
224 |
+
.vc { color: #19177C } /* Name.Variable.Class */
|
225 |
+
.vg { color: #19177C } /* Name.Variable.Global */
|
226 |
+
.vi { color: #19177C } /* Name.Variable.Instance */
|
227 |
+
.vm { color: #19177C } /* Name.Variable.Magic */
|
228 |
+
.il { color: #666666 } /* Literal.Number.Integer.Long */
|
229 |
"""
|
toolbox.py
CHANGED
@@ -1,13 +1,33 @@
|
|
1 |
-
import markdown
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from functools import wraps, lru_cache
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def get_reduce_token_percent(text):
|
6 |
try:
|
7 |
# text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens"
|
8 |
pattern = r"(\d+)\s+tokens\b"
|
9 |
match = re.findall(pattern, text)
|
10 |
-
EXCEED_ALLO = 500
|
11 |
max_limit = float(match[0]) - EXCEED_ALLO
|
12 |
current_tokens = float(match[1])
|
13 |
ratio = max_limit/current_tokens
|
@@ -16,6 +36,7 @@ def get_reduce_token_percent(text):
|
|
16 |
except:
|
17 |
return 0.5, '不详'
|
18 |
|
|
|
19 |
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=True):
|
20 |
"""
|
21 |
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
|
@@ -28,28 +49,33 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
|
|
28 |
long_connection: 是否采用更稳定的连接方式(推荐)
|
29 |
"""
|
30 |
import time
|
31 |
-
from
|
32 |
from toolbox import get_conf
|
33 |
TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
|
34 |
# 多线程的时候,需要一个mutable结构在不同线程之间传递信息
|
35 |
# list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
|
36 |
mutable = [None, '']
|
37 |
# multi-threading worker
|
|
|
38 |
def mt(i_say, history):
|
39 |
while True:
|
40 |
try:
|
41 |
if long_connection:
|
42 |
-
mutable[0] = predict_no_ui_long_connection(
|
|
|
43 |
else:
|
44 |
-
mutable[0] = predict_no_ui(
|
|
|
45 |
break
|
46 |
except ConnectionAbortedError as token_exceeded_error:
|
47 |
# 尝试计算比例,尽可能多地保留文本
|
48 |
-
p_ratio, n_exceed = get_reduce_token_percent(
|
|
|
49 |
if len(history) > 0:
|
50 |
-
history = [his[
|
|
|
51 |
else:
|
52 |
-
i_say = i_say[: int(len(i_say)
|
53 |
mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
|
54 |
except TimeoutError as e:
|
55 |
mutable[0] = '[Local Message] 请求超时。'
|
@@ -58,42 +84,51 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
|
|
58 |
mutable[0] = f'[Local Message] 异常:{str(e)}.'
|
59 |
raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
|
60 |
# 创建新线程发出http请求
|
61 |
-
thread_name = threading.Thread(target=mt, args=(i_say, history))
|
|
|
62 |
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
|
63 |
cnt = 0
|
64 |
while thread_name.is_alive():
|
65 |
cnt += 1
|
66 |
-
chatbot[-1] = (i_say_show_user,
|
|
|
67 |
yield chatbot, history, '正常'
|
68 |
time.sleep(1)
|
69 |
# 把gpt的输出从mutable中取出来
|
70 |
gpt_say = mutable[0]
|
71 |
-
if gpt_say=='[Local Message] Failed with timeout.':
|
|
|
72 |
return gpt_say
|
73 |
|
|
|
74 |
def write_results_to_file(history, file_name=None):
|
75 |
"""
|
76 |
将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
|
77 |
"""
|
78 |
-
import os
|
|
|
79 |
if file_name is None:
|
80 |
# file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
|
81 |
-
file_name = 'chatGPT分析报告' +
|
|
|
82 |
os.makedirs('./gpt_log/', exist_ok=True)
|
83 |
-
with open(f'./gpt_log/{file_name}', 'w', encoding
|
84 |
f.write('# chatGPT 分析报告\n')
|
85 |
for i, content in enumerate(history):
|
86 |
try: # 这个bug没找到触发条件,暂时先这样顶一下
|
87 |
-
if type(content) != str:
|
|
|
88 |
except:
|
89 |
continue
|
90 |
-
if i%2==0:
|
|
|
91 |
f.write(content)
|
92 |
f.write('\n\n')
|
93 |
res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
|
94 |
print(res)
|
95 |
return res
|
96 |
|
|
|
97 |
def regular_txt_to_markdown(text):
|
98 |
"""
|
99 |
将普通文本转换为Markdown格式的文本。
|
@@ -103,6 +138,7 @@ def regular_txt_to_markdown(text):
|
|
103 |
text = text.replace('\n\n\n', '\n\n')
|
104 |
return text
|
105 |
|
|
|
106 |
def CatchException(f):
|
107 |
"""
|
108 |
装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。
|
@@ -115,14 +151,24 @@ def CatchException(f):
|
|
115 |
from check_proxy import check_proxy
|
116 |
from toolbox import get_conf
|
117 |
proxies, = get_conf('proxies')
|
118 |
-
tb_str =
|
119 |
-
|
|
|
|
|
|
|
120 |
yield chatbot, history, f'异常 {e}'
|
121 |
return decorated
|
122 |
|
|
|
123 |
def HotReload(f):
|
124 |
"""
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
"""
|
127 |
@wraps(f)
|
128 |
def decorated(*args, **kwargs):
|
@@ -131,12 +177,15 @@ def HotReload(f):
|
|
131 |
yield from f_hot_reload(*args, **kwargs)
|
132 |
return decorated
|
133 |
|
|
|
134 |
def report_execption(chatbot, history, a, b):
|
135 |
"""
|
136 |
向chatbot中添加错误信息
|
137 |
"""
|
138 |
chatbot.append((a, b))
|
139 |
-
history.append(a)
|
|
|
|
|
140 |
|
141 |
def text_divide_paragraph(text):
|
142 |
"""
|
@@ -153,27 +202,112 @@ def text_divide_paragraph(text):
|
|
153 |
text = "</br>".join(lines)
|
154 |
return text
|
155 |
|
|
|
156 |
def markdown_convertion(txt):
|
157 |
"""
|
158 |
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
|
159 |
"""
|
160 |
pre = '<div class="markdown-body">'
|
161 |
suf = '</div>'
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
else:
|
165 |
-
return pre + markdown.markdown(txt,extensions=['fenced_code','tables']) + suf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
|
168 |
def format_io(self, y):
|
169 |
"""
|
170 |
将输入和输出解析为HTML格式。将y中最后一项的输入部分段落化,并将输出部分的Markdown和数学公式转换为HTML格式。
|
171 |
"""
|
172 |
-
if y is None or y == []:
|
|
|
173 |
i_ask, gpt_reply = y[-1]
|
174 |
-
i_ask = text_divide_paragraph(i_ask)
|
|
|
|
|
175 |
y[-1] = (
|
176 |
-
None if i_ask is None else markdown.markdown(
|
|
|
177 |
None if gpt_reply is None else markdown_convertion(gpt_reply)
|
178 |
)
|
179 |
return y
|
@@ -235,6 +369,7 @@ def extract_archive(file_path, dest_dir):
|
|
235 |
return ''
|
236 |
return ''
|
237 |
|
|
|
238 |
def find_recent_files(directory):
|
239 |
"""
|
240 |
me: find files that is created with in one minutes under a directory with python, write a function
|
@@ -248,21 +383,29 @@ def find_recent_files(directory):
|
|
248 |
|
249 |
for filename in os.listdir(directory):
|
250 |
file_path = os.path.join(directory, filename)
|
251 |
-
if file_path.endswith('.log'):
|
252 |
-
|
|
|
253 |
if created_time >= one_minute_ago:
|
254 |
-
if os.path.isdir(file_path):
|
|
|
255 |
recent_files.append(file_path)
|
256 |
|
257 |
return recent_files
|
258 |
|
259 |
|
260 |
def on_file_uploaded(files, chatbot, txt):
|
261 |
-
if len(files) == 0:
|
262 |
-
|
|
|
|
|
|
|
|
|
263 |
from toolbox import extract_archive
|
264 |
-
try:
|
265 |
-
|
|
|
|
|
266 |
time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
|
267 |
os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
|
268 |
err_msg = ''
|
@@ -270,13 +413,14 @@ def on_file_uploaded(files, chatbot, txt):
|
|
270 |
file_origin_name = os.path.basename(file.orig_name)
|
271 |
shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
|
272 |
err_msg += extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
|
273 |
-
|
274 |
-
moved_files = [fp for fp in glob.glob(
|
|
|
275 |
txt = f'private_upload/{time_tag}'
|
276 |
moved_files_str = '\t\n\n'.join(moved_files)
|
277 |
chatbot.append(['我上传了文件,请查收',
|
278 |
-
f'[Local Message] 收到以下文件: \n\n{moved_files_str}'+
|
279 |
-
f'\n\n调用路径参数已自动修正到: \n\n{txt}'+
|
280 |
f'\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'+err_msg])
|
281 |
return chatbot, txt
|
282 |
|
@@ -284,32 +428,37 @@ def on_file_uploaded(files, chatbot, txt):
|
|
284 |
def on_report_generated(files, chatbot):
|
285 |
from toolbox import find_recent_files
|
286 |
report_files = find_recent_files('gpt_log')
|
287 |
-
if len(report_files) == 0:
|
|
|
288 |
# files.extend(report_files)
|
289 |
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
|
290 |
return report_files, chatbot
|
291 |
|
|
|
292 |
@lru_cache(maxsize=128)
|
293 |
def read_single_conf_with_lru_cache(arg):
|
294 |
-
try:
|
295 |
-
|
|
|
|
|
296 |
# 在读取API_KEY时,检查一下是不是忘了改config
|
297 |
-
if arg=='API_KEY':
|
298 |
# 正确的 API_KEY 是 "sk-" + 48 位大小写字母数字的组合
|
299 |
API_MATCH = re.match(r"sk-[a-zA-Z0-9]{48}$", r)
|
300 |
if API_MATCH:
|
301 |
print(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
|
302 |
else:
|
303 |
assert False, "正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
|
304 |
-
|
305 |
-
if arg=='proxies':
|
306 |
-
if r is None:
|
307 |
print('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问。建议:检查USE_PROXY选项是否修改。')
|
308 |
-
else:
|
309 |
print('[PROXY] 网络代理状态:已配置。配置信息如下:', r)
|
310 |
assert isinstance(r, dict), 'proxies格式错误,请注意proxies选项的格式,不要遗漏括号。'
|
311 |
return r
|
312 |
|
|
|
313 |
def get_conf(*args):
|
314 |
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
315 |
res = []
|
@@ -318,8 +467,26 @@ def get_conf(*args):
|
|
318 |
res.append(r)
|
319 |
return res
|
320 |
|
|
|
321 |
def clear_line_break(txt):
|
322 |
txt = txt.replace('\n', ' ')
|
323 |
txt = txt.replace(' ', ' ')
|
324 |
txt = txt.replace(' ', ' ')
|
325 |
return txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import markdown
|
2 |
+
import mdtex2html
|
3 |
+
import threading
|
4 |
+
import importlib
|
5 |
+
import traceback
|
6 |
+
import importlib
|
7 |
+
import inspect
|
8 |
+
import re
|
9 |
+
from latex2mathml.converter import convert as tex2mathml
|
10 |
from functools import wraps, lru_cache
|
11 |
|
12 |
+
|
13 |
+
def ArgsGeneralWrapper(f):
|
14 |
+
"""
|
15 |
+
装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
|
16 |
+
"""
|
17 |
+
def decorated(txt, txt2, *args, **kwargs):
|
18 |
+
txt_passon = txt
|
19 |
+
if txt == "" and txt2 != "":
|
20 |
+
txt_passon = txt2
|
21 |
+
yield from f(txt_passon, *args, **kwargs)
|
22 |
+
return decorated
|
23 |
+
|
24 |
+
|
25 |
def get_reduce_token_percent(text):
|
26 |
try:
|
27 |
# text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens"
|
28 |
pattern = r"(\d+)\s+tokens\b"
|
29 |
match = re.findall(pattern, text)
|
30 |
+
EXCEED_ALLO = 500 # 稍微留一点余地,否则在回复时会因余量太少出问题
|
31 |
max_limit = float(match[0]) - EXCEED_ALLO
|
32 |
current_tokens = float(match[1])
|
33 |
ratio = max_limit/current_tokens
|
|
|
36 |
except:
|
37 |
return 0.5, '不详'
|
38 |
|
39 |
+
|
40 |
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=True):
|
41 |
"""
|
42 |
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
|
|
|
49 |
long_connection: 是否采用更稳定的连接方式(推荐)
|
50 |
"""
|
51 |
import time
|
52 |
+
from request_llm.bridge_chatgpt import predict_no_ui, predict_no_ui_long_connection
|
53 |
from toolbox import get_conf
|
54 |
TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
|
55 |
# 多线程的时候,需要一个mutable结构在不同线程之间传递信息
|
56 |
# list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
|
57 |
mutable = [None, '']
|
58 |
# multi-threading worker
|
59 |
+
|
60 |
def mt(i_say, history):
|
61 |
while True:
|
62 |
try:
|
63 |
if long_connection:
|
64 |
+
mutable[0] = predict_no_ui_long_connection(
|
65 |
+
inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
|
66 |
else:
|
67 |
+
mutable[0] = predict_no_ui(
|
68 |
+
inputs=i_say, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt)
|
69 |
break
|
70 |
except ConnectionAbortedError as token_exceeded_error:
|
71 |
# 尝试计算比例,尽可能多地保留文本
|
72 |
+
p_ratio, n_exceed = get_reduce_token_percent(
|
73 |
+
str(token_exceeded_error))
|
74 |
if len(history) > 0:
|
75 |
+
history = [his[int(len(his) * p_ratio):]
|
76 |
+
for his in history if his is not None]
|
77 |
else:
|
78 |
+
i_say = i_say[: int(len(i_say) * p_ratio)]
|
79 |
mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
|
80 |
except TimeoutError as e:
|
81 |
mutable[0] = '[Local Message] 请求超时。'
|
|
|
84 |
mutable[0] = f'[Local Message] 异常:{str(e)}.'
|
85 |
raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
|
86 |
# 创建新线程发出http请求
|
87 |
+
thread_name = threading.Thread(target=mt, args=(i_say, history))
|
88 |
+
thread_name.start()
|
89 |
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
|
90 |
cnt = 0
|
91 |
while thread_name.is_alive():
|
92 |
cnt += 1
|
93 |
+
chatbot[-1] = (i_say_show_user,
|
94 |
+
f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
|
95 |
yield chatbot, history, '正常'
|
96 |
time.sleep(1)
|
97 |
# 把gpt的输出从mutable中取出来
|
98 |
gpt_say = mutable[0]
|
99 |
+
if gpt_say == '[Local Message] Failed with timeout.':
|
100 |
+
raise TimeoutError
|
101 |
return gpt_say
|
102 |
|
103 |
+
|
104 |
def write_results_to_file(history, file_name=None):
|
105 |
"""
|
106 |
将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
|
107 |
"""
|
108 |
+
import os
|
109 |
+
import time
|
110 |
if file_name is None:
|
111 |
# file_name = time.strftime("chatGPT分析报告%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
|
112 |
+
file_name = 'chatGPT分析报告' + \
|
113 |
+
time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
|
114 |
os.makedirs('./gpt_log/', exist_ok=True)
|
115 |
+
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
|
116 |
f.write('# chatGPT 分析报告\n')
|
117 |
for i, content in enumerate(history):
|
118 |
try: # 这个bug没找到触发条件,暂时先这样顶一下
|
119 |
+
if type(content) != str:
|
120 |
+
content = str(content)
|
121 |
except:
|
122 |
continue
|
123 |
+
if i % 2 == 0:
|
124 |
+
f.write('## ')
|
125 |
f.write(content)
|
126 |
f.write('\n\n')
|
127 |
res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}')
|
128 |
print(res)
|
129 |
return res
|
130 |
|
131 |
+
|
132 |
def regular_txt_to_markdown(text):
|
133 |
"""
|
134 |
将普通文本转换为Markdown格式的文本。
|
|
|
138 |
text = text.replace('\n\n\n', '\n\n')
|
139 |
return text
|
140 |
|
141 |
+
|
142 |
def CatchException(f):
|
143 |
"""
|
144 |
装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。
|
|
|
151 |
from check_proxy import check_proxy
|
152 |
from toolbox import get_conf
|
153 |
proxies, = get_conf('proxies')
|
154 |
+
tb_str = '```\n' + traceback.format_exc() + '```'
|
155 |
+
if chatbot is None or len(chatbot) == 0:
|
156 |
+
chatbot = [["插件调度异常", "异常原因"]]
|
157 |
+
chatbot[-1] = (chatbot[-1][0],
|
158 |
+
f"[Local Message] 实验性函数调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
|
159 |
yield chatbot, history, f'异常 {e}'
|
160 |
return decorated
|
161 |
|
162 |
+
|
163 |
def HotReload(f):
|
164 |
"""
|
165 |
+
HotReload的装饰器函数,用于实现Python函数插件的热更新。
|
166 |
+
函数热更新是指在不停止程序运行的情况下,更新函数代码,从而达到实时更新功能。
|
167 |
+
在装饰器内部,使用wraps(f)来保留函数的元信息,并定义了一个名为decorated的内部函数。
|
168 |
+
内部函数通过使用importlib模块的reload函数和inspect模块的getmodule函数来重新加载并获取函数模块,
|
169 |
+
然后通过getattr函数获取函数名,并在新模块中重新加载函数。
|
170 |
+
最后,使用yield from语句返回重新加载过的函数,并在被装饰的函数上执行。
|
171 |
+
最终,装饰器函数返回内部函数。这个内部函数可以将函数的原始定义更新为最新版本,并执行函数的新版本。
|
172 |
"""
|
173 |
@wraps(f)
|
174 |
def decorated(*args, **kwargs):
|
|
|
177 |
yield from f_hot_reload(*args, **kwargs)
|
178 |
return decorated
|
179 |
|
180 |
+
|
181 |
def report_execption(chatbot, history, a, b):
|
182 |
"""
|
183 |
向chatbot中添加错误信息
|
184 |
"""
|
185 |
chatbot.append((a, b))
|
186 |
+
history.append(a)
|
187 |
+
history.append(b)
|
188 |
+
|
189 |
|
190 |
def text_divide_paragraph(text):
|
191 |
"""
|
|
|
202 |
text = "</br>".join(lines)
|
203 |
return text
|
204 |
|
205 |
+
|
206 |
def markdown_convertion(txt):
|
207 |
"""
|
208 |
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
|
209 |
"""
|
210 |
pre = '<div class="markdown-body">'
|
211 |
suf = '</div>'
|
212 |
+
markdown_extension_configs = {
|
213 |
+
'mdx_math': {
|
214 |
+
'enable_dollar_delimiter': True,
|
215 |
+
'use_gitlab_delimiters': False,
|
216 |
+
},
|
217 |
+
}
|
218 |
+
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
|
219 |
+
|
220 |
+
def tex2mathml_catch_exception(content, *args, **kwargs):
|
221 |
+
try:
|
222 |
+
content = tex2mathml(content, *args, **kwargs)
|
223 |
+
except:
|
224 |
+
content = content
|
225 |
+
return content
|
226 |
+
|
227 |
+
def replace_math_no_render(match):
|
228 |
+
content = match.group(1)
|
229 |
+
if 'mode=display' in match.group(0):
|
230 |
+
content = content.replace('\n', '</br>')
|
231 |
+
return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
|
232 |
+
else:
|
233 |
+
return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
|
234 |
+
|
235 |
+
def replace_math_render(match):
|
236 |
+
content = match.group(1)
|
237 |
+
if 'mode=display' in match.group(0):
|
238 |
+
if '\\begin{aligned}' in content:
|
239 |
+
content = content.replace('\\begin{aligned}', '\\begin{array}')
|
240 |
+
content = content.replace('\\end{aligned}', '\\end{array}')
|
241 |
+
content = content.replace('&', ' ')
|
242 |
+
content = tex2mathml_catch_exception(content, display="block")
|
243 |
+
return content
|
244 |
+
else:
|
245 |
+
return tex2mathml_catch_exception(content)
|
246 |
+
|
247 |
+
def markdown_bug_hunt(content):
|
248 |
+
"""
|
249 |
+
解决一个mdx_math的bug(单$包裹begin命令时多余<script>)
|
250 |
+
"""
|
251 |
+
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">')
|
252 |
+
content = content.replace('</script>\n</script>', '</script>')
|
253 |
+
return content
|
254 |
+
|
255 |
+
|
256 |
+
if ('$' in txt) and ('```' not in txt): # 有$标识的公式符号,且没有代码段```的标识
|
257 |
+
# convert everything to html format
|
258 |
+
split = markdown.markdown(text='---')
|
259 |
+
convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs)
|
260 |
+
convert_stage_1 = markdown_bug_hunt(convert_stage_1)
|
261 |
+
# re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
|
262 |
+
# 1. convert to easy-to-copy tex (do not render math)
|
263 |
+
convert_stage_2_1, n = re.subn(find_equation_pattern, replace_math_no_render, convert_stage_1, flags=re.DOTALL)
|
264 |
+
# 2. convert to rendered equation
|
265 |
+
convert_stage_2_2, n = re.subn(find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL)
|
266 |
+
# cat them together
|
267 |
+
return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
|
268 |
else:
|
269 |
+
return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf
|
270 |
+
|
271 |
+
|
272 |
+
def close_up_code_segment_during_stream(gpt_reply):
|
273 |
+
"""
|
274 |
+
在gpt输出代码的中途(输出了前面的```,但还没输出完后面的```),补上后面的```
|
275 |
+
|
276 |
+
Args:
|
277 |
+
gpt_reply (str): GPT模型返回的回复字符串。
|
278 |
+
|
279 |
+
Returns:
|
280 |
+
str: 返回一个新的字符串,将输出代码片段的“后面的```”补上。
|
281 |
+
|
282 |
+
"""
|
283 |
+
if '```' not in gpt_reply:
|
284 |
+
return gpt_reply
|
285 |
+
if gpt_reply.endswith('```'):
|
286 |
+
return gpt_reply
|
287 |
+
|
288 |
+
# 排除了以上两个情况,我们
|
289 |
+
segments = gpt_reply.split('```')
|
290 |
+
n_mark = len(segments) - 1
|
291 |
+
if n_mark % 2 == 1:
|
292 |
+
# print('输出代码片段中!')
|
293 |
+
return gpt_reply+'\n```'
|
294 |
+
else:
|
295 |
+
return gpt_reply
|
296 |
|
297 |
|
298 |
def format_io(self, y):
|
299 |
"""
|
300 |
将输入和输出解析为HTML格式。将y中最后一项的输入部分段落化,并将输出部分的Markdown和数学公式转换为HTML格式。
|
301 |
"""
|
302 |
+
if y is None or y == []:
|
303 |
+
return []
|
304 |
i_ask, gpt_reply = y[-1]
|
305 |
+
i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波
|
306 |
+
gpt_reply = close_up_code_segment_during_stream(
|
307 |
+
gpt_reply) # 当代码输出半截的时候,试着补上后个```
|
308 |
y[-1] = (
|
309 |
+
None if i_ask is None else markdown.markdown(
|
310 |
+
i_ask, extensions=['fenced_code', 'tables']),
|
311 |
None if gpt_reply is None else markdown_convertion(gpt_reply)
|
312 |
)
|
313 |
return y
|
|
|
369 |
return ''
|
370 |
return ''
|
371 |
|
372 |
+
|
373 |
def find_recent_files(directory):
|
374 |
"""
|
375 |
me: find files that is created with in one minutes under a directory with python, write a function
|
|
|
383 |
|
384 |
for filename in os.listdir(directory):
|
385 |
file_path = os.path.join(directory, filename)
|
386 |
+
if file_path.endswith('.log'):
|
387 |
+
continue
|
388 |
+
created_time = os.path.getmtime(file_path)
|
389 |
if created_time >= one_minute_ago:
|
390 |
+
if os.path.isdir(file_path):
|
391 |
+
continue
|
392 |
recent_files.append(file_path)
|
393 |
|
394 |
return recent_files
|
395 |
|
396 |
|
397 |
def on_file_uploaded(files, chatbot, txt):
|
398 |
+
if len(files) == 0:
|
399 |
+
return chatbot, txt
|
400 |
+
import shutil
|
401 |
+
import os
|
402 |
+
import time
|
403 |
+
import glob
|
404 |
from toolbox import extract_archive
|
405 |
+
try:
|
406 |
+
shutil.rmtree('./private_upload/')
|
407 |
+
except:
|
408 |
+
pass
|
409 |
time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
|
410 |
os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
|
411 |
err_msg = ''
|
|
|
413 |
file_origin_name = os.path.basename(file.orig_name)
|
414 |
shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
|
415 |
err_msg += extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
|
416 |
+
dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
|
417 |
+
moved_files = [fp for fp in glob.glob(
|
418 |
+
'private_upload/**/*', recursive=True)]
|
419 |
txt = f'private_upload/{time_tag}'
|
420 |
moved_files_str = '\t\n\n'.join(moved_files)
|
421 |
chatbot.append(['我上传了文件,请查收',
|
422 |
+
f'[Local Message] 收到以下文件: \n\n{moved_files_str}' +
|
423 |
+
f'\n\n调用路径参数已自动修正到: \n\n{txt}' +
|
424 |
f'\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'+err_msg])
|
425 |
return chatbot, txt
|
426 |
|
|
|
428 |
def on_report_generated(files, chatbot):
|
429 |
from toolbox import find_recent_files
|
430 |
report_files = find_recent_files('gpt_log')
|
431 |
+
if len(report_files) == 0:
|
432 |
+
return None, chatbot
|
433 |
# files.extend(report_files)
|
434 |
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
|
435 |
return report_files, chatbot
|
436 |
|
437 |
+
|
438 |
@lru_cache(maxsize=128)
|
439 |
def read_single_conf_with_lru_cache(arg):
|
440 |
+
try:
|
441 |
+
r = getattr(importlib.import_module('config_private'), arg)
|
442 |
+
except:
|
443 |
+
r = getattr(importlib.import_module('config'), arg)
|
444 |
# 在读取API_KEY时,检查一下是不是忘了改config
|
445 |
+
if arg == 'API_KEY':
|
446 |
# 正确的 API_KEY 是 "sk-" + 48 位大小写字母数字的组合
|
447 |
API_MATCH = re.match(r"sk-[a-zA-Z0-9]{48}$", r)
|
448 |
if API_MATCH:
|
449 |
print(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
|
450 |
else:
|
451 |
assert False, "正确的 API_KEY 是 'sk-' + '48 位大小写字母数字' 的组合,请在config文件中修改API密钥, 添加海外代理之后再运行。" + \
|
452 |
+
"(如果您刚更新过代码,请确保旧版config_private文件中没有遗留任何新增键值)"
|
453 |
+
if arg == 'proxies':
|
454 |
+
if r is None:
|
455 |
print('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问。建议:检查USE_PROXY选项是否修改。')
|
456 |
+
else:
|
457 |
print('[PROXY] 网络代理状态:已配置。配置信息如下:', r)
|
458 |
assert isinstance(r, dict), 'proxies格式错误,请注意proxies选项的格式,不要遗漏括号。'
|
459 |
return r
|
460 |
|
461 |
+
|
462 |
def get_conf(*args):
|
463 |
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
464 |
res = []
|
|
|
467 |
res.append(r)
|
468 |
return res
|
469 |
|
470 |
+
|
471 |
def clear_line_break(txt):
|
472 |
txt = txt.replace('\n', ' ')
|
473 |
txt = txt.replace(' ', ' ')
|
474 |
txt = txt.replace(' ', ' ')
|
475 |
return txt
|
476 |
+
|
477 |
+
|
478 |
+
class DummyWith():
|
479 |
+
"""
|
480 |
+
这段代码定义了一个名为DummyWith的空上下文管理器,
|
481 |
+
它的作用是……额……没用,即在代码结构不变得情况下取代其他的上下文管理器。
|
482 |
+
上下文管理器是一种Python对象,用于与with语句一起使用,
|
483 |
+
以确保一些资源在代码块执行期间得到正确的初始化和清理。
|
484 |
+
上下文管理器必须实现两个方法,分别为 __enter__()和 __exit__()。
|
485 |
+
在上下文执行开始的情况下,__enter__()方法会在代码块被执行前被调用,
|
486 |
+
而在上下文执行结束时,__exit__()方法则会被调用。
|
487 |
+
"""
|
488 |
+
def __enter__(self):
|
489 |
+
return self
|
490 |
+
|
491 |
+
def __exit__(self, exc_type, exc_value, traceback):
|
492 |
+
return
|
version
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": 2.5,
|
3 |
+
"show_feature": true,
|
4 |
+
"new_feature": "新增一键更新程序<->高亮代码<->高亮公式<->新增垂直布局选项"
|
5 |
+
}
|