#244 opened by mj898
This view is limited to 50 files because it contains too many changes.
See the raw diff for the complete change set.
- .pre-commit-config.yaml +0 -32
- Dockerfile +11 -11
- README.md +139 -214
- app.py +89 -231
- check_proxy.py +17 -24
- config.py +27 -140
- core_functional.py +44 -121
- crazy_functional.py +246 -406
- crazy_functions/Langchain知识库.py +2 -2
- crazy_functions/Latex全文润色.py +17 -17
- crazy_functions/Latex全文翻译.py +12 -12
- crazy_functions/Latex输出PDF.py +0 -484
- crazy_functions/Latex输出PDF结果.py +14 -20
- crazy_functions/agent_fns/auto_agent.py +0 -23
- crazy_functions/agent_fns/echo_agent.py +0 -19
- crazy_functions/agent_fns/general.py +0 -138
- crazy_functions/agent_fns/persistent.py +0 -16
- crazy_functions/agent_fns/pipe.py +0 -194
- crazy_functions/agent_fns/watchdog.py +0 -28
- crazy_functions/chatglm微调工具.py +4 -4
- crazy_functions/crazy_utils.py +258 -57
- crazy_functions/diagram_fns/file_tree.py +0 -122
- crazy_functions/game_fns/game_ascii_art.py +0 -42
- crazy_functions/game_fns/game_interactive_story.py +0 -212
- crazy_functions/game_fns/game_utils.py +0 -35
- crazy_functions/gen_fns/gen_fns_shared.py +0 -70
- crazy_functions/ipc_fns/mp.py +0 -37
- crazy_functions/latex_fns/latex_actions.py +11 -31
- crazy_functions/latex_fns/latex_toolbox.py +115 -345
- crazy_functions/live_audio/aliyunASR.py +6 -138
- crazy_functions/live_audio/audio_io.py +1 -1
- crazy_functions/multi_stage/multi_stage_utils.py +0 -93
- crazy_functions/pdf_fns/breakdown_txt.py +0 -125
- crazy_functions/pdf_fns/parse_pdf.py +5 -146
- crazy_functions/pdf_fns/parse_word.py +0 -85
- crazy_functions/pdf_fns/report_gen_html.py +0 -58
- crazy_functions/pdf_fns/report_template.html +0 -0
- crazy_functions/vector_fns/__init__.py +0 -0
- crazy_functions/vector_fns/general_file_loader.py +0 -70
- crazy_functions/vector_fns/vector_database.py +0 -338
- crazy_functions/vt_fns/vt_call_plugin.py +1 -1
- crazy_functions/vt_fns/vt_modify_config.py +3 -3
- crazy_functions/下载arxiv论文翻译摘要.py +6 -6
- crazy_functions/互动小游戏.py +0 -40
- crazy_functions/交互功能函数模板.py +2 -2
- crazy_functions/函数动态生成.py +0 -252
- crazy_functions/命令行助手.py +2 -2
- crazy_functions/图片生成.py +17 -224
- crazy_functions/多智能体.py +0 -101
- crazy_functions/对话历史存档.py +14 -24
.pre-commit-config.yaml
DELETED
@@ -1,32 +0,0 @@
-default_language_version:
-  python: python3
-exclude: 'dotnet'
-ci:
-  autofix_prs: true
-  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
-  autoupdate_schedule: 'quarterly'
-
-repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
-    hooks:
-      - id: check-ast
-      # - id: check-yaml
-      - id: check-toml
-      - id: check-json
-      - id: check-byte-order-marker
-        exclude: .gitignore
-      - id: check-merge-conflict
-      - id: detect-private-key
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
-      - id: no-commit-to-branch
-  - repo: https://github.com/psf/black
-    rev: 23.3.0
-    hooks:
-      - id: black
-  # - repo: https://github.com/charliermarsh/ruff-pre-commit
-  #   rev: v0.0.261
-  #   hooks:
-  #     - id: ruff
-  #       args: ["--fix"]
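The deleted config wired up the standard hygiene hooks (check-ast, merge-conflict and private-key detection, whitespace fixers) plus the black formatter. A contributor who wants the same checks locally after this removal would normally go through the pre-commit CLI; a minimal sketch, assuming the `pre-commit` package is installed and a `.pre-commit-config.yaml` has been restored at the repository root:

```python
# Sketch only: restore the deleted hook setup locally.
# Assumes `pip install pre-commit` and a .pre-commit-config.yaml in the repo root.
import subprocess

def reinstall_hooks() -> None:
    subprocess.check_call(["pre-commit", "install"])             # register the git hook
    subprocess.check_call(["pre-commit", "run", "--all-files"])  # run every hook once over the tree

if __name__ == "__main__":
    reinstall_hooks()
```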
Dockerfile
CHANGED
@@ -1,34 +1,34 @@
-# This Dockerfile
-#
-#
-#
-# - How to run (other OSes, pick any fixed port such as 50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic `
+# This Dockerfile is for a "no local model" build; if you need local models such as chatglm, or the latex toolchain, see docker-compose.yml
+# How to build: edit `config.py` first, then `docker build -t gpt-academic . `
+# How to run (Linux): `docker run --rm -it --net=host gpt-academic `
+# How to run (other OSes, pick any fixed port such as 50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic `
 FROM python:3.11
 
-# Optional step: switch to a pip mirror
+# Optional step: switch to a pip mirror
 RUN echo '[global]' > /etc/pip.conf && \
     echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
     echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
 
-#
+# Enter the working directory
 WORKDIR /gpt
 
-# Install most dependencies first, so later builds can reuse the Docker cache
+# Install most dependencies first, so later builds can reuse the Docker cache
 COPY requirements.txt ./
+COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
 RUN pip3 install -r requirements.txt
 
-#
+# Copy the project files and install the remaining dependencies
 COPY . .
 RUN pip3 install -r requirements.txt
 
-#
+# Optional step: warm up the modules
 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
 
-#
+# Launch
 CMD ["python3", "-u", "main.py"]
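The `warm_up_modules` build step exists so that one-time downloads (tokenizer vocabularies and the like) land in an image layer instead of being paid on the container's first request. The real implementation lives in `check_proxy.py`; the following is only an illustrative sketch of the idea:

```python
# Illustrative sketch -- the project's actual warm_up_modules() is defined in check_proxy.py.
# Goal: pay one-time initialization costs at docker-build time, not at first request.
import tiktoken

def warm_up_modules():
    # Assumption for this sketch: a tiktoken-style tokenizer caches its BPE
    # vocabulary file on first use, so touching it here bakes the download
    # into the image layer produced by the RUN step above.
    enc = tiktoken.get_encoding("cl100k_base")
    enc.encode("warm up")

if __name__ == "__main__":
    warm_up_modules()
```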
README.md
CHANGED
@@ -11,96 +11,73 @@ pinned: false
 
 # ChatGPT 学术优化
 > **Note**
->
-> 2023.
->
->
+>
+> 2023.7.8: Gradio and Pydantic dependencies adjusted; `requirements.txt` has been updated. Please **update your code** promptly, and when installing dependencies, strictly use the versions **pinned** in `requirements.txt`
+>
+> `pip install -r requirements.txt`
-
-<br>
-
-<div align=center>
-<h1 aligh="center">
-<img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)
-</h1>
-
-[![Github][Github-image]][Github-url]
-[![License][License-image]][License-url]
-[![Releases][Releases-image]][Releases-url]
-[![Installation][Installation-image]][Installation-url]
-[![Wiki][Wiki-image]][Wiki-url]
-[![PR][PRs-image]][PRs-url]
-
-[Github-image]: https://img.shields.io/badge/github-12100E.svg?style=flat-square
-[License-image]: https://img.shields.io/github/license/binary-husky/gpt_academic?label=License&style=flat-square&color=orange
-[Releases-image]: https://img.shields.io/github/release/binary-husky/gpt_academic?label=Release&style=flat-square&color=blue
-[Installation-image]: https://img.shields.io/badge/dynamic/json?color=blue&url=https://raw.githubusercontent.com/binary-husky/gpt_academic/master/version&query=$.version&label=Installation&style=flat-square
-[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square
-[PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square
-
-[Github-url]: https://github.com/binary-husky/gpt_academic
-[License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE
-[Releases-url]: https://github.com/binary-husky/gpt_academic/releases
-[Installation-url]: https://github.com/binary-husky/gpt_academic#installation
-[Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki
-[PRs-url]: https://github.com/binary-husky/gpt_academic/pulls
 
 
-</div>
-<br>
+# <div align=center><img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)</div>
 
-**If you like this project, please give it a Star
+**If you like this project, please give it a Star; if you have invented handy shortcut keys or function plugins, pull requests are welcome!**
 
-If you like this project, please give it a Star.
-
-<br>
+If you like this project, please give it a Star. If you've come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request. We also have a README in [English|](docs/README_EN.md)[日本語|](docs/README_JP.md)[한국어|](https://github.com/mldljyh/ko_gpt_academic)[Русский|](docs/README_RS.md)[Français](docs/README_FR.md) translated by this project itself.
+To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
 
+> **Note**
 >
-> 1. The function of every file in this project is documented in detail in the self-analysis report [`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告). As versions iterate, you can click the relevant function plugin at any time to have GPT regenerate the project's self-analysis report. Common questions are answered in the wiki.
-> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)](https://github.com/binary-husky/gpt_academic/wiki)
->
->
+> 1. Note that only function plugins (buttons) marked in **highlight** can read files, and some plugins live in the **drop-down menu** of the plugin area. PRs for new plugins are welcomed and handled with **top priority**.
+>
+> 2. The function of every file in this project is documented in detail in the self-analysis report [`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告). As versions iterate, you can click the relevant function plugin at any time to have GPT regenerate the report. Common questions: [`wiki`](https://github.com/binary-husky/gpt_academic/wiki). [Installation](#installation) | [Configuration guide](https://github.com/binary-husky/gpt_academic/wiki/%E9%A1%B9%E7%9B%AE%E9%85%8D%E7%BD%AE%E8%AF%B4%E6%98%8E).
+>
+> 3. This project is compatible with, and encourages trying, Chinese LLMs such as ChatGLM and MOSS. Multiple api-keys may coexist, e.g. `API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"` in the config file. To swap `API_KEY` temporarily, enter a temporary key in the input area and press Enter to submit.
+
 
 <div align="center">
 
 Feature (⭐ = recently added) | Description
 --- | ---
-⭐[Access new models](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | Baidu [Qianfan](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu) and Wenxin Yiyan, Tongyi Qianwen
-
-
-
-⭐AutoGen multi-agent plugin | [Plugin] Explore the emergent intelligence of multiple agents with Microsoft AutoGen!
-⭐Void Terminal plugin | [Plugin] Dispatch this project's other plugins directly in natural language
-Polishing, translation, code explanation | One-click polishing, translation, paper grammar checking, and code explanation
+⭐[Access new models](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | Baidu [Qianfan](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu) and Wenxin Yiyan, [Tongyi Qianwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary), Shanghai AI-Lab [InternLM](https://github.com/InternLM/InternLM), iFLYTEK [Spark](https://xinghuo.xfyun.cn/), [LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
+One-click polishing | One-click polishing and one-click grammar checking for papers
+One-click Chinese-English translation | One-click Chinese-English translation
+One-click code explanation | Display, explain, generate, and comment code
 [Custom shortcut keys](https://www.bilibili.com/video/BV14s4y1E7jN) | Custom shortcut keys supported
-Modular design | Supports powerful custom [
-[
-
-
-
-
-[
-
-
-[
-
+Modular design | Supports powerful custom [function plugins](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions); plugins support [hot updates](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
+[Self-analysis](https://www.bilibili.com/video/BV1cj411A7VW) | [Function plugin] [One-click read-through](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A) of this project's own source code
+[Project analysis](https://www.bilibili.com/video/BV1cj411A7VW) | [Function plugin] One-click analysis of other Python/C/C++/Java/Lua/... project trees
+Read and [translate](https://www.bilibili.com/video/BV1KT411x7Wn) papers | [Function plugin] One-click interpretation of a full latex/pdf paper plus abstract generation
+Latex full-text [translation](https://www.bilibili.com/video/BV1nk4y1Y7Js/) and [polishing](https://www.bilibili.com/video/BV1FT411H7c5/) | [Function plugin] One-click translation or polishing of latex papers
+Batch comment generation | [Function plugin] One-click batch generation of function comments
+Markdown [Chinese-English translation](https://www.bilibili.com/video/BV1yo4y157jV/) | [Function plugin] Have you seen the [README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md) in the five languages above?
+Chat analysis report generation | [Function plugin] Automatically generates a summary report after a run
+[PDF full-text translation](https://www.bilibili.com/video/BV1KT411x7Wn) | [Function plugin] Extracts a PDF paper's title & abstract and translates the full text (multi-threaded)
+[Arxiv assistant](https://www.bilibili.com/video/BV1LM4y1279X) | [Function plugin] Enter an arxiv article url to translate the abstract and download the PDF in one click
+Latex one-click proofreading | [Function plugin] Grammarly-style grammar and spelling correction for Latex articles, with a side-by-side PDF
+[Google Scholar integration assistant](https://www.bilibili.com/video/BV19L411U7ia) | [Function plugin] Given any Google Scholar search page URL, let gpt [write the related works](https://www.bilibili.com/video/BV1GP411U7Az/) for you
+Internet information aggregation + GPT | [Function plugin] One click to [let GPT fetch information from the internet](https://www.bilibili.com/video/BV1om4y127ck) before answering, so information never goes stale
+⭐Fine-grained Arxiv paper translation ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [Function plugin] One click to [translate arxiv papers at very high quality](https://www.bilibili.com/video/BV1dz4y1v77A/), currently the best paper-translation tool
+⭐[Real-time voice input](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [Function plugin] Asynchronously [listens to audio](https://www.bilibili.com/video/BV1AV4y187Uy/), segments sentences automatically, and picks the right moment to answer
 Formula/image/table display | Shows formulas in both their [tex and rendered forms](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png); formula and code highlighting supported
+Multi-threaded function plugin support | Call chatgpt in multiple threads and process [large volumes of text](https://www.bilibili.com/video/BV1FT411H7c5/) or whole programs in one click
 Start with a dark [theme](https://github.com/binary-husky/gpt_academic/issues/173) | Append ```/?__theme=dark``` to the browser url to switch to the dark theme
-[Multi-LLM](https://www.bilibili.com/video/BV1wT411p7yf) support | Served simultaneously by GPT3.5, GPT4, [Tsinghua ChatGLM2](https://github.com/THUDM/ChatGLM2-6B), [Fudan MOSS](https://github.com/OpenLMLab/MOSS)
+[Multi-LLM](https://www.bilibili.com/video/BV1wT411p7yf) support | Being served by GPT3.5, GPT4, [Tsinghua ChatGLM2](https://github.com/THUDM/ChatGLM2-6B), and [Fudan MOSS](https://github.com/OpenLMLab/MOSS) at the same time must feel great, right?
+⭐ChatGLM2 fine-tuned models | Supports loading ChatGLM2 fine-tuned models; provides a ChatGLM2 fine-tuning helper plugin
 More LLM models, with [huggingface deployment](https://huggingface.co/spaces/qingxu98/gpt-academic) | Newbing interface (New Bing) added; Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs) introduced, supporting [LLaMA](https://github.com/facebookresearch/llama) and [PanGu-α](https://openi.org.cn/pangu/)
 ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip package | Call all of this project's function plugins directly from Python without the GUI (in development)
+⭐Void Terminal plugin | [Function plugin] Dispatch this project's other plugins directly in natural language
 More new features (image generation, etc.) …… | See the end of this document ……
 </div>
 
 
 - New UI (switch between the "left-right" and "top-down" layouts by changing the LAYOUT option in `config.py`)
 <div align="center">
-<img src="https://user-images.githubusercontent.com/96192199/
+<img src="https://user-images.githubusercontent.com/96192199/230361456-61078362-a966-4eb5-b49e-3c62ef18b860.gif" width="700" >
 </div>
 
 
-- All buttons are generated dynamically by reading functional.py
+- All buttons are generated dynamically by reading functional.py, so custom functions can be added freely, liberating the clipboard
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
 </div>
@@ -110,99 +87,66 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼
 <img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
 </div>
 
-
+- If the output contains formulas, they are shown in both tex and rendered form at the same time, for easy copying and reading
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
 </div>
 
-
+- Too lazy to read the project code? Feed the whole repository straight to chatgpt
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
 </div>
 
-- Mixed calls to multiple large language models (ChatGLM + OpenAI-GPT3.5 + GPT4)
+- Mixed calls to multiple large language models (ChatGLM + OpenAI-GPT3.5 + [API2D](https://api2d.com/)-GPT4)
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/232537274-deca0563-7aa6-4b5d-94a2-b7c453c47794.png" width="700" >
 </div>
 
-<br><br>
-
 # Installation
-
-```mermaid
-flowchart TD
-    A{"Installation method"} --> W1("I. 🔑Run directly (Windows, Linux or MacOS)")
-    W1 --> W11["1. Python pip package management"]
-    W1 --> W12["2. Anaconda package management (recommended⭐)"]
-
-    A --> W2["II. 🐳Use Docker (Windows, Linux or MacOS)"]
-
-    W2 --> k1["1. Full-capability big image (recommended⭐)"]
-    W2 --> k2["2. Online-models-only (GPT, GLM4, etc.) image"]
-    W2 --> k3["3. Online models + Latex big image"]
-
-    A --> W4["IV. 🚀Other deployment methods"]
-    W4 --> C1["1. Windows/MacOS one-click install-and-run script (recommended⭐)"]
-    W4 --> C2["2. Huggingface, Sealos remote deployment"]
-    W4 --> C4["3. ... others ..."]
-```
-
-### Installation method I: run directly (Windows, Linux or MacOS)
+### Installation method I: run directly (Windows, Linux or MacOS)
 
 1. Download the project
-
-
-git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
-cd gpt_academic
-```
-
-2. Configure API_KEY and other variables
-
-In `config.py`, configure the API KEY and other variables. [Special network environment setup](https://github.com/binary-husky/gpt_academic/issues/1), [Wiki: project configuration guide](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明).
+```sh
+git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
+cd gpt_academic
+```
 
+2. Configure API_KEY
 
+In `config.py`, configure the API KEY and other settings; [click here for special network environment setup](https://github.com/binary-husky/gpt_academic/issues/1).
 
+(P.S. At startup the program first checks for a private configuration file named `config_private.py` and uses its entries to override same-named entries in `config.py`. If you understand this read logic, we strongly recommend creating a `config_private.py` next to `config.py` and moving (copying) only the entries you changed into it. `config_private.py` is not tracked by git, which keeps your private information safer. P.S. The project also supports configuring most options through `environment variables`; see the `docker-compose` file for the format. Read priority: `environment variables` > `config_private.py` > `config.py`)
 
 3. Install dependencies
-
-
-
-
-
-
-
+```sh
+# (Choice I: if you know python; python 3.9+, the newer the better). Note: use the official or the Aliyun pip mirror; temporary mirror switch: python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
+python -m pip install -r requirements.txt
+
+# (Choice II: if you don't know python) use anaconda; the steps are similar (https://www.bilibili.com/video/BV1rc411W7Dr):
+conda create -n gptac_venv python=3.11    # create the anaconda environment
+conda activate gptac_venv                 # activate the anaconda environment
+python -m pip install -r requirements.txt # same step as the pip install
+```
 
 <details><summary>Click to expand if you need Tsinghua ChatGLM2 / Fudan MOSS / RWKV as the backend</summary>
 <p>
 
-【Optional step】If you need Tsinghua
-
+【Optional step】If you need Tsinghua ChatGLM2 / Fudan MOSS as the backend, extra dependencies must be installed (prerequisites: comfortable with Python + have used Pytorch + a strong enough machine):
 ```sh
-# 【Optional step I】Support Tsinghua
-python -m pip install -r
+# 【Optional step I】Support Tsinghua ChatGLM2. Note: if you hit a "Call ChatGLM fail 不能正常加载ChatGLM的参数" error, check: 1. the default install above is the torch+cpu build; to use cuda, uninstall torch and reinstall torch+cuda; 2. if the model cannot load because the machine is too weak, lower the model precision in request_llm/bridge_chatglm.py: change both AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) to AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
+python -m pip install -r request_llm/requirements_chatglm.txt
 
 # 【Optional step II】Support Fudan MOSS
-python -m pip install -r
-git clone --depth=1 https://github.com/OpenLMLab/MOSS.git
+python -m pip install -r request_llm/requirements_moss.txt
+git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss  # note: you must be at the project root when running this line
 
 # 【Optional step III】Support RWKV Runner
 See the wiki: https://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner
 
 # 【Optional step IV】Make sure AVAIL_LLM_MODELS in config.py includes the expected models; all currently supported models (the jittorllms series currently only supports the docker solution):
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
-
-# 【Optional step V】Support INT8/INT4 quantization of local models (the models referred to here are not themselves quantized versions; currently deepseek-coder is supported, more quantized-model choices will be added after testing)
-pip install bitsandbyte
-# windows users need the bitsandbytes-windows-webui below to install bitsandbytes
-python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui
-pip install -U git+https://github.com/huggingface/transformers.git
-pip install -U git+https://github.com/huggingface/accelerate.git
-pip install peft
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 ```
 
 </p>
@@ -211,86 +155,102 @@ pip install peft
 
 
 4. Run
-
-
-
+```sh
+python main.py
+```
 
 ### Installation method II: use Docker
 
-
-[![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml)
+[![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)
 
-
-# Edit docker-compose.yml: keep scheme 0 and delete the others. Then run:
-docker-compose up
-```
-
-1. Online models only: ChatGPT + GLM4 + Wenxin Yiyan + Spark, etc. (recommended for most users)
+1. ChatGPT only (recommended for most users; equivalent to docker-compose scheme 1)
 [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
 [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
 [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)
 
-``` sh
-# Edit docker-compose.yml: keep scheme 1 and delete the others. Then run:
-docker-compose up
-```
-
-
+``` sh
+git clone --depth=1 https://github.com/binary-husky/gpt_academic.git  # download the project
+cd gpt_academic                                                       # enter the path
+nano config.py                                                        # edit config.py with any text editor: set "Proxy", "API_KEY", "WEB_PORT" (e.g. 50923), etc.
+docker build -t gpt-academic .                                        # install
+
+#(last step, Linux) using `--net=host` is quicker and easier
+docker run --rm -it --net=host gpt-academic
+#(last step, MacOS/Windows) only the -p option can expose a container port (e.g. 50923) to the host
+docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
+```
+P.S. If you need the Latex-dependent plugin features, see the Wiki. You can also get Latex support directly via docker-compose (edit docker-compose.yml: keep scheme 4 and delete the others).
+
+2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + Tongyi Qianwen (requires familiarity with the [Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian) runtime)
 [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)
 
-
-
-
+``` sh
+# Edit docker-compose.yml: keep scheme 2 and delete the others. Adjust scheme 2's configuration in docker-compose.yml following the comments there
+docker-compose up
+```
+
+3. ChatGPT + LLAMA + PanGu + RWKV (requires familiarity with the [Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian) runtime)
+[![jittorllms](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml)
 
-
-
-Windows users entirely unfamiliar with the python environment can download the one-click run script published in the [Releases](https://github.com/binary-husky/gpt_academic/releases) to install the no-local-model version. Script contribution: [oobabooga](https://github.com/oobabooga/one-click-installers).
+``` sh
+# Edit docker-compose.yml: keep scheme 3 and delete the others. Adjust scheme 3's configuration in docker-compose.yml following the comments there
+docker-compose up
+```
 
-
-
-Please visit [
 
-
-
-
+### Installation method III: other deployment options
+1. One-click run script.
+Windows users entirely unfamiliar with the python environment can download the one-click run script published in the [Releases](https://github.com/binary-husky/gpt_academic/releases) to install the no-local-model version.
+The script was contributed by [oobabooga](https://github.com/oobabooga/one-click-installers).
+
+2. Run with docker-compose.
+Read docker-compose.yml and follow the instructions in it
+
+3. How to use a reverse-proxy URL
+Configure API_URL_REDIRECT following the instructions in `config.py`.
+
+4. Microsoft Azure API
+Configure it following the instructions in `config.py` (AZURE_ENDPOINT and the other three options)
+
+5. Remote cloud-server deployment (requires cloud-server knowledge and experience).
+Please visit [deployment wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
+
+6. [One-click deployment](https://github.com/binary-husky/gpt_academic/issues/993) with Sealos.
+
+7. Use WSL2 (Windows Subsystem for Linux).
+Please visit [deployment wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
+
+8. How to run under a sub-path (such as `http://localhost/subpath`).
+Please visit the [FastAPI run notes](docs/WithFastapi.md)
 
-<br><br>
 
 # Advanced Usage
 ### I: Custom convenience buttons (academic shortcut keys)
-
-Open `core_functional.py` with any text editor, add an entry as below, then restart the program. (If the button already exists, both the prefix and the suffix support hot modification and take effect without restarting the program.)
+Open `core_functional.py` with any text editor, add an entry as below, then restart the program. (If the button has been added successfully and is visible, both the prefix and the suffix support hot modification and take effect without restarting the program.)
 For example
-
-```python
+```
 "超级英译中": {
     # Prefix: added before your input. E.g. used to describe your request, such as translating, explaining code, polishing, etc.
-    "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
-
+    "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
+
     # Suffix: added after your input. E.g. together with the prefix, it can wrap your input in quotation marks.
     "Suffix": "",
 },
 ```
-
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
 </div>
 
 ### II: Custom function plugins
+
 Write powerful function plugins to perform any task you can and cannot imagine.
 Writing and debugging plugins for this project is easy; with some basic python knowledge you can implement your own plugin by following the templates we provide.
 See the [function plugin guide](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) for details.
 
-<br><br>
 
-#
-### I
+# Latest Update
+### I: New features
 
 1. Conversation saving. Call `保存当前的对话` in the function plugin area to save the current conversation as a readable and restorable html file;
 call `载入对话历史存档` in the function plugin area (drop-down menu) to restore a previous session.
@@ -331,23 +291,28 @@ Tip: clicking `载入对话历史存档` without specifying a file shows the cached historical html archives
 <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
 </div>
 
-7.
+7. New MOSS large-language-model support
+<div align="center">
+<img src="https://user-images.githubusercontent.com/96192199/236639178-92836f37-13af-4fdd-984d-b4450fe30336.png" width="500" >
+</div>
+
+8. OpenAI image generation
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
 </div>
 
-
+9. OpenAI audio parsing and summarization
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/709ccf95-3aee-498a-934a-e1c22d3d5d5b" width="500" >
 </div>
 
-
+10. Latex full-text proofreading
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
 </div>
 
-
+11. Language and theme switching
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/b6799499-b6fb-4f0c-9c8e-1b441872f4e8" width="500" >
 </div>
@@ -355,14 +320,7 @@
 
 
 ### II: Versions:
-- version 3.
-- version 3.70: Mermaid diagrams introduced; GPT can draw mind maps and more
-- version 3.60: AutoGen introduced as the cornerstone of the next plugin generation
-- version 3.57: GLM3, Spark v3, Wenxin Yiyan v4 supported; concurrency bug in local models fixed
-- version 3.56: Dynamically add basic function buttons; new report-PDF summary page
-- version 3.55: Front-end reworked; floating windows and a menu bar introduced
-- version 3.54: New dynamic code interpreter (Code Interpreter) (to be refined)
-- version 3.53: Dynamically choose different UI themes; improved stability; multi-user conflicts resolved
+- version 3.60(todo): optimize the Void Terminal; introduce code interpreter and more plugins
 - version 3.50: Call all of this project's function plugins in natural language (Void Terminal); plugin categories; improved UI; new themes
 - version 3.49: Baidu Qianfan platform and Wenxin Yiyan supported
 - version 3.48: Alibaba DAMO Academy Tongyi Qianwen, Shanghai AI-Lab InternLM, iFLYTEK Spark supported
@@ -376,58 +334,25 @@
 - version 3.0: Support for chatglm and other small llms
 - version 2.6: Plugin structure reworked; interactivity improved; more plugins added
 - version 2.5: Self-updating; fixed overly-long text / token overflow when summarizing large project source code
-- version 2.4: New PDF full-text translation feature;
+- version 2.4: (1) New PDF full-text translation feature; (2) new input-area position-switching feature; (3) new vertical layout option; (4) multi-threaded function plugin optimization.
 - version 2.3: Improved multi-threaded interactivity
 - version 2.2: Function plugin hot-reloading supported
 - version 2.1: Collapsible layout
 - version 2.0: Modular function plugins introduced
 - version 1.0: Basic features
 
-
+gpt_academic developer QQ group 2: 610599535
 
 - Known issues
     - Some browser translation plugins interfere with the front end of this software
-    - Official Gradio
-
-```mermaid
-timeline LR
-    title GPT-Academic project development history
-    section 2.x
-        1.0~2.2: basic features: modular function plugins introduced: collapsible layout: plugin hot-reloading
-        2.3~2.5: improved multi-threaded interactivity: PDF full-text translation: input-area position switching: self-updating
-        2.6: plugin structure reworked: interactivity improved: more plugins added
-    section 3.x
-        3.0~3.1: chatglm support: other small llm support: query multiple gpt models at once: multi-apikey load balancing
-        3.2~3.3: more plugin parameter interfaces: conversation saving: read code in any language: query arbitrary LLM combinations: internet information aggregation
-        3.4: arxiv paper translation: latex paper proofreading
-        3.44: official Azure support: improved UI usability
-        3.46: custom fine-tuned ChatGLM2 models: real-time voice chat
-        3.49: Alibaba DAMO Academy Tongyi Qianwen: Shanghai AI-Lab InternLM: iFLYTEK Spark: Baidu Qianfan platform & Wenxin Yiyan
-        3.50: Void Terminal: plugin categories: improved UI: new themes
-        3.53: dynamic UI theme selection: improved stability: multi-user conflicts resolved
-        3.55: dynamic code interpreter: front-end rework: floating windows and menu bar
-        3.56: dynamically added basic function buttons: new report-PDF summary page
-        3.57: GLM3, Spark v3: Wenxin Yiyan v4 support: concurrency bug in local models fixed
-        3.60: AutoGen introduced
-        3.70: Mermaid diagrams: GPT mind maps
-        3.80(TODO): optimize the AutoGen plugin theme: design derivative plugins
-
-```
-
+    - Official Gradio currently has many compatibility bugs; be sure to install Gradio via `requirement.txt`
 
 ### III: Themes
 Change the theme by modifying the `THEME` option (config.py)
 1. `Chuanhu-Small-and-Beautiful` [site](https://github.com/GaiZhenbiao/ChuanhuChatGPT/)
 
 
-### IV
-
-1. `master` branch: main branch, stable
-2. `frontier` branch: development branch, testing
-3. How to [connect other large models](request_llms/README.md)
-4. Visit GPT-Academic's [online service and support us](https://github.com/binary-husky/gpt_academic/wiki/online)
-
-### V: References and learning
+### IV: References and learning
 
 ```
 Many designs from other excellent projects are referenced in this code, in no particular order:
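The configuration note in the README above describes a three-level read priority: environment variables override `config_private.py`, which overrides `config.py`. A minimal sketch of that lookup order follows; the project's real reader is the `get_conf` helper in `toolbox.py`, and this version is illustrative only:

```python
# Illustrative sketch of the documented read priority:
# environment variable > config_private.py > config.py
# (the project's real implementation is get_conf() in toolbox.py).
import importlib
import os

def read_single_conf(name: str):
    value = os.environ.get(name)                  # 1. an environment variable wins
    if value is not None:
        return value
    try:                                          # 2. then the private, git-ignored file
        return getattr(importlib.import_module("config_private"), name)
    except (ImportError, AttributeError):
        pass
    return getattr(importlib.import_module("config"), name)  # 3. finally the tracked default

# Example: read_single_conf("WEB_PORT")
```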
app.py
CHANGED
@@ -1,40 +1,24 @@
|
|
1 |
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
2 |
|
3 |
-
help_menu_description = \
|
4 |
-
"""Github源代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic),
|
5 |
-
感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors).
|
6 |
-
</br></br>常见问题请查阅[项目Wiki](https://github.com/binary-husky/gpt_academic/wiki),
|
7 |
-
如遇到Bug请前往[Bug反馈](https://github.com/binary-husky/gpt_academic/issues).
|
8 |
-
</br></br>普通对话使用说明: 1. 输入问题; 2. 点击提交
|
9 |
-
</br></br>基础功能区使用说明: 1. 输入文本; 2. 点击任意基础功能区按钮
|
10 |
-
</br></br>函数插件区使用说明: 1. 输入路径/问题, 或者上传文件; 2. 点击任意函数插件区按钮
|
11 |
-
</br></br>虚空终端使用说明: 点击虚空终端, 然后根据提示输入指令, 再次点击虚空终端
|
12 |
-
</br></br>如何保存对话: 点击保存当前的对话按钮
|
13 |
-
</br></br>如何语音对话: 请阅读Wiki
|
14 |
-
</br></br>如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)"""
|
15 |
-
|
16 |
def main():
|
17 |
import subprocess, sys
|
18 |
-
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '
|
19 |
import gradio as gr
|
20 |
-
|
21 |
-
raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.")
|
22 |
-
from request_llms.bridge_all import predict
|
23 |
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
|
24 |
-
# 建议您复制一个config_private.py放自己的秘密, 如API
|
25 |
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
|
26 |
CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
|
27 |
-
ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING
|
28 |
-
DARK_MODE, NUM_CUSTOM_BASIC_BTN, SSL_KEYFILE, SSL_CERTFILE = get_conf('DARK_MODE', 'NUM_CUSTOM_BASIC_BTN', 'SSL_KEYFILE', 'SSL_CERTFILE')
|
29 |
-
INIT_SYS_PROMPT = get_conf('INIT_SYS_PROMPT')
|
30 |
|
31 |
# 如果WEB_PORT是-1, 则随机选取WEB端口
|
32 |
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
|
33 |
from check_proxy import get_current_version
|
34 |
-
from themes.theme import adjust_theme, advanced_css, theme_declaration
|
35 |
-
|
36 |
-
from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, init_cookie
|
37 |
title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
|
|
|
|
|
38 |
|
39 |
# 问询记录, python 版本建议3.9+(越新越好)
|
40 |
import logging, uuid
|
@@ -51,7 +35,7 @@ def main():
|
|
51 |
|
52 |
# 高级函数插件
|
53 |
from crazy_functional import get_crazy_functions
|
54 |
-
DEFAULT_FN_GROUPS = get_conf('DEFAULT_FN_GROUPS')
|
55 |
plugins = get_crazy_functions()
|
56 |
all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')]))
|
57 |
match_group = lambda tags, groups: any([g in groups for g in tags.split('|')])
|
@@ -67,19 +51,16 @@ def main():
|
|
67 |
proxy_info = check_proxy(proxies)
|
68 |
|
69 |
gr_L1 = lambda: gr.Row().style()
|
70 |
-
gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id
|
71 |
if LAYOUT == "TOP-DOWN":
|
72 |
gr_L1 = lambda: DummyWith()
|
73 |
gr_L2 = lambda scale, elem_id: gr.Row()
|
74 |
CHATBOT_HEIGHT /= 2
|
75 |
|
76 |
cancel_handles = []
|
77 |
-
customize_btns = {}
|
78 |
-
predefined_btns = {}
|
79 |
with gr.Blocks(title="GPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
80 |
gr.HTML(title_html)
|
81 |
gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
|
82 |
-
secret_css, dark_mode, py_pickle_cookie = gr.Textbox(visible=False), gr.Textbox(DARK_MODE, visible=False), gr.Textbox(visible=False)
|
83 |
cookies = gr.State(load_chat_cookies())
|
84 |
with gr_L1():
|
85 |
with gr_L2(scale=2, elem_id="gpt-chat"):
|
@@ -89,45 +70,37 @@ def main():
|
|
89 |
with gr_L2(scale=1, elem_id="gpt-panel"):
|
90 |
with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
|
91 |
with gr.Row():
|
92 |
-
txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API
|
93 |
with gr.Row():
|
94 |
-
submitBtn = gr.Button("提交",
|
95 |
with gr.Row():
|
96 |
-
resetBtn = gr.Button("重置",
|
97 |
-
stopBtn = gr.Button("停止",
|
98 |
-
clearBtn = gr.Button("清除",
|
99 |
-
if ENABLE_AUDIO:
|
100 |
with gr.Row():
|
101 |
-
audio_mic = gr.Audio(source="microphone", type="numpy",
|
102 |
with gr.Row():
|
103 |
status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
|
104 |
-
|
105 |
with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
|
106 |
with gr.Row():
|
107 |
-
for k in range(NUM_CUSTOM_BASIC_BTN):
|
108 |
-
customize_btn = gr.Button("自定义按钮" + str(k+1), visible=False, variant="secondary", info_str=f'基础功能区: 自定义按钮')
|
109 |
-
customize_btn.style(size="sm")
|
110 |
-
customize_btns.update({"自定义按钮" + str(k+1): customize_btn})
|
111 |
for k in functional:
|
112 |
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
|
113 |
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
|
114 |
-
functional[k]["Button"] = gr.Button(k, variant=variant
|
115 |
functional[k]["Button"].style(size="sm")
|
116 |
-
predefined_btns.update({k: functional[k]["Button"]})
|
117 |
with gr.Accordion("函数插件区", open=True, elem_id="plugin-panel") as area_crazy_fn:
|
118 |
with gr.Row():
|
119 |
gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
|
120 |
with gr.Row(elem_id="input-plugin-group"):
|
121 |
-
plugin_group_sel = gr.Dropdown(choices=all_plugin_groups, label='', show_label=False, value=DEFAULT_FN_GROUPS,
|
122 |
multiselect=True, interactive=True, elem_classes='normal_mut_select').style(container=False)
|
123 |
with gr.Row():
|
124 |
for k, plugin in plugins.items():
|
125 |
if not plugin.get("AsButton", True): continue
|
126 |
visible = True if match_group(plugin['Group'], DEFAULT_FN_GROUPS) else False
|
127 |
variant = plugins[k]["Color"] if "Color" in plugin else "secondary"
|
128 |
-
|
129 |
-
plugin['Button'] = plugins[k]['Button'] = gr.Button(k, variant=variant,
|
130 |
-
visible=visible, info_str=f'函数插件区: {info}').style(size="sm")
|
131 |
with gr.Row():
|
132 |
with gr.Accordion("更多函数插件", open=True):
|
133 |
dropdown_fn_list = []
|
@@ -138,143 +111,53 @@ def main():
|
|
138 |
with gr.Row():
|
139 |
dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
|
140 |
with gr.Row():
|
141 |
-
plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
|
142 |
placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
|
143 |
with gr.Row():
|
144 |
switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary").style(size="sm")
|
145 |
with gr.Row():
|
146 |
-
with gr.Accordion("
|
147 |
-
file_upload = gr.Files(label="任何文件,
|
148 |
-
|
149 |
-
|
150 |
-
with gr.Row():
|
151 |
-
with gr.Tab("上传文件", elem_id="interact-panel"):
|
152 |
-
gr.Markdown("请上传本地文件/压缩包供“函数插件区”功能调用。请注意: 上传文件后会自动把输入区修改为相应路径。")
|
153 |
-
file_upload_2 = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload_float")
|
154 |
-
|
155 |
-
with gr.Tab("更换模型", elem_id="interact-panel"):
|
156 |
-
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
|
157 |
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
|
158 |
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
|
159 |
-
max_length_sl = gr.Slider(minimum=256, maximum=
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
with gr.Column(scale=10):
|
179 |
-
txt2 = gr.Textbox(show_label=False, placeholder="Input question here.",
|
180 |
-
elem_id='user_input_float', lines=8, label="输入区2").style(container=False)
|
181 |
-
with gr.Column(scale=1, min_width=40):
|
182 |
-
submitBtn2 = gr.Button("提交", variant="primary"); submitBtn2.style(size="sm")
|
183 |
resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm")
|
184 |
stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
|
185 |
-
clearBtn2 = gr.Button("清除",
|
186 |
-
|
187 |
-
|
188 |
-
with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_customize:
|
189 |
-
with gr.Accordion("自定义菜单", open=True, elem_id="edit-panel"):
|
190 |
-
with gr.Row() as row:
|
191 |
-
with gr.Column(scale=10):
|
192 |
-
AVAIL_BTN = [btn for btn in customize_btns.keys()] + [k for k in functional]
|
193 |
-
basic_btn_dropdown = gr.Dropdown(AVAIL_BTN, value="自定义按钮1", label="选择一个需要自定义基础功能区按钮").style(container=False)
|
194 |
-
basic_fn_title = gr.Textbox(show_label=False, placeholder="输入新按钮名称", lines=1).style(container=False)
|
195 |
-
basic_fn_prefix = gr.Textbox(show_label=False, placeholder="输入新提示前缀", lines=4).style(container=False)
|
196 |
-
basic_fn_suffix = gr.Textbox(show_label=False, placeholder="输入新提示后缀", lines=4).style(container=False)
|
197 |
-
with gr.Column(scale=1, min_width=70):
|
198 |
-
basic_fn_confirm = gr.Button("确认并保存", variant="primary"); basic_fn_confirm.style(size="sm")
|
199 |
-
basic_fn_clean = gr.Button("恢复默认", variant="primary"); basic_fn_clean.style(size="sm")
|
200 |
-
def assign_btn(persistent_cookie_, cookies_, basic_btn_dropdown_, basic_fn_title, basic_fn_prefix, basic_fn_suffix, clean_up=False):
|
201 |
-
ret = {}
|
202 |
-
# 读取之前的自定义按钮
|
203 |
-
customize_fn_overwrite_ = cookies_['customize_fn_overwrite']
|
204 |
-
# 更新新的自定义按钮
|
205 |
-
customize_fn_overwrite_.update({
|
206 |
-
basic_btn_dropdown_:
|
207 |
-
{
|
208 |
-
"Title":basic_fn_title,
|
209 |
-
"Prefix":basic_fn_prefix,
|
210 |
-
"Suffix":basic_fn_suffix,
|
211 |
-
}
|
212 |
-
}
|
213 |
-
)
|
214 |
-
if clean_up:
|
215 |
-
customize_fn_overwrite_ = {}
|
216 |
-
cookies_.update(customize_fn_overwrite_) # 更新cookie
|
217 |
-
visible = (not clean_up) and (basic_fn_title != "")
|
218 |
-
if basic_btn_dropdown_ in customize_btns:
|
219 |
-
# 是自定义按钮,不是预定义按钮
|
220 |
-
ret.update({customize_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
|
221 |
-
else:
|
222 |
-
# 是预定义按钮
|
223 |
-
ret.update({predefined_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
|
224 |
-
ret.update({cookies: cookies_})
|
225 |
-
try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
|
226 |
-
except: persistent_cookie_ = {}
|
227 |
-
persistent_cookie_["custom_bnt"] = customize_fn_overwrite_ # dict update new value
|
228 |
-
persistent_cookie_ = to_cookie_str(persistent_cookie_) # persistent cookie to dict
|
229 |
-
ret.update({py_pickle_cookie: persistent_cookie_}) # write persistent cookie
|
230 |
-
return ret
|
231 |
-
|
232 |
-
# update btn
|
233 |
-
h = basic_fn_confirm.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix],
|
234 |
-
[py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
|
235 |
-
h.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
|
236 |
-
# clean up btn
|
237 |
-
h2 = basic_fn_clean.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix, gr.State(True)],
|
238 |
-
[py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
|
239 |
-
h2.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
|
240 |
-
|
241 |
-
def persistent_cookie_reload(persistent_cookie_, cookies_):
|
242 |
-
ret = {}
|
243 |
-
for k in customize_btns:
|
244 |
-
ret.update({customize_btns[k]: gr.update(visible=False, value="")})
|
245 |
-
|
246 |
-
try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
|
247 |
-
except: return ret
|
248 |
-
|
249 |
-
customize_fn_overwrite_ = persistent_cookie_.get("custom_bnt", {})
|
250 |
-
cookies_['customize_fn_overwrite'] = customize_fn_overwrite_
|
251 |
-
ret.update({cookies: cookies_})
|
252 |
-
|
253 |
-
for k,v in persistent_cookie_["custom_bnt"].items():
|
254 |
-
if v['Title'] == "": continue
|
255 |
-
if k in customize_btns: ret.update({customize_btns[k]: gr.update(visible=True, value=v['Title'])})
|
256 |
-
else: ret.update({predefined_btns[k]: gr.update(visible=True, value=v['Title'])})
|
257 |
-
return ret
|
258 |
|
259 |
# 功能区显示开关与功能区的互动
|
260 |
def fn_area_visibility(a):
|
261 |
ret = {}
|
262 |
-
ret.update({
|
263 |
-
ret.update({
|
|
|
|
|
|
|
|
|
264 |
ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
|
265 |
-
if "
|
266 |
-
return ret
|
267 |
-
checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, plugin_advanced_arg] )
|
268 |
-
checkboxes.select(None, [checkboxes], None, _js=js_code_show_or_hide)
|
269 |
-
|
270 |
-
# 功能区显示开关与功能区的互动
|
271 |
-
def fn_area_visibility_2(a):
|
272 |
-
ret = {}
|
273 |
-
ret.update({area_customize: gr.update(visible=("自定义菜单" in a))})
|
274 |
return ret
|
275 |
-
|
276 |
-
checkboxes_2.select(None, [checkboxes_2], None, _js=js_code_show_or_hide_group2)
|
277 |
-
|
278 |
# 整理反复出现的控件句柄组合
|
279 |
input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
|
280 |
output_combo = [cookies, chatbot, history, status]
|
@@ -284,28 +167,22 @@ def main():
|
|
284 |
cancel_handles.append(txt2.submit(**predict_args))
|
285 |
cancel_handles.append(submitBtn.click(**predict_args))
|
286 |
cancel_handles.append(submitBtn2.click(**predict_args))
|
287 |
-
resetBtn.click(
|
288 |
-
resetBtn2.click(
|
289 |
-
|
290 |
-
|
291 |
-
clearBtn.click(None, None, [txt, txt2], _js=js_code_clear)
|
292 |
-
clearBtn2.click(None, None, [txt, txt2], _js=js_code_clear)
|
293 |
if AUTO_CLEAR_TXT:
|
294 |
-
submitBtn.click(
|
295 |
-
submitBtn2.click(
|
296 |
-
txt.submit(
|
297 |
-
txt2.submit(
|
298 |
# 基础功能区的回调函数注册
|
299 |
for k in functional:
|
300 |
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
|
301 |
click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
|
302 |
cancel_handles.append(click_handle)
|
303 |
-
for btn in customize_btns.values():
|
304 |
-
click_handle = btn.click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(btn.value)], outputs=output_combo)
|
         cancel_handles.append(click_handle)
     # 文件上传区,接收文件后与chatbot的互动
-    file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies])
-    file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
     # 函数插件-固定按钮区
     for k in plugins:
         if not plugins[k].get("AsButton", True): continue
@@ -315,34 +192,16 @@ def main():
     # 函数插件-下拉菜单与随变按钮的互动
     def on_dropdown_changed(k):
         variant = plugins[k]["Color"] if "Color" in plugins[k] else "secondary"
-        ret = {switchy_bt: gr.update(value=k, variant=variant, info_str=f'函数插件区: {info}')}
         if plugins[k].get("AdvancedArgs", False):   # 是否唤起高级插件参数区
             ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + plugins[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))})
         else:
             ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
         return ret
     dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg])
     def on_md_dropdown_changed(k):
         return {chatbot: gr.update(label="当前模型:"+k)}
     md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot])
-    def on_theme_dropdown_changed(theme, secret_css):
-        adjust_theme, css_part1, _, adjust_dynamic_theme = load_dynamic_theme(theme)
-        if adjust_dynamic_theme:
-            css_part2 = adjust_dynamic_theme._get_theme_css()
-        else:
-            css_part2 = adjust_theme()._get_theme_css()
-        return css_part2 + css_part1
-    theme_handle = theme_dropdown.select(on_theme_dropdown_changed, [theme_dropdown, secret_css], [secret_css])
-    theme_handle.then(
-        None,
-        [secret_css],
-        None,
-        _js=js_code_for_css_changing
-    )
     # 随变按钮的回调函数注册
     def route(request: gr.Request, k, *args, **kwargs):
         if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
@@ -360,53 +219,52 @@ def main():
         if not group_list: # 处理特殊情况:没有选择任何插件组
             return [*[plugin['Button'].update(visible=False) for _, plugin in plugins_as_btn.items()], gr.Dropdown.update(choices=[])]
         for k, plugin in plugins.items():
-            if plugin.get("AsButton", True):
                 btn_list.append(plugin['Button'].update(visible=match_group(plugin['Group'], group_list)))  # 刷新按钮
                 if plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k)  # 对于需要高级参数的插件,亦在下拉菜单中显示
             elif match_group(plugin['Group'], group_list): fns_list.append(k)  # 刷新下拉列表
         return [*btn_list, gr.Dropdown.update(choices=fns_list)]
     plugin_group_sel.select(fn=on_group_change, inputs=[plugin_group_sel], outputs=[*[plugin['Button'] for name, plugin in plugins_as_btn.items()], dropdown])
-    if ENABLE_AUDIO:
         from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
         rad = RealtimeAudioDistribution()
         def deal_audio(audio, cookies):
             rad.feed(cookies['uuid'].hex, audio)
         audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])

-    demo.load(…
-    demo.load(…
     # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
-    def …
         import threading, webbrowser, time
         print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
-        …
-        threading.Thread(target=…
-        threading.Thread(target=…
-        threading.Thread(target=…
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])

     # 如果需要在二级路径下运行
-    # CUSTOM_PATH = get_conf('CUSTOM_PATH')
-    # if CUSTOM_PATH != "/":
     #     from toolbox import run_gradio_in_subpath
     #     run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH)
-    # else:
     #     demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png",
-    #         blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"

 if __name__ == "__main__":
     main()
 import os; os.environ['no_proxy'] = '*'  # 避免代理网络产生意外污染

 def main():
     import subprocess, sys
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
     import gradio as gr
+    from request_llm.bridge_all import predict
     from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
+    # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
     proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
     CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
+    ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING')

     # 如果WEB_PORT是-1, 则随机选取WEB端口
     PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
     from check_proxy import get_current_version
+    from themes.theme import adjust_theme, advanced_css, theme_declaration
+    initial_prompt = "Serve me as a writing and programming assistant."
     title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
+    description = "代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic),"
+    description += "感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors)"

     # 问询记录, python 版本建议3.9+(越新越好)
     import logging, uuid
     …
     # 高级函数插件
     from crazy_functional import get_crazy_functions
+    DEFAULT_FN_GROUPS, = get_conf('DEFAULT_FN_GROUPS')
     plugins = get_crazy_functions()
     all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')]))
     match_group = lambda tags, groups: any([g in groups for g in tags.split('|')])
     …
     proxy_info = check_proxy(proxies)

     gr_L1 = lambda: gr.Row().style()
+    gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id)
     if LAYOUT == "TOP-DOWN":
         gr_L1 = lambda: DummyWith()
         gr_L2 = lambda scale, elem_id: gr.Row()
         CHATBOT_HEIGHT /= 2

     cancel_handles = []
     with gr.Blocks(title="GPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
         gr.HTML(title_html)
         gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
         cookies = gr.State(load_chat_cookies())
         with gr_L1():
             with gr_L2(scale=2, elem_id="gpt-chat"):
                 …
             with gr_L2(scale=1, elem_id="gpt-panel"):
                 with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
                     with gr.Row():
+                        txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
                     with gr.Row():
+                        submitBtn = gr.Button("提交", variant="primary")
                     with gr.Row():
+                        resetBtn = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm")
+                        stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
+                        clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
+                    if ENABLE_AUDIO:
                         with gr.Row():
+                            audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False).style(container=False)
                     with gr.Row():
                         status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
                 with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
                     with gr.Row():
                         for k in functional:
                             if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
                             variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
+                            functional[k]["Button"] = gr.Button(k, variant=variant)
                             functional[k]["Button"].style(size="sm")
                 with gr.Accordion("函数插件区", open=True, elem_id="plugin-panel") as area_crazy_fn:
                     with gr.Row():
                         gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
                     with gr.Row(elem_id="input-plugin-group"):
+                        plugin_group_sel = gr.Dropdown(choices=all_plugin_groups, label='', show_label=False, value=DEFAULT_FN_GROUPS,
                                                        multiselect=True, interactive=True, elem_classes='normal_mut_select').style(container=False)
                     with gr.Row():
                         for k, plugin in plugins.items():
                             if not plugin.get("AsButton", True): continue
                             visible = True if match_group(plugin['Group'], DEFAULT_FN_GROUPS) else False
                             variant = plugins[k]["Color"] if "Color" in plugin else "secondary"
+                            plugin['Button'] = plugins[k]['Button'] = gr.Button(k, variant=variant, visible=visible).style(size="sm")
                     with gr.Row():
                         with gr.Accordion("更多函数插件", open=True):
                             dropdown_fn_list = []
                             …
                             with gr.Row():
                                 dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
                             with gr.Row():
+                                plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
                                                                  placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
                             with gr.Row():
                                 switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary").style(size="sm")
                     with gr.Row():
+                        with gr.Accordion("点击展开“文件上传区”。上传本地文件/压缩包供函数插件调用。", open=False) as area_file_up:
+                            file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
+                with gr.Accordion("更换模型 & SysPrompt & 交互界面布局", open=(LAYOUT == "TOP-DOWN"), elem_id="interact-panel"):
+                    system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
                     top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01, interactive=True, label="Top-p (nucleus sampling)",)
                     temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+                    max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
+                    checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+                    md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
+                    dark_mode_btn = gr.Button("Toggle Dark Mode ☀", variant="secondary").style(size="sm")
+                    dark_mode_btn.click(None, None, None, _js="""() => {
+                        if (document.querySelectorAll('.dark').length) {
+                            document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark'));
+                        } else {
+                            document.querySelector('body').classList.add('dark');
+                        }
+                    }""",
+                    )
+            gr.Markdown(description)
+            with gr.Accordion("备选输入区", open=True, visible=False, elem_id="input-panel2") as area_input_secondary:
+                with gr.Row():
+                    txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False)
+                with gr.Row():
+                    submitBtn2 = gr.Button("提交", variant="primary")
+                with gr.Row():
                     resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm")
                     stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
+                    clearBtn2 = gr.Button("清除", variant="secondary", visible=False); clearBtn2.style(size="sm")
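The layout lambdas near the top of main() swap gr.Row() for DummyWith() when LAYOUT is "TOP-DOWN", so the same nested `with` blocks build either layout. A minimal sketch of what such a no-op context manager needs to provide (the real toolbox.DummyWith may differ in detail):

    class DummyWith():
        # A no-op stand-in for a gradio layout container: entering and
        # exiting does nothing, so `with gr_L1():` is harmless when the
        # TOP-DOWN layout does not want an enclosing Row/Column.
        def __enter__(self):
            return self
        def __exit__(self, exc_type, exc_value, traceback):
            return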
     # 功能区显示开关与功能区的互动
     def fn_area_visibility(a):
         ret = {}
+        ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
+        ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
+        ret.update({area_input_primary: gr.update(visible=("底部输入区" not in a))})
+        ret.update({area_input_secondary: gr.update(visible=("底部输入区" in a))})
+        ret.update({clearBtn: gr.update(visible=("输入清除键" in a))})
+        ret.update({clearBtn2: gr.update(visible=("输入清除键" in a))})
         ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
+        if "底部输入区" in a: ret.update({txt: gr.update(value="")})
         return ret
+    checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, clearBtn, clearBtn2, plugin_advanced_arg])

     # 整理反复出现的控件句柄组合
     input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
     output_combo = [cookies, chatbot, history, status]
     …
     cancel_handles.append(txt2.submit(**predict_args))
     cancel_handles.append(submitBtn.click(**predict_args))
     cancel_handles.append(submitBtn2.click(**predict_args))
+    resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
+    resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
+    clearBtn.click(lambda: ("",""), None, [txt, txt2])
+    clearBtn2.click(lambda: ("",""), None, [txt, txt2])
     if AUTO_CLEAR_TXT:
+        submitBtn.click(lambda: ("",""), None, [txt, txt2])
+        submitBtn2.click(lambda: ("",""), None, [txt, txt2])
+        txt.submit(lambda: ("",""), None, [txt, txt2])
+        txt2.submit(lambda: ("",""), None, [txt, txt2])
     # 基础功能区的回调函数注册
     for k in functional:
         if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
         click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
         cancel_handles.append(click_handle)
     # 文件上传区,接收文件后与chatbot的互动
+    file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies])
     # 函数插件-固定按钮区
     for k in plugins:
         if not plugins[k].get("AsButton", True): continue
     …
     # 函数插件-下拉菜单与随变按钮的互动
     def on_dropdown_changed(k):
         variant = plugins[k]["Color"] if "Color" in plugins[k] else "secondary"
+        ret = {switchy_bt: gr.update(value=k, variant=variant)}
         if plugins[k].get("AdvancedArgs", False):   # 是否唤起高级插件参数区
             ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + plugins[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))})
         else:
             ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
         return ret
     dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg])
     def on_md_dropdown_changed(k):
         return {chatbot: gr.update(label="当前模型:"+k)}
     md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot])
     # 随变按钮的回调函数注册
     def route(request: gr.Request, k, *args, **kwargs):
         if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
     …
         if not group_list: # 处理特殊情况:没有选择任何插件组
             return [*[plugin['Button'].update(visible=False) for _, plugin in plugins_as_btn.items()], gr.Dropdown.update(choices=[])]
         for k, plugin in plugins.items():
+            if plugin.get("AsButton", True):
                 btn_list.append(plugin['Button'].update(visible=match_group(plugin['Group'], group_list)))  # 刷新按钮
                 if plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k)  # 对于需要高级参数的插件,亦在下拉菜单中显示
             elif match_group(plugin['Group'], group_list): fns_list.append(k)  # 刷新下拉列表
         return [*btn_list, gr.Dropdown.update(choices=fns_list)]
     plugin_group_sel.select(fn=on_group_change, inputs=[plugin_group_sel], outputs=[*[plugin['Button'] for name, plugin in plugins_as_btn.items()], dropdown])
+    if ENABLE_AUDIO:
         from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
         rad = RealtimeAudioDistribution()
         def deal_audio(audio, cookies):
             rad.feed(cookies['uuid'].hex, audio)
         audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])

+    def init_cookie(cookies, chatbot):
+        # 为每一位访问的用户赋予一个独一无二的uuid编码
+        cookies.update({'uuid': uuid.uuid4()})
+        return cookies
+    demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies])
+    demo.load(lambda: 0, inputs=None, outputs=None, _js='()=>{ChatBotHeight();}')

     # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
+    def auto_opentab_delay():
         import threading, webbrowser, time
         print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
+        print(f"\t(亮色主题): http://localhost:{PORT}")
+        print(f"\t(暗色主题): http://localhost:{PORT}/?__theme=dark")
+        def open():
+            time.sleep(2)  # 打开浏览器
+            DARK_MODE, = get_conf('DARK_MODE')
+            if DARK_MODE: webbrowser.open_new_tab(f"http://localhost:{PORT}/?__theme=dark")
+            else: webbrowser.open_new_tab(f"http://localhost:{PORT}")
+        threading.Thread(target=open, name="open-browser", daemon=True).start()
+        threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
+        threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()

+    auto_opentab_delay()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])

     # 如果需要在二级路径下运行
+    # CUSTOM_PATH, = get_conf('CUSTOM_PATH')
+    # if CUSTOM_PATH != "/":
     #     from toolbox import run_gradio_in_subpath
     #     run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH)
+    # else:
     #     demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png",
+    #         blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])

 if __name__ == "__main__":
     main()
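The cancel_handles list above collects the event handle of every submit/click that can start a generation. In gradio 3.x such handles can be passed to another event's `cancels=` argument to abort in-flight runs, which is presumably how the 停止 buttons are wired in the lines elided from this view. A minimal self-contained sketch of the mechanism (the component names and slow_echo are illustrative, not this app's code):

    import gradio as gr, time

    with gr.Blocks() as demo:
        txt, out = gr.Textbox(), gr.Textbox()
        stop = gr.Button("停止")
        def slow_echo(s):
            time.sleep(10)   # stands in for a long-running LLM call
            return s
        handle = txt.submit(slow_echo, [txt], [out])   # cancellable event
        stop.click(fn=None, inputs=None, outputs=None, cancels=[handle])

    demo.queue().launch()   # cancellation requires the event queue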
check_proxy.py
CHANGED
@@ -5,6 +5,7 @@ def check_proxy(proxies):
     try:
         response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
         data = response.json()
+        # print(f'查询代理的地理位置,返回的结果是{data}')
         if 'country_name' in data:
             country = data['country_name']
             result = f"代理配置 {proxies_https}, 代理所在地:{country}"
@@ -45,9 +46,9 @@ def backup_and_download(current_version, remote_version):
         return new_version_dir
     os.makedirs(new_version_dir)
     shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
-    proxies = get_conf('proxies')
-    …
+    proxies, = get_conf('proxies')
+    r = requests.get(
+        'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
     zip_file_path = backup_dir+'/master.zip'
     with open(zip_file_path, 'wb+') as f:
         f.write(r.content)
@@ -110,10 +111,11 @@ def auto_update(raise_error=False):
     try:
         from toolbox import get_conf
         import requests
+        import time
         import json
-        proxies = get_conf('proxies')
-        …
+        proxies, = get_conf('proxies')
+        response = requests.get(
+            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
         remote_json_data = json.loads(response.text)
         remote_version = remote_json_data['version']
         if remote_json_data["show_feature"]:
@@ -125,7 +127,8 @@ def auto_update(raise_error=False):
         current_version = json.loads(current_version)['version']
         if (remote_version - current_version) >= 0.01-1e-5:
             from colorful import print亮黄
-            print亮黄(…
+            print亮黄(
+                f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
             print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
             user_instruction = input('(2)是否一键更新代码(Y+回车=确认,输入其他/无输入+回车=不更新)?')
             if user_instruction in ['Y', 'y']:
@@ -151,26 +154,16 @@ def auto_update(raise_error=False):
         print(msg)

 def warm_up_modules():
-    print('…
-    from …
-    …
-    enc = model_info["gpt-4"]['tokenizer']
-    enc.encode("模块预热", disallowed_special=())
-
-def warm_up_vectordb():
-    print('正在执行一些模块的预热 ...')
-    from toolbox import ProxyNetworkActivate
-    with ProxyNetworkActivate("Warmup_Modules"):
-        import nltk
-        with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")
+    print('正在执行一些模块的预热...')
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
+    enc.encode("模块预热", disallowed_special=())
+    enc = model_info["gpt-4"]['tokenizer']
+    enc.encode("模块预热", disallowed_special=())

 if __name__ == '__main__':
     import os
     os.environ['no_proxy'] = '*'  # 避免代理网络产生意外污染
     from toolbox import get_conf
-    proxies = get_conf('proxies')
+    proxies, = get_conf('proxies')
     check_proxy(proxies)
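Note the recurring change in this file: get_conf is now unpacked with a trailing comma, `proxies, = get_conf('proxies')`. That only makes sense if get_conf returns a tuple with one element per requested key. A sketch of that contract (stub values for illustration, not the real toolbox implementation):

    def get_conf(*args):
        cfg = {'proxies': None, 'WEB_PORT': -1}   # stand-in config store
        return tuple(cfg[k] for k in args)        # always a tuple, even for one key

    proxies, = get_conf('proxies')                # 1-tuple: the comma is required
    proxies, web_port = get_conf('proxies', 'WEB_PORT')

Without the comma, `proxies` would be bound to the whole 1-tuple instead of its single element.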
config.py
CHANGED
@@ -2,8 +2,8 @@
 以下所有配置也都支持利用环境变量覆写,环境变量配置格式见docker-compose.yml。
 读取优先级:环境变量 > config_private.py > config.py
 --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
 All the following configurations also support using environment variables to override,
 and the environment variable configuration format can be seen in docker-compose.yml.
 Configuration reading priority: environment variable > config_private.py > config.py
 """

@@ -19,13 +19,13 @@ API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗
 USE_PROXY = False
 if USE_PROXY:
     """
-    代理网络的地址,打开你的代理软件查看代理协议(socks5h / http)、地址(localhost)和端口(11284)
     填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改
     <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1>
     [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http
-    [地址] …
+    [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上)
     [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
     """
+    # 代理网络的地址,打开你的*学*网软件查看代理的协议(socks5h / http)、地址(localhost)和端口(11284)
     proxies = {
         # [协议]:// [地址] :[端口]
         "http": "socks5h://localhost:11284",  # 再例如  "http": "http://127.0.0.1:7890",
@@ -37,7 +37,7 @@ else:
 # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------

 # 重新URL重新定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
 # 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
 # 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
 API_URL_REDIRECT = {}

@@ -50,11 +50,6 @@ DEFAULT_WORKER_NUM = 3
 # 色彩主题, 可选 ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast"]
 # 更多主题, 请查阅Gradio主题商店: https://huggingface.co/spaces/gradio/theme-gallery 可选 ["Gstaff/Xkcd", "NoCrypt/Miku", ...]
 THEME = "Chuanhu-Small-and-Beautiful"
-AVAIL_THEMES = ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast", "Gstaff/Xkcd", "NoCrypt/Miku"]
-
-# 默认的系统提示词(system prompt)
-INIT_SYS_PROMPT = "Serve me as a writing and programming assistant."

 # 对话窗的高度 (仅在LAYOUT="TOP-DOWN"时生效)
@@ -67,10 +62,7 @@ CODE_HIGHLIGHT = True

 # 窗口布局
 LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
-
-# 暗色模式 / 亮色模式
-DARK_MODE = False
+DARK_MODE = True  # 暗色模式 / 亮色模式

 # 发送请求到OpenAI后,等待多久判定为超时
@@ -89,41 +81,21 @@ LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]

 # 插件分类默认选项
-DEFAULT_FN_GROUPS = ['对话', '编程', '学术'…
+DEFAULT_FN_GROUPS = ['对话', '编程', '学术']

 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
-LLM_MODEL = "gpt-3.5-turbo…
-AVAIL_LLM_MODELS = ["gpt-…
-    "gpt-…
-
-# P.S. 其他可用的模型还包括 [
-#   "moss", "qwen-turbo", "qwen-plus", "qwen-max"
-#   "zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613",
-#   "gpt-3.5-turbo-16k-0613", "gpt-3.5-random", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
-#   "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
-# ]
+LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo",
+                    "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
+# P.S. 其他可用的模型还包括 ["qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613",
+#                         "spark", "sparkv2", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]

-# 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
-MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
-
-# 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用)
-# 如果你选择Qwen系列的模型,那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型
-# 也可以是具体的模型路径
-QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
-
-# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
-DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY

 # 百度千帆(LLM_MODEL="qianfan")
 BAIDU_CLOUD_API_KEY = ''
 BAIDU_CLOUD_SECRET_KEY = ''
-BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot…
+BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot"(文心一言), "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", "Llama-2-13B-Chat", "Llama-2-7B-Chat"

@@ -134,6 +106,7 @@ CHATGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b
 # 如果使用ChatGLM2微调模型,请把 LLM_MODEL="chatglmft",并在此处指定模型路径
 LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
 LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本
+
 # 设置gradio的并行线程数(不需要修改)
 CONCURRENT_COUNT = 100

@@ -143,7 +116,7 @@ AUTO_CLEAR_TXT = False
 # 加一个live2d装饰
-ADD_WAIFU = …
+ADD_WAIFU = False

 # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
@@ -155,31 +128,22 @@ AUTHENTICATION = []
 CUSTOM_PATH = "/"

-# HTTPS 秘钥和证书(不需要修改)
-SSL_KEYFILE = ""
-SSL_CERTFILE = ""

 # 极少数情况下,openai的官方KEY需要伴随组织编码(格式如org-xxxxxxxxxxxxxxxxxxxxxxxx)使用
 API_ORG = ""

-# 如果需要使用Slack Claude,使用教程详情见 …
+# 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md
 SLACK_CLAUDE_BOT_ID = ''
 SLACK_CLAUDE_USER_TOKEN = ''

-# 如果需要使用AZURE …
+# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
 AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/"
 AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用
 AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md

-# …
-AZURE_CFG_ARRAY = {}

-# 使用Newbing (不推荐使用,未来将删除)
+# 使用Newbing
 NEWBING_STYLE = "creative"  # ["creative", "balanced", "precise"]
 NEWBING_COOKIES = """
 put your new bing cookies here
@@ -200,79 +164,33 @@ XFYUN_API_SECRET = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
 XFYUN_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

-# 接入智谱大模型
-ZHIPUAI_API_KEY = ""
-ZHIPUAI_MODEL = "" # 此选项已废弃,不再需要填写
-
-# # 火山引擎YUNQUE大模型
-# YUNQUE_SECRET_KEY = ""
-# YUNQUE_ACCESS_KEY = ""
-# YUNQUE_MODEL = ""

 # Claude API KEY
 ANTHROPIC_API_KEY = ""

-# Mathpix 拥有执行PDF的OCR功能,但是需要注册账号
-MATHPIX_APPID = ""
-MATHPIX_APPKEY = ""

 # 自定义API KEY格式
 CUSTOM_API_KEY_PATTERN = ""

-# Google Gemini API-Key
-GEMINI_API_KEY = ''

 # HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
-HUGGINGFACE_ACCESS_TOKEN = ""
+HUGGINGFACE_ACCESS_TOKEN = "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"

 # GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
 # 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
 GROBID_URLS = [
     "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
-    "https://…
-    "https://qingxu98-grobid7.hf.space", "https://qingxu98-grobid8.hf.space",
+    "https://shaocongma-grobid.hf.space","https://FBR123-grobid.hf.space", "https://yeku-grobid.hf.space",
 ]

 # 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性,默认关闭
 ALLOW_RESET_CONFIG = False

-# 在使用AutoGen插件时,是否使用Docker容器运行代码
-AUTOGEN_USE_DOCKER = False

 # 临时的上传文件夹位置,请勿修改
 PATH_PRIVATE_UPLOAD = "private_upload"

 # 日志文件夹的位置,请勿修改
 PATH_LOGGING = "gpt_log"

-# 除了连接OpenAI之外,还有哪些场合允许使用代理,请勿修改
-WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid",
-                     "Warmup_Modules", "Nougat_Download", "AutoGen"]

-# *实验性功能*: 自动检测并屏蔽失效的KEY,请勿使用
-BLOCK_INVALID_APIKEY = False

-# 启用插件热加载
-PLUGIN_HOT_RELOAD = False

-# 自定义按钮的最大数量限制
-NUM_CUSTOM_BASIC_BTN = 4

 """
 在线大模型配置关联关系示意图
 │
@@ -282,16 +200,13 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── API_ORG(不常用)
 │   └── API_URL_REDIRECT(不常用)
 │
-├── "azure-gpt-3.5" 等azure…
+├── "azure-gpt-3.5" 等azure模型
 │   ├── API_KEY
 │   ├── AZURE_ENDPOINT
 │   ├── AZURE_API_KEY
 │   ├── AZURE_ENGINE
 │   └── API_URL_REDIRECT
 │
-├── "azure-gpt-3.5" 等azure模型(多个azure模型,需要动态切换,高优先级)
-│   └── AZURE_CFG_ARRAY
-│
 ├── "spark" 星火认知大模型 spark & sparkv2
 │   ├── XFYUN_APPID
 │   ├── XFYUN_API_SECRET
@@ -309,36 +224,11 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── BAIDU_CLOUD_API_KEY
 │   └── BAIDU_CLOUD_SECRET_KEY
 │
-├── "…
-│   └── ZHIPUAI_API_KEY
-│
-├── "qwen-turbo" 等通义千问大模型
-│   └── DASHSCOPE_API_KEY
-│
-├── "Gemini"
-│   └── GEMINI_API_KEY
-│
-└── "newbing" Newbing接口不再稳定,不推荐使用
+├── "newbing" Newbing接口不再稳定,不推荐使用
     ├── NEWBING_STYLE
     └── NEWBING_COOKIES

-本地大模型示意图
-│
-├── "chatglm3"
-├── "chatglm"
-├── "chatglm_onnx"
-├── "chatglmft"
-├── "internlm"
-├── "moss"
-├── "jittorllms_pangualpha"
-├── "jittorllms_llama"
-├── "deepseekcoder"
-├── "qwen-local"
-├── RWKV的支持见Wiki
-└── "llama2"

 用户图形界面布局依赖关系示意图
 │
 ├── CHATBOT_HEIGHT 对话窗的高度
@@ -349,7 +239,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ├── THEME 色彩主题
 ├── AUTO_CLEAR_TXT 是否在提交时自动清空输入框
 ├── ADD_WAIFU 加一个live2d装饰
+├── ALLOW_RESET_CONFIG 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性

 插件在线服务配置依赖关系示意图
@@ -361,10 +251,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── ALIYUN_ACCESSKEY
 │   └── ALIYUN_SECRET
 │
-├── MATHPIX_APPID
-└── MATHPIX_APPKEY
+├── PDF文档精准解析
+│   └── GROBID_URLS

 """
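The docstring at the top of config.py documents the reading priority 环境变量 > config_private.py > config.py. A minimal sketch of a resolver honoring that order (illustrative only; the project's actual loader lives in toolbox and may differ):

    import os, importlib

    def read_single_conf(key):
        if key in os.environ:                 # 1. environment variable wins
            return os.environ[key]
        try:                                  # 2. then private overrides
            private = importlib.import_module('config_private')
            if hasattr(private, key):
                return getattr(private, key)
        except ImportError:
            pass                              # no config_private.py present
        return getattr(importlib.import_module('config'), key)   # 3. config.py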
core_functional.py
CHANGED
@@ -3,143 +3,83 @@
 # 'stop' 颜色对应 theme.py 中的 color_er
 import importlib
 from toolbox import clear_line_break
-from toolbox import build_gpt_academic_masked_string_langbased
-from textwrap import dedent

 def get_core_functions():
     return {
-        "…": {
-            "Prefix": …(
-                r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
-                r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
-                r"Firstly, you should provide the polished paragraph. "
-                r"Secondly, you should list all your modification and explain the reasons to do so in markdown table.",
-                text_show_chinese=
-                r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
-                r"同时分解长句,减少重复,并提供改进建议。请先提供文本的更正版本,然后在markdown表格中列出修改的内容,并给出修改的理由:"
-            ) + "\n\n",
+        "英语学术润色": {
+            # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
+            "Prefix": r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, " +
+                      r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. " +
+                      r"Furthermore, list all modification and explain the reasons to do so in markdown table." + "\n\n",
             # [2*] 后缀字符串,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
             "Suffix": r"",
+            # 按钮颜色 (默认 secondary)
             "Color": r"secondary",
+            # 按钮是否可见 (默认 True,即可见)
             "Visible": True,
+            # 是否在触发时清除历史 (默认 False,即不处理之前的对话历史)
             "AutoClearHistory": False
-            # [6] 文本预处理 (可选参数,默认 None,举例:写个函数移除所有的换行符)
-            "PreProcess": None,
         },
+        "中文学术润色": {
+            "Prefix": r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性," +
+                      r"同时分解长句,减少重复,并提供改进建议。请只提供文本的更正版本,避免包括解释。请编辑以下文本" + "\n\n",
+            "Suffix": r"",
+        },
-        "总结绘制脑图": {
-            "Prefix": r"",
-            # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
-            "Suffix":
-                # dedent() 函数用于去除多行字符串的缩进
-                dedent("\n"+r'''
-                    ==============================
-                    使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如:
-                    以下是对以上文本的总结,以mermaid flowchart的形式展示:
-                    ```mermaid
-                    flowchart LR
-                        A["节点名1"] --> B("节点名2")
-                        B --> C{"节点名3"}
-                        C --> D["节点名4"]
-                        C --> |"箭头名1"| E["节点名5"]
-                        C --> |"箭头名2"| F["节点名6"]
-                    ```
-                    警告:
-                    (1)使用中文
-                    (2)节点名字使用引号包裹,如["Laptop"]
-                    (3)`|` 和 `"`之间不要存在空格
-                    (4)根据情况选择flowchart LR(从左到右)或者flowchart TD(从上到下)
-                '''),
-        },
         "查找语法错误": {
-            "Prefix": r"…
-                r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good.…
-                r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, "
-                r"put the original text the first column, "
-                r"put the corrected text in the second column and highlight the key words you fixed.…
-                r"Finally, please provide the proofreaded text.""\n\n"
+            "Prefix": r"Can you help me ensure that the grammar and the spelling is correct? " +
+                      r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good." +
+                      r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, " +
+                      r"put the original text the first column, " +
+                      r"put the corrected text in the second column and highlight the key words you fixed.""\n"
                      r"Example:""\n"
                      r"Paragraph: How is you? Do you knows what is it?""\n"
                      r"| Original sentence | Corrected sentence |""\n"
                      r"| :--- | :--- |""\n"
                      r"| How **is** you? | How **are** you? |""\n"
-                     r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n…
+                     r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n"
                      r"Below is a paragraph from an academic paper. "
                      r"You need to report all grammar and spelling mistakes as the example before."
                      + "\n\n",
             "Suffix": r"",
             "PreProcess": clear_line_break,  # 预处理:清除换行符
         },
         "中译英": {
             "Prefix": r"Please translate following sentence to English:" + "\n\n",
             "Suffix": r"",
         },
+        "学术中英互译": {
+            "Prefix": r"I want you to act as a scientific English-Chinese translator, " +
+                      r"I will provide you with some paragraphs in one language " +
+                      r"and your task is to accurately and academically translate the paragraphs only into the other language. " +
+                      r"Do not repeat the original provided paragraphs after translation. " +
+                      r"You should use artificial intelligence tools, " +
+                      r"such as natural language processing, and rhetorical knowledge " +
+                      r"and experience about effective writing techniques to reply. " +
+                      r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:" + "\n\n",
+            "Suffix": "",
+            "Color": "secondary",
+        },
-        "…": {
-            "Prefix": …(
-                …
-                r"and experience about effective writing techniques to reply. "
-                r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:",
-                text_show_english=
-                r"你是经验丰富的翻译,请把以下学术文章段落翻译成中文,"
-                r"并同时充分考虑中文的语法、清晰、简洁和整体可读性,"
-                r"必要时,你可以修改整个句子的顺序以确保翻译后的段落符合中文的语言习惯。"
-                r"你需要翻译的文本如下:"
-            ) + "\n\n",
-            "Suffix": r"",
-        },
         "英译中": {
             "Prefix": r"翻译成地道的中文:" + "\n\n",
             "Suffix": r"",
-            "Visible": …
+            "Visible": False,
         },
         "找图片": {
-            "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
+            "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL," +
                       r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
             "Suffix": r"",
-            "Visible": …
+            "Visible": False,
         },
         "解释代码": {
             "Prefix": r"请解释以下代码:" + "\n```\n",
             "Suffix": "\n```\n",
         },
         "参考文献转Bib": {
-            "Prefix": r"Here are some bibliography items, please transform them into bibtex style."
-                      r"Note that, reference styles maybe more than one kind, you should transform each item correctly."
-                      r"Items need to be transformed:"
-            "Visible": …
+            "Prefix": r"Here are some bibliography items, please transform them into bibtex style." +
+                      r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
+                      r"Items need to be transformed:",
+            "Visible": False,
             "Suffix": r"",
         }
     }
@@ -149,25 +89,8 @@ def handle_core_functionality(additional_fn, inputs, history, chatbot):
     import core_functional
     importlib.reload(core_functional)  # 热更新prompt
     core_functional = core_functional.get_core_functions()
-    …
-    else:
-        # 预制功能
-        if "PreProcess" in core_functional[additional_fn]:
-            if core_functional[additional_fn]["PreProcess"] is not None:
-                inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
-        # 为字符串加上上面定义的前缀和后缀。
-        inputs = apply_gpt_academic_string_mask_langbased(
-            string = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"],
-            lang_reference = inputs,
-        )
-        if core_functional[additional_fn].get("AutoClearHistory", False):
-            history = []
-        return inputs, history
+    if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
+    inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+    if core_functional[additional_fn].get("AutoClearHistory", False):
+        history = []
+    return inputs, history

-if __name__ == "__main__":
-    t = get_core_functions()["总结绘制脑图"]
-    print(t["Prefix"] + t["Suffix"])
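The simplified handle_core_functionality above reduces each core function to three steps: run PreProcess if the entry defines one, then wrap the input as Prefix + input + Suffix, then optionally clear history. A worked example with the 英译中 entry (assuming the repository modules are importable):

    from core_functional import get_core_functions

    entry = get_core_functions()["英译中"]
    user_input = "The quick brown fox."
    if entry.get("PreProcess"):                 # 英译中 defines no PreProcess
        user_input = entry["PreProcess"](user_input)
    prompt = entry["Prefix"] + user_input + entry["Suffix"]
    # prompt == "翻译成地道的中文:\n\nThe quick brown fox."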
crazy_functional.py
CHANGED
@@ -1,5 +1,4 @@
 from toolbox import HotReload  # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
-from toolbox import trimmed_format_exc

 def get_crazy_functions():
@@ -7,7 +6,6 @@ def get_crazy_functions():
     from crazy_functions.生成函数注释 import 批量生成函数注释
     from crazy_functions.解析项目源代码 import 解析项目本身
     from crazy_functions.解析项目源代码 import 解析一个Python项目
-    from crazy_functions.解析项目源代码 import 解析一个Matlab项目
     from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
     from crazy_functions.解析项目源代码 import 解析一个C项目
     from crazy_functions.解析项目源代码 import 解析一个Golang项目
@@ -32,122 +30,108 @@ def get_crazy_functions():
     from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
     from crazy_functions.Latex全文润色 import Latex中文润色
     from crazy_functions.Latex全文润色 import Latex英文纠错
     from crazy_functions.批量Markdown翻译 import Markdown中译英
     from crazy_functions.虚空终端 import 虚空终端
-
     function_plugins = {
         "虚空终端": {
-            "Group": "…
             "Color": "stop",
             "AsButton": True,
-            "Function": HotReload(虚空终端)
         },
         "解析整个Python项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": True,
             "Info": "解析一个Python项目的所有源文件(.py) | 输入参数为路径",
-            "Function": HotReload(解析一个Python项目)
         },
         "载入对话历史存档(先上传存档或输入路径)": {
             "Group": "对话",
             "Color": "stop",
             "AsButton": False,
             "Info": "载入对话历史存档 | 输入参数为路径",
-            "Function": HotReload(载入对话历史存档)
         },
         "删除所有本地对话历史记录(谨慎操作)": {
             "Group": "对话",
             "AsButton": False,
             "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数",
-            "Function": HotReload(删除所有本地对话历史记录)
         },
         "清除所有缓存文件(谨慎操作)": {
             "Group": "对话",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数",
-            "Function": HotReload(清除缓存)
-        },
-        "生成多种Mermaid图表(从当前对话或路径(.pdf/.md/.docx)中生产图表)": {
-            "Group": "对话",
-            "Color": "stop",
-            "AsButton": False,
-            "Info" : "基于当前对话或文件生成多种Mermaid图表,图表类型由模型判断",
-            "Function": HotReload(生成多种Mermaid图表),
-            "AdvancedArgs": True,
-            "ArgsReminder": "请输入图类型对应的数字,不输入则为模型自行判断:1-流程图,2-序列图,3-类图,4-饼图,5-甘特图,6-状态图,7-实体关系图,8-象限提示图,9-思维导图",
         },
         "批量总结Word文档": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": True,
             "Info": "批量总结word文档 | 输入参数为路径",
-            "Function": HotReload(总结word文档)
-        },
-        "解析整个Matlab项目": {
-            "Group": "编程",
-            "Color": "stop",
-            "AsButton": False,
-            "Info": "解析一个Matlab项目的所有源文件(.m) | 输入参数为路径",
-            "Function": HotReload(解析一个Matlab项目),
         },
         "解析整个C++项目头文件": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个C++项目的所有头文件(.h/.hpp) | 输入参数为路径",
-            "Function": HotReload(解析一个C项目的头文件)
         },
         "解析整个C++项目(.cpp/.hpp/.c/.h)": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个C++项目的所有源文件(.cpp/.hpp/.c/.h)| 输入参数为路径",
-            "Function": HotReload(解析一个C项目)
         },
         "解析整个Go项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个Go项目的所有源文件 | 输入参数为路径",
-            "Function": HotReload(解析一个Golang项目)
         },
         "解析整个Rust项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个Rust项目的所有源文件 | 输入参数为路径",
-            "Function": HotReload(解析一个Rust项目)
         },
         "解析整个Java项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个Java项目的所有源文件 | 输入参数为路径",
-            "Function": HotReload(解析一个Java项目)
         },
         "解析整个前端项目(js,ts,css等)": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个前端项目的所有源文件(js,ts,css等) | 输入参数为路径",
-            "Function": HotReload(解析一个前端项目)
         },
         "解析整个Lua项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个Lua项目的所有源文件 | 输入参数为路径",
-            "Function": HotReload(解析一个Lua项目)
         },
         "解析整个CSharp项目": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "解析一个CSharp项目的所有源文件 | 输入参数为路径",
-            "Function": HotReload(解析一个CSharp项目)
         },
         "解析Jupyter Notebook文件": {
             "Group": "编程",
             …
@@ -163,530 +147,384 @@ def get_crazy_functions():
             "Color": "stop",
             "AsButton": False,
             "Info": "读取Tex论文并写摘要 | 输入参数为路径",
-            "Function": HotReload(读文章写摘要)
         },
         "翻译README或MD": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": True,
             "Info": "将Markdown翻译为中文 | 输入参数为路径或URL",
-            "Function": HotReload(Markdown英译中)
         },
         "翻译Markdown或README(支持Github链接)": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,
             "Info": "将Markdown或README翻译为中文 | 输入参数为路径或URL",
-            "Function": HotReload(Markdown英译中)
         },
         "批量生成函数注释": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "批量生成函数的注释 | 输入参数为路径",
-            "Function": HotReload(批量生成函数注释)
         },
         "保存当前的对话": {
             "Group": "对话",
             "AsButton": True,
             "Info": "保存当前的对话 | 不需要输入参数",
-            "Function": HotReload(对话历史存档)
         },
         "[多线程Demo]解析此项目本身(源码自译解)": {
             "Group": "对话|编程",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数",
-            "Function": HotReload(解析项目本身)
         },
-        "历史上的今天": {
             "Group": "对话",
             "AsButton": True,
-            "Info": "查看历史上的今天事件…
-            "Function": HotReload(…
         },
         "精准翻译PDF论文": {
             "Group": "学术",
             "Color": "stop",
-            "AsButton": True,
             "Info": "精准翻译PDF论文为中文 | 输入参数为路径",
-            "Function": HotReload(批量翻译PDF文档)
         },
         "询问多个GPT模型": {
             "Group": "对话",
             "Color": "stop",
             "AsButton": True,
-            "Function": HotReload(同时问询)
         },
         "批量总结PDF文档": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "批量总结PDF文档的内容 | 输入参数为路径",
-            "Function": HotReload(批量总结PDF文档)
         },
         "谷歌学术检索助手(输入谷歌学术搜索页url)": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "使用谷歌学术检索助手搜索指定URL的结果 | 输入参数为谷歌学术搜索页的URL",
-            "Function": HotReload(谷歌检索小助手)
         },
         "理解PDF文档内容 (模仿ChatPDF)": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "理解PDF文档的内容并进行回答 | 输入参数为路径",
-            "Function": HotReload(理解PDF文档内容标准文件输入)
         },
         "英文Latex项目全文润色(输入路径或上传压缩包)": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
-            "Function": HotReload(Latex英文润色)
         },
-
         "中文Latex项目全文润色(输入路径或上传压缩包)": {
             "Group": "学术",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "对中文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
-            "Function": HotReload(Latex中文润色)
-        },
-        …
-        #     "Function": HotReload(Latex中译英)
-        # },
-        # 已经被新插件取代
-        # "Latex项目全文英译中(输入路径或上传压缩包)": {
-        #     "Group": "学术",
-        #     "Color": "stop",
-        #     "AsButton": False,  # 加入下拉菜单中
-        #     "Info": "对Latex项目全文进行英译中处理 | 输入参数为路径或上传压缩包",
-        #     "Function": HotReload(Latex英译中)
-        # },
         "批量Markdown中译英(输入路径或上传压缩包)": {
             "Group": "编程",
             "Color": "stop",
             "AsButton": False,  # 加入下拉菜单中
             "Info": "批量将Markdown文件中文翻译为英文 | 输入参数为路径或上传压缩包",
-            "Function": HotReload(Markdown中译英)
         },
     }
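Every entry in the function_plugins dict above follows one schema: "Group" drives the group filter in app.py, "Color" and "AsButton" control the UI, "Info" is the tooltip, and "Function" wraps the implementation in HotReload so edits take effect without restarting. A sketch of registering a new plugin under that schema (the plugin name and body here are hypothetical, for illustration only):

    def 示例插件(*args, **kwargs):          # hypothetical plugin implementation
        ...

    function_plugins["示例插件(假设)"] = {
        "Group": "对话|编程",               # shown when either group is selected
        "Color": "stop",                    # button color, see theme.py
        "AsButton": False,                  # dropdown-only, no dedicated button
        "Info": "示例说明 | 输入参数为路径",
        "Function": HotReload(示例插件),    # hot-reload on source change
    }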
     # -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
     try:
         from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    # "Info": "下载arxiv论文并翻译摘要 | 输入参数为arxiv编号如1812.10695",
-                    "Function": HotReload(下载arxiv论文并翻译摘要),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.联网的ChatGPT import 连接网络回答问题
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题",
-                    "Function": HotReload(连接网络回答问题),
-                }
             }
-        )
         from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题",
-                    "Function": HotReload(连接bing搜索回答问题),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.解析项目源代码 import 解析任意code项目
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                },
-            }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                },
-            }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
-        from crazy_functions.图片生成 import 图片生成…
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                },
-            }
-        )
-        function_plugins.update(
-            {
-                "图片生成_DALLE3 (先切换模型到gpt-*)": {
-                    "Group": "对话",
-                    "Color": "stop",
-                    "AsButton": False,
-                    "AdvancedArgs": True,  # 调用时,唤起高级参数输入区(默认False)
-                    "ArgsReminder": "在这里输入自定义参数「分辨率-质量(可选)-风格(可选)」, 参数示例「1024x1024-hd-vivid」 || 分辨率支持 「1024x1024」(默认) /「1792x1024」/「1024x1792」 || 质量支持 「-standard」(默认) /「-hd」 || 风格支持 「-vivid」(默认) /「-natural」",  # 高级参数输入区的显示提示
-                    "Info": "使用DALLE3生成图片 | 输入参数字符串,提供图像的内容",
-                    "Function": HotReload(图片生成_DALLE3),
-                },
-            }
-        )
-        function_plugins.update(
-            {
-                "图片修改_DALLE2 (先切换模型到gpt-*)": {
-                    "Group": "对话",
-                    "Color": "stop",
-                    "AsButton": False,
-                    "AdvancedArgs": False,  # 调用时,唤起高级参数输入区(默认False)
-                    # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容",
-                    "Function": HotReload(图片修改_DALLE2),
-                },
-            }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.总结音视频 import 总结音视频
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "Info": "批量总结音频或视频 | 输入参数为路径",
-                    "Function": HotReload(总结音视频),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.数学动画生成manim import 动画生成
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话",
-                    "Function": HotReload(动画生成),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "ArgsReminder": "请输入要翻译成哪种语言,默认为Chinese。",
-                    "Function": HotReload(Markdown翻译指定语言),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
-        from crazy_functions…
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。",
-                    "Function": HotReload(知识库文件注入),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
-        from crazy_functions…
-        function_plugins.update(
-            {
-                "…": {
-                    …
-                    "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要构建知识库后再运行此插件。",
-                    "Function": HotReload(读取知识库作答),
-                }
             }
-        )
     except:
-        print(…
-        print("Load function plugin failed")

     try:
         from crazy_functions.交互功能函数模板 import 交互功能模板函数
-
|
520 |
-
|
521 |
-
|
522 |
-
"
|
523 |
-
|
524 |
-
|
525 |
-
"AsButton": False,
|
526 |
-
"Function": HotReload(交互功能模板函数),
|
527 |
-
}
|
528 |
}
|
529 |
-
)
|
530 |
except:
|
531 |
-
print(
|
532 |
-
print("Load function plugin failed")
|
533 |
|
534 |
try:
|
535 |
-
from crazy_functions.Latex输出PDF import Latex英文纠错加PDF对比
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
"
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
"
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
"
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
},
|
560 |
-
"本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
|
561 |
-
"Group": "学术",
|
562 |
-
"Color": "stop",
|
563 |
-
"AsButton": False,
|
564 |
-
"AdvancedArgs": True,
|
565 |
-
"ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
|
566 |
-
r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
|
567 |
-
r'If the term "agent" is used in this section, it should be translated to "智能体". ',
|
568 |
-
"Info": "本地Latex论文精细翻译 | 输入参数是路径",
|
569 |
-
"Function": HotReload(Latex翻译中文并重新编译PDF),
|
570 |
-
},
|
571 |
-
"PDF翻译中文并重新编译PDF(上传PDF)[需Latex]": {
|
572 |
-
"Group": "学术",
|
573 |
-
"Color": "stop",
|
574 |
-
"AsButton": False,
|
575 |
-
"AdvancedArgs": True,
|
576 |
-
"ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
|
577 |
-
r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
|
578 |
-
r'If the term "agent" is used in this section, it should be translated to "智能体". ',
|
579 |
-
"Info": "PDF翻译中文,并重新编译PDF | 输入参数为路径",
|
580 |
-
"Function": HotReload(PDF翻译中文并重新编译PDF)
|
581 |
-
}
|
582 |
}
|
583 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
except:
|
585 |
-
print(
|
586 |
-
print("Load function plugin failed")
|
587 |
|
588 |
try:
|
589 |
from toolbox import get_conf
|
590 |
-
|
591 |
-
ENABLE_AUDIO = get_conf("ENABLE_AUDIO")
|
592 |
if ENABLE_AUDIO:
|
593 |
from crazy_functions.语音助手 import 语音助手
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
"实时语音对话": {
|
598 |
-
"Group": "对话",
|
599 |
-
"Color": "stop",
|
600 |
-
"AsButton": True,
|
601 |
-
"Info": "这是一个时刻聆听着的语音对话助手 | 没有输入参数",
|
602 |
-
"Function": HotReload(语音助手),
|
603 |
-
}
|
604 |
-
}
|
605 |
-
)
|
606 |
-
except:
|
607 |
-
print(trimmed_format_exc())
|
608 |
-
print("Load function plugin failed")
|
609 |
-
|
610 |
-
try:
|
611 |
-
from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
|
612 |
-
|
613 |
-
function_plugins.update(
|
614 |
-
{
|
615 |
-
"精准翻译PDF文档(NOUGAT)": {
|
616 |
-
"Group": "学术",
|
617 |
-
"Color": "stop",
|
618 |
-
"AsButton": False,
|
619 |
-
"Function": HotReload(批量翻译PDF文档),
|
620 |
-
}
|
621 |
-
}
|
622 |
-
)
|
623 |
-
except:
|
624 |
-
print(trimmed_format_exc())
|
625 |
-
print("Load function plugin failed")
|
626 |
-
|
627 |
-
try:
|
628 |
-
from crazy_functions.函数动态生成 import 函数动态生成
|
629 |
-
|
630 |
-
function_plugins.update(
|
631 |
-
{
|
632 |
-
"动态代码解释器(CodeInterpreter)": {
|
633 |
-
"Group": "智能体",
|
634 |
"Color": "stop",
|
635 |
-
"AsButton":
|
636 |
-
"
|
|
|
637 |
}
|
638 |
-
}
|
639 |
-
)
|
640 |
except:
|
641 |
-
print(
|
642 |
-
print("Load function plugin failed")
|
643 |
|
644 |
try:
|
645 |
-
from crazy_functions
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
"
|
650 |
-
|
651 |
-
|
652 |
-
"AsButton": False,
|
653 |
-
"Function": HotReload(多智能体终端),
|
654 |
-
}
|
655 |
}
|
656 |
-
)
|
657 |
except:
|
658 |
-
print(
|
659 |
-
print("Load function plugin failed")
|
660 |
-
|
661 |
-
try:
|
662 |
-
from crazy_functions.互动小游戏 import 随机小游戏
|
663 |
|
664 |
-
function_plugins.update(
|
665 |
-
{
|
666 |
-
"随机互动小游戏(仅供测试)": {
|
667 |
-
"Group": "智能体",
|
668 |
-
"Color": "stop",
|
669 |
-
"AsButton": False,
|
670 |
-
"Function": HotReload(随机小游戏),
|
671 |
-
}
|
672 |
-
}
|
673 |
-
)
|
674 |
-
except:
|
675 |
-
print(trimmed_format_exc())
|
676 |
-
print("Load function plugin failed")
|
677 |
|
678 |
# try:
|
679 |
-
# from crazy_functions
|
680 |
# function_plugins.update({
|
681 |
-
# "
|
682 |
-
# "Group": "
|
683 |
# "Color": "stop",
|
684 |
-
# "AsButton":
|
685 |
-
# "Function": HotReload(
|
686 |
# }
|
687 |
# })
|
688 |
# except:
|
689 |
-
# print(trimmed_format_exc())
|
690 |
# print('Load function plugin failed')
|
691 |
|
692 |
# try:
|
@@ -703,6 +541,8 @@ def get_crazy_functions():
|
|
703 |
# except:
|
704 |
# print('Load function plugin failed')
|
705 |
|
|
|
|
|
706 |
"""
|
707 |
设置默认值:
|
708 |
- 默认 Group = 对话
|
@@ -712,12 +552,12 @@ def get_crazy_functions():
|
|
712 |
"""
|
713 |
for name, function_meta in function_plugins.items():
|
714 |
if "Group" not in function_meta:
|
715 |
-
function_plugins[name]["Group"] =
|
716 |
if "AsButton" not in function_meta:
|
717 |
function_plugins[name]["AsButton"] = True
|
718 |
if "AdvancedArgs" not in function_meta:
|
719 |
function_plugins[name]["AdvancedArgs"] = False
|
720 |
if "Color" not in function_meta:
|
721 |
-
function_plugins[name]["Color"] =
|
722 |
|
723 |
return function_plugins
|
|
|
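Throughout the removed half above, every optional plugin is registered inside its own try/except block, so a missing dependency (manim, NOUGAT, audio support, and so on) disables that single entry instead of crashing startup. The following is a minimal, self-contained sketch of that pattern, not the file itself: HotReload is stubbed out here (the real one, imported from toolbox, re-imports the plugin's module on each call), while the plugin module name is one of the real ones from the diff.

    # Minimal sketch of the optional-plugin registration pattern (assumes the
    # gpt-academic repo layout; HotReload below is a stand-in, not the real one).
    function_plugins = {}

    def HotReload(f):
        # placeholder: the real toolbox.HotReload re-imports f's module per call
        return f

    try:
        from crazy_functions.数学动画生成manim import 动画生成
        function_plugins.update({
            "数学动画生成(Manim)": {
                "Group": "对话",
                "Color": "stop",
                "AsButton": False,
                "Function": HotReload(动画生成),
            }
        })
    except:
        # a failed import (e.g. manim not installed) only drops this one entry
        print("Load function plugin failed")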
(new version of crazy_functional.py — added lines are marked "+"; "…" marks unchanged lines collapsed by the diff viewer)

  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
  …


  def get_crazy_functions():
      …
      from crazy_functions.生成函数注释 import 批量生成函数注释
      from crazy_functions.解析项目源代码 import 解析项目本身
      from crazy_functions.解析项目源代码 import 解析一个Python项目
      from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
      from crazy_functions.解析项目源代码 import 解析一个C项目
      from crazy_functions.解析项目源代码 import 解析一个Golang项目
      …
      from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
      from crazy_functions.Latex全文润色 import Latex中文润色
      from crazy_functions.Latex全文润色 import Latex英文纠错
+     from crazy_functions.Latex全文翻译 import Latex中译英
+     from crazy_functions.Latex全文翻译 import Latex英译中
      from crazy_functions.批量Markdown翻译 import Markdown中译英
      from crazy_functions.虚空终端 import 虚空终端
+

      function_plugins = {
          "虚空终端": {
+             "Group": "对话|编程|学术",
              "Color": "stop",
              "AsButton": True,
+             "Function": HotReload(虚空终端)
          },
          "解析整个Python项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": True,
              "Info": "解析一个Python项目的所有源文件(.py) | 输入参数为路径",
+             "Function": HotReload(解析一个Python项目)
          },
          "载入对话历史存档(先上传存档或输入路径)": {
              "Group": "对话",
              "Color": "stop",
              "AsButton": False,
              "Info": "载入对话历史存档 | 输入参数为路径",
+             "Function": HotReload(载入对话历史存档)
          },
          "删除所有本地对话历史记录(谨慎操作)": {
              "Group": "对话",
              "AsButton": False,
              "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数",
+             "Function": HotReload(删除所有本地对话历史记录)
          },
          "清除所有缓存文件(谨慎操作)": {
              "Group": "对话",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数",
+             "Function": HotReload(清除缓存)
          },
          "批量总结Word文档": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": True,
              "Info": "批量总结word文档 | 输入参数为路径",
+             "Function": HotReload(总结word文档)
          },
          "解析整个C++项目头文件": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个C++项目的所有头文件(.h/.hpp) | 输入参数为路径",
+             "Function": HotReload(解析一个C项目的头文件)
          },
          "解析整个C++项目(.cpp/.hpp/.c/.h)": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个C++项目的所有源文件(.cpp/.hpp/.c/.h)| 输入参数为路径",
+             "Function": HotReload(解析一个C项目)
          },
          "解析整个Go项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个Go项目的所有源文件 | 输入参数为路径",
+             "Function": HotReload(解析一个Golang项目)
          },
          "解析整个Rust项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个Rust项目的所有源文件 | 输入参数为路径",
+             "Function": HotReload(解析一个Rust项目)
          },
          "解析整个Java项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个Java项目的所有源文件 | 输入参数为路径",
+             "Function": HotReload(解析一个Java项目)
          },
          "解析整个前端项目(js,ts,css等)": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个前端项目的所有源文件(js,ts,css等) | 输入参数为路径",
+             "Function": HotReload(解析一个前端项目)
          },
          "解析整个Lua项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个Lua项目的所有源文件 | 输入参数为路径",
+             "Function": HotReload(解析一个Lua项目)
          },
          "解析整个CSharp项目": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "解析一个CSharp项目的所有源文件 | 输入参数为路径",
+             "Function": HotReload(解析一个CSharp项目)
          },
          "解析Jupyter Notebook文件": {
              "Group": "编程",
              …
              "Color": "stop",
              "AsButton": False,
              "Info": "读取Tex论文并写摘要 | 输入参数为路径",
+             "Function": HotReload(读文章写摘要)
          },
          "翻译README或MD": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": True,
              "Info": "将Markdown翻译为中文 | 输入参数为路径或URL",
+             "Function": HotReload(Markdown英译中)
          },
          "翻译Markdown或README(支持Github链接)": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False,
              "Info": "将Markdown或README翻译为中文 | 输入参数为路径或URL",
+             "Function": HotReload(Markdown英译中)
          },
          "批量生成函数注释": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "批量生成函数的注释 | 输入参数为路径",
+             "Function": HotReload(批量生成函数注释)
          },
          "保存当前的对话": {
              "Group": "对话",
              "AsButton": True,
              "Info": "保存当前的对话 | 不需要输入参数",
+             "Function": HotReload(对话历史存档)
          },
          "[多线程Demo]解析此项目本身(源码自译解)": {
              "Group": "对话|编程",
              "AsButton": False, # 加入下拉菜单中
              "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数",
+             "Function": HotReload(解析项目本身)
          },
+         "[插件demo]历史上的今天": {
              "Group": "对话",
              "AsButton": True,
+             "Info": "查看历史上的今天事件 | 不需要输入参数",
+             "Function": HotReload(高阶功能模板函数)
          },
          "精准翻译PDF论文": {
              "Group": "学术",
              "Color": "stop",
+             "AsButton": True,
              "Info": "精准翻译PDF论文为中文 | 输入参数为路径",
+             "Function": HotReload(批量翻译PDF文档)
          },
          "询问多个GPT模型": {
              "Group": "对话",
              "Color": "stop",
              "AsButton": True,
+             "Function": HotReload(同时问询)
          },
          "批量总结PDF文档": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "批量总结PDF文档的内容 | 输入参数为路径",
+             "Function": HotReload(批量总结PDF文档)
          },
          "谷歌学术检索助手(输入谷歌学术搜索页url)": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "使用谷歌学术检索助手搜索指定URL的结果 | 输入参数为谷歌学术搜索页的URL",
+             "Function": HotReload(谷歌检索小助手)
          },
          "理解PDF文档内容 (模仿ChatPDF)": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "理解PDF文档的内容并进行回答 | 输入参数为路径",
+             "Function": HotReload(理解PDF文档内容标准文件输入)
          },
          "英文Latex项目全文润色(输入路径或上传压缩包)": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Latex英文润色)
+         },
+         "英文Latex项目全文纠错(输入路径或上传压缩包)": {
+             "Group": "学术",
+             "Color": "stop",
+             "AsButton": False, # 加入下拉菜单中
+             "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Latex英文纠错)
          },
          "中文Latex项目全文润色(输入路径或上传压缩包)": {
              "Group": "学术",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "对中文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Latex中文润色)
+         },
+         "Latex项目全文中译英(输入路径或上传压缩包)": {
+             "Group": "学术",
+             "Color": "stop",
+             "AsButton": False, # 加入下拉菜单中
+             "Info": "对Latex项目全文进行中译英处理 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Latex中译英)
+         },
+         "Latex项目全文英译中(输入路径或上传压缩包)": {
+             "Group": "学术",
+             "Color": "stop",
+             "AsButton": False, # 加入下拉菜单中
+             "Info": "对Latex项目全文进行英译中处理 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Latex英译中)
+         },
          "批量Markdown中译英(输入路径或上传压缩包)": {
              "Group": "编程",
              "Color": "stop",
              "AsButton": False, # 加入下拉菜单中
              "Info": "批量将Markdown文件中文翻译为英文 | 输入参数为路径或上传压缩包",
+             "Function": HotReload(Markdown中译英)
          },
      }

      # -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
      try:
          from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
+         function_plugins.update({
+             "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": {
+                 "Group": "学术",
+                 "Color": "stop",
+                 "AsButton": False, # 加入下拉菜单中
+                 # "Info": "下载arxiv论文并翻译摘要 | 输入参数为arxiv编号如1812.10695",
+                 "Function": HotReload(下载arxiv论文并翻译摘要)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.联网的ChatGPT import 连接网络回答问题
+         function_plugins.update({
+             "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False, # 加入下拉菜单中
+                 # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题",
+                 "Function": HotReload(连接网络回答问题)
              }
+         })
          from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
+         function_plugins.update({
+             "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False, # 加入下拉菜单中
+                 "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题",
+                 "Function": HotReload(连接bing搜索回答问题)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.解析项目源代码 import 解析任意code项目
+         function_plugins.update({
+             "解析项目源代码(手动指定和筛选源代码文件类型)": {
+                 "Group": "编程",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+                 "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
+                 "Function": HotReload(解析任意code项目)
+             },
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
+         function_plugins.update({
+             "询问多个GPT模型(手动指定询问哪些模型)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+                 "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
+                 "Function": HotReload(同时问询_指定模型)
+             },
+         })
      except:
+         print('Load function plugin failed')

      try:
+         from crazy_functions.图片生成 import 图片生成
+         function_plugins.update({
+             "图片生成(先切换模型到openai或api2d)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+                 "ArgsReminder": "在这里输入分辨率, 如256x256(默认)", # 高级参数输入区的显示提示
+                 "Info": "图片生成 | 输入参数字符串,提供图像的内容",
+                 "Function": HotReload(图片生成)
+             },
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.总结音视频 import 总结音视频
+         function_plugins.update({
+             "批量总结音视频(输入路径或上传压缩包)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder": "调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。",
+                 "Info": "批量总结音频或视频 | 输入参数为路径",
+                 "Function": HotReload(总结音视频)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.数学动画生成manim import 动画生成
+         function_plugins.update({
+             "数学动画生成(Manim)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话",
+                 "Function": HotReload(动画生成)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
+         function_plugins.update({
+             "Markdown翻译(手动指定语言)": {
+                 "Group": "编程",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder": "请输入要翻译成哪种语言,默认为Chinese。",
+                 "Function": HotReload(Markdown翻译指定语言)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
+         from crazy_functions.Langchain知识库 import 知识库问答
+         function_plugins.update({
+             "构建知识库(先上传文件素材,再运行此插件)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。",
+                 "Function": HotReload(知识库问答)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
+         from crazy_functions.Langchain知识库 import 读取知识库作答
+         function_plugins.update({
+             "知识库问答(构建知识库后,再运行此插件)": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要构建知识库后再运行此插件。",
+                 "Function": HotReload(读取知识库作答)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from crazy_functions.交互功能函数模板 import 交互功能模板函数
+         function_plugins.update({
+             "交互功能模板函数": {
+                 "Group": "对话",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "Function": HotReload(交互功能模板函数)
              }
+         })
      except:
+         print('Load function plugin failed')

      try:
+         from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
+         function_plugins.update({
+             "Latex英文纠错+高亮修正位置 [需Latex]": {
+                 "Group": "学术",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
+                 "Function": HotReload(Latex英文纠错加PDF对比)
+             }
+         })
+         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
+         function_plugins.update({
+             "Arixv论文精细翻译(输入arxivID)[需Latex]": {
+                 "Group": "学术",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder":
+                     "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 " +
+                     "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " +
+                     'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                 "Info": "Arixv论文精细翻译 | 输入参数arxiv论文的ID,比如1812.10695",
+                 "Function": HotReload(Latex翻译中文并重新编译PDF)
              }
+         })
+         function_plugins.update({
+             "本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
+                 "Group": "学术",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "AdvancedArgs": True,
+                 "ArgsReminder":
+                     "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 " +
+                     "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " +
+                     'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                 "Info": "本地Latex论文精细翻译 | 输入参数是路径",
+                 "Function": HotReload(Latex翻译中文并重新编译PDF)
+             }
+         })
      except:
+         print('Load function plugin failed')

      try:
          from toolbox import get_conf
+         ENABLE_AUDIO, = get_conf('ENABLE_AUDIO')
          if ENABLE_AUDIO:
              from crazy_functions.语音助手 import 语音助手
+             function_plugins.update({
+                 "实时音频采集": {
+                     "Group": "对话",
                      "Color": "stop",
+                     "AsButton": True,
+                     "Info": "开始语言对话 | 没有输入参数",
+                     "Function": HotReload(语音助手)
                  }
+             })
      except:
+         print('Load function plugin failed')

      try:
+         from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
+         function_plugins.update({
+             "精准翻译PDF文档(NOUGAT)": {
+                 "Group": "学术",
+                 "Color": "stop",
+                 "AsButton": False,
+                 "Function": HotReload(批量翻译PDF文档)
              }
+         })
      except:
+         print('Load function plugin failed')


      # try:
+     #     from crazy_functions.CodeInterpreter import 虚空终端CodeInterpreter
      #     function_plugins.update({
+     #         "CodeInterpreter(开发中,仅供测试)": {
+     #             "Group": "编程|对话",
      #             "Color": "stop",
+     #             "AsButton": False,
+     #             "Function": HotReload(虚空终端CodeInterpreter)
      #         }
      #     })
      # except:
      #     print('Load function plugin failed')

      # try:
      …
      # except:
      #     print('Load function plugin failed')

+
+
      """
      设置默认值:
      - 默认 Group = 对话
      …
      """
      for name, function_meta in function_plugins.items():
          if "Group" not in function_meta:
+             function_plugins[name]["Group"] = '对话'
          if "AsButton" not in function_meta:
              function_plugins[name]["AsButton"] = True
          if "AdvancedArgs" not in function_meta:
              function_plugins[name]["AdvancedArgs"] = False
          if "Color" not in function_meta:
+             function_plugins[name]["Color"] = 'secondary'

      return function_plugins
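Both versions of get_crazy_functions() finish with the same normalization pass: any metadata key a plugin entry omitted is back-filled, so the UI layer can read Group, AsButton, AdvancedArgs, and Color unconditionally. A compact restatement of that pass follows — it uses dict.setdefault instead of the explicit if-chains in the diff, but the behavior is the same:

    # Sketch of the default-filling pass at the end of get_crazy_functions().
    def fill_defaults(function_plugins: dict) -> dict:
        for name, meta in function_plugins.items():
            meta.setdefault("Group", "对话")        # default group: 对话 (chat)
            meta.setdefault("AsButton", True)       # show as a button, not only in the dropdown
            meta.setdefault("AdvancedArgs", False)  # no advanced-argument input area
            meta.setdefault("Color", "secondary")   # default button variant
        return function_plugins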
crazy_functions/Langchain知识库.py
CHANGED
@@ -53,14 +53,14 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     print('Checking Text2vec ...')
     from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-    with ProxyNetworkActivate(…(rest of line truncated by the diff viewer)
+    with ProxyNetworkActivate():    # 临时地激活代理网络
         HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

     # < -------------------构建知识库--------------- >
     chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     print('Establishing knowledge archive ...')
-    with ProxyNetworkActivate(…(rest of line truncated by the diff viewer)
+    with ProxyNetworkActivate():    # 临时地激活代理网络
         kai = knowledge_archive_interface()
         kai.feed_archive(file_manifest=file_manifest, id=kai_id)
         kai_files = kai.get_loaded_file()
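The change in this file ensures ProxyNetworkActivate is actually called (note the parentheses) before being used as a context manager around network-heavy steps. The project's real implementation lives in toolbox and is not shown in this diff; the sketch below is an assumption about what such a manager typically does — set proxy environment variables on entry and restore them on exit — with a hypothetical proxy address:

    import os
    from contextlib import contextmanager

    @contextmanager
    def proxy_network_activate(proxy: str = "http://127.0.0.1:7890"):
        """Hypothetical stand-in for toolbox.ProxyNetworkActivate:
        temporarily route HTTP(S) traffic through a proxy."""
        saved = {k: os.environ.get(k) for k in ("HTTP_PROXY", "HTTPS_PROXY")}
        os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = proxy
        try:
            yield
        finally:  # restore the previous settings, even if the body raised
            for k, v in saved.items():
                if v is None:
                    os.environ.pop(k, None)
                else:
                    os.environ[k] = v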
crazy_functions/Latex全文润色.py
CHANGED
@@ -1,5 +1,5 @@
 from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
-from toolbox import CatchException, …(rest of line truncated by the diff viewer)
+from toolbox import CatchException, report_execption, write_history_to_file, zip_folder


 class PaperFileGroup():
@@ -11,7 +11,7 @@ class PaperFileGroup():
         self.sp_file_tag = []

         # count_token
-        from …(truncated)
+        from request_llm.bridge_all import model_info
         enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
@@ -26,8 +26,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from …(truncated)
-                segments = …(truncated)
+                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
@@ -135,18 +135,18 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch


 @CatchException
-def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, …(truncated)
+def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
-        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex…(truncated)
+        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex环境,请使用“Latex英文纠错+高亮”插件)"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

     # 尝试导入依赖,如果缺少依赖,则给出安装建议
     try:
         import tiktoken
     except:
-        …(truncated)
+        report_execption(chatbot, history,
             a=f"解析项目: {txt}",
             b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -157,12 +157,12 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
@@ -173,7 +173,7 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p


 @CatchException
-def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, …(truncated)
+def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
@@ -184,7 +184,7 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
     try:
         import tiktoken
     except:
-        …(truncated)
+        report_execption(chatbot, history,
             a=f"解析项目: {txt}",
             b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -195,12 +195,12 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
@@ -209,7 +209,7 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p


 @CatchException
-def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, …(truncated)
+def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
@@ -220,7 +220,7 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
     try:
         import tiktoken
     except:
-        …(truncated)
+        report_execption(chatbot, history,
             a=f"解析项目: {txt}",
             b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -231,12 +231,12 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
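Both sides of this diff split long .tex sources by model token count rather than by characters: get_token_num wraps the gpt-3.5-turbo tokenizer, and breakdown_txt_to_satisfy_token_limit_for_pdf (in crazy_utils) performs the splitting. A simplified sketch of the same idea using tiktoken directly — a greedy line-packing splitter, not the project's actual implementation:

    import tiktoken

    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

    def get_token_num(txt: str) -> int:
        # same call shape as in the diff: count tokens, allowing special text
        return len(enc.encode(txt, disallowed_special=()))

    def breakdown_by_token_limit(text: str, max_tokens: int = 1024) -> list[str]:
        """Greedy, line-oriented splitter: pack lines until the limit is hit.
        A simplified stand-in for breakdown_txt_to_satisfy_token_limit_for_pdf."""
        segments, current = [], ""
        for line in text.splitlines(keepends=True):
            if current and get_token_num(current + line) > max_tokens:
                segments.append(current)
                current = line
            else:
                current += line
        if current:
            segments.append(current)
        return segments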
crazy_functions/Latex全文翻译.py
CHANGED
@@ -1,5 +1,5 @@
 from toolbox import update_ui, promote_file_to_downloadzone
-from toolbox import CatchException, …(rest of line truncated by the diff viewer)
+from toolbox import CatchException, report_execption, write_history_to_file
 fast_debug = False

 class PaperFileGroup():
@@ -11,7 +11,7 @@ class PaperFileGroup():
         self.sp_file_tag = []

         # count_token
-        from …(truncated)
+        from request_llm.bridge_all import model_info
         enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
@@ -26,8 +26,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from …(truncated)
-                segments = …(truncated)
+                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
@@ -106,7 +106,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch


 @CatchException
-def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, …(truncated)
+def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
@@ -117,7 +117,7 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
     try:
         import tiktoken
     except:
-        …(truncated)
+        report_execption(chatbot, history,
             a=f"解析项目: {txt}",
             b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -128,12 +128,12 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
@@ -143,7 +143,7 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom


 @CatchException
-def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, …(truncated)
+def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     # 基本信息:功能、贡献者
     chatbot.append([
         "函数插件功能?",
@@ -154,7 +154,7 @@ def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
     try:
         import tiktoken
     except:
-        …(truncated)
+        report_execption(chatbot, history,
             a=f"解析项目: {txt}",
             b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -165,12 +165,12 @@ def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        …(truncated)
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
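Every plugin entry point in these two files shares one generator signature — (txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) — and streams interface refreshes with `yield from update_ui(...)`; the change in this diff only swaps the final parameter to web_port. A bare-bones illustration of that calling convention (the body here is invented for demonstration; update_ui and CatchException are the project's own helpers):

    from toolbox import update_ui, CatchException

    @CatchException
    def demo_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        # append a (question, answer) pair, then push it to the UI
        chatbot.append(["函数插件功能?", "演示插件的标准签名与界面刷新方式"])
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        if txt == "":
            chatbot.append(["输入为空", "请在输入栏填写路径后重试"])
            yield from update_ui(chatbot=chatbot, history=history)
            return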
crazy_functions/Latex输出PDF.py
DELETED
@@ -1,484 +0,0 @@  (entire file removed; the view below ends at line 349)
from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
from functools import partial
import glob, os, requests, time, json, tarfile

pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# 专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
def switch_prompt(pfg, mode, more_requirement):
    """
    Generate prompts and system prompts based on the mode for proofreading or translating.
    Args:
    - pfg: Proofreader or Translator instance.
    - mode: A string specifying the mode, either 'proofread' or 'translate_zh'.

    Returns:
    - inputs_array: A list of strings containing prompts for users to respond to.
    - sys_prompt_array: A list of strings containing prompts for system prompts.
    """
    n_split = len(pfg.sp_file_contents)
    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section." +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" +
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif mode == 'translate_zh':
        inputs_array = [
            r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
            r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
            r"Answer me only with the translated text:" +
            f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
    else:
        assert False, "未知指令"
    return inputs_array, sys_prompt_array


def desend_to_extracted_folder_if_exist(project_folder):
    """
    Descend into the extracted folder if it exists, otherwise return the original folder.

    Args:
    - project_folder: A string specifying the folder path.

    Returns:
    - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
    """
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir) == 0: return project_folder
    if maybe_dir[0].endswith('.extract'): return maybe_dir[0]
    return project_folder


def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder to it.

    Args:
    - project_folder: A string specifying the folder path of the project.

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil, time
    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'{get_log_folder()}/{gen_time_str()}'
    try:
        shutil.rmtree(new_workfolder)
    except:
        pass

    # align subfolder if there is a folder wrapper
    items = glob.glob(pj(project_folder, '*'))
    items = [item for item in items if os.path.basename(item) != '__MACOSX']
    if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
        if os.path.isdir(items[0]): project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder


def arxiv_download(chatbot, history, txt, allow_cache=True):
    def check_cached_translation_pdf(arxiv_id):
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        if not os.path.exists(translation_dir):
            os.makedirs(translation_dir)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            target_file_compare = pj(translation_dir, 'comparison.pdf')
            if os.path.exists(target_file_compare):
                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
            return target_file
        return False

    def is_float(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    if ('.' in txt) and ('/' not in txt) and is_float(txt):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt.strip()
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt[:10]

    if not txt.startswith('https://arxiv.org'):
        return txt, None  # 是本地文件,跳过下载

    # <-------------- inspect format ------------->
    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # 刷新界面

    url_ = txt  # https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # 刷新界面
        return msg, None
    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id + '.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # 刷新界面
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # 刷新界面
        proxies = get_conf('proxies')
        r = requests.get(url_tar, proxies=proxies)
        with open(dst, 'wb+') as f:
            f.write(r.content)
    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # 刷新界面
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id


def pdf2tex_project(pdf_file_path):
    # Mathpix API credentials
    app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
    headers = {"app_id": app_id, "app_key": app_key}

    # Step 1: Send PDF file for processing
    options = {
        "conversion_formats": {"tex.zip": True},
        "math_inline_delimiters": ["$", "$"],
        "rm_spaces": True
    }

    response = requests.post(url="https://api.mathpix.com/v3/pdf",
                             headers=headers,
                             data={"options_json": json.dumps(options)},
                             files={"file": open(pdf_file_path, "rb")})

    if response.ok:
        pdf_id = response.json()["pdf_id"]
        print(f"PDF processing initiated. PDF ID: {pdf_id}")

        # Step 2: Check processing status
        while True:
            conversion_response = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
            conversion_data = conversion_response.json()

            if conversion_data["status"] == "completed":
                print("PDF processing completed.")
                break
            elif conversion_data["status"] == "error":
                print("Error occurred during processing.")
            else:
                print(f"Processing status: {conversion_data['status']}")
                time.sleep(5)  # wait for a few seconds before checking again

        # Step 3: Save results to local files
        output_dir = os.path.join(os.path.dirname(pdf_file_path), 'mathpix_output')
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        url = f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex"
        response = requests.get(url, headers=headers)
        file_name_wo_dot = '_'.join(os.path.basename(pdf_file_path).split('.')[:-1])
        output_name = f"{file_name_wo_dot}.tex.zip"
        output_path = os.path.join(output_dir, output_name)
        with open(output_path, "wb") as output_file:
            output_file.write(response.content)
        print(f"tex.zip file saved at: {output_path}")

        import zipfile
        unzip_dir = os.path.join(output_dir, file_name_wo_dot)
        with zipfile.ZipFile(output_path, 'r') as zip_ref:
            zip_ref.extractall(unzip_dir)

        return unzip_dir

    else:
        print(f"Error sending PDF for processing. Status code: {response.status_code}")
        return None


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    # <-------------- information about this plugin ------------->
    chatbot.append(["函数插件功能?",
                    "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([f"解析项目: {txt}",
                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return

    # <-------------- clear history and read input ------------->
    history = []
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                        chatbot, history, system_prompt, mode='proofread_en',
                                        switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge',
                                   main_file_modified='merge_proofread_en',
                                   work_folder_original=project_folder, work_folder_modified=project_folder,
                                   work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
        yield from update_ui(chatbot=chatbot, history=history);
        time.sleep(1)  # 刷新界面
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了",
                        '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history);
        time.sleep(1)  # 刷新界面
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    no_cache = more_req.startswith("--no-cache")
    if no_cache: more_req.lstrip("--no-cache")
    allow_cache = not no_cache
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([f"解析项目: {txt}",
                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return

    # <-------------- clear history and read input ------------->
    history = []
    try:
        txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
    except tarfile.ReadError as e:
        yield from update_ui_lastest_msg(
            "无法自动下载该论文的Latex源码,请前往arxiv打开此论文下载页面,点other Formats,然后download source手动下载latex源码包。接下来调用本地Latex翻译插件即可。",
            chatbot=chatbot, history=history)
        return

    if txt.endswith('.pdf'):
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return

    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
        return

    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
|
350 |
-
return
|
351 |
-
|
352 |
-
# <-------------- if is a zip/tar file ------------->
|
353 |
-
project_folder = desend_to_extracted_folder_if_exist(project_folder)
|
354 |
-
|
355 |
-
# <-------------- move latex project away from temp folder ------------->
|
356 |
-
project_folder = move_project(project_folder, arxiv_id)
|
357 |
-
|
358 |
-
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
|
359 |
-
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
|
360 |
-
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
361 |
-
chatbot, history, system_prompt, mode='translate_zh',
|
362 |
-
switch_prompt=_switch_prompt_)
|
363 |
-
|
364 |
-
# <-------------- compile PDF ------------->
|
365 |
-
success = yield from 编译Latex(chatbot, history, main_file_original='merge',
|
366 |
-
main_file_modified='merge_translate_zh', mode='translate_zh',
|
367 |
-
work_folder_original=project_folder, work_folder_modified=project_folder,
|
368 |
-
work_folder=project_folder)
|
369 |
-
|
370 |
-
# <-------------- zip PDF ------------->
|
371 |
-
zip_res = zip_result(project_folder)
|
372 |
-
if success:
|
373 |
-
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
|
374 |
-
yield from update_ui(chatbot=chatbot, history=history);
|
375 |
-
time.sleep(1) # 刷新界面
|
376 |
-
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
377 |
-
else:
|
378 |
-
chatbot.append((f"失败了",
|
379 |
-
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
|
380 |
-
yield from update_ui(chatbot=chatbot, history=history);
|
381 |
-
time.sleep(1) # 刷新界面
|
382 |
-
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
383 |
-
|
384 |
-
# <-------------- we are done ------------->
|
385 |
-
return success
|
386 |
-
|
387 |
-
|
388 |
-
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 插件主程序3 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
389 |
-
|
390 |
-
@CatchException
|
391 |
-
def PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
392 |
-
# <-------------- information about this plugin ------------->
|
393 |
-
chatbot.append([
|
394 |
-
"函数插件功能?",
|
395 |
-
"将PDF转换为Latex项目,翻译为中文后重新编译为PDF。函数插件贡献者: Marroh。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
|
396 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
397 |
-
|
398 |
-
# <-------------- more requirements ------------->
|
399 |
-
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
|
400 |
-
more_req = plugin_kwargs.get("advanced_arg", "")
|
401 |
-
no_cache = more_req.startswith("--no-cache")
|
402 |
-
if no_cache: more_req.lstrip("--no-cache")
|
403 |
-
allow_cache = not no_cache
|
404 |
-
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
|
405 |
-
|
406 |
-
# <-------------- check deps ------------->
|
407 |
-
try:
|
408 |
-
import glob, os, time, subprocess
|
409 |
-
subprocess.Popen(['pdflatex', '-version'])
|
410 |
-
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
|
411 |
-
except Exception as e:
|
412 |
-
chatbot.append([f"解析项目: {txt}",
|
413 |
-
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
|
414 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
415 |
-
return
|
416 |
-
|
417 |
-
# <-------------- clear history and read input ------------->
|
418 |
-
if os.path.exists(txt):
|
419 |
-
project_folder = txt
|
420 |
-
else:
|
421 |
-
if txt == "": txt = '空空如也的输入栏'
|
422 |
-
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
|
423 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
424 |
-
return
|
425 |
-
|
426 |
-
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
|
427 |
-
if len(file_manifest) == 0:
|
428 |
-
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
|
429 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
430 |
-
return
|
431 |
-
if len(file_manifest) != 1:
|
432 |
-
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"不支持同时处理多个pdf文件: {txt}")
|
433 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
434 |
-
return
|
435 |
-
app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
|
436 |
-
if len(app_id) == 0 or len(app_key) == 0:
|
437 |
-
report_exception(chatbot, history, a="缺失 MATHPIX_APPID 和 MATHPIX_APPKEY。", b=f"请配置 MATHPIX_APPID 和 MATHPIX_APPKEY")
|
438 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
439 |
-
return
|
440 |
-
|
441 |
-
# <-------------- convert pdf into tex ------------->
|
442 |
-
project_folder = pdf2tex_project(file_manifest[0])
|
443 |
-
|
444 |
-
# Translate English Latex to Chinese Latex, and compile it
|
445 |
-
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
|
446 |
-
if len(file_manifest) == 0:
|
447 |
-
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
|
448 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
449 |
-
return
|
450 |
-
|
451 |
-
# <-------------- if is a zip/tar file ------------->
|
452 |
-
project_folder = desend_to_extracted_folder_if_exist(project_folder)
|
453 |
-
|
454 |
-
# <-------------- move latex project away from temp folder ------------->
|
455 |
-
project_folder = move_project(project_folder)
|
456 |
-
|
457 |
-
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
|
458 |
-
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
|
459 |
-
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
460 |
-
chatbot, history, system_prompt, mode='translate_zh',
|
461 |
-
switch_prompt=_switch_prompt_)
|
462 |
-
|
463 |
-
# <-------------- compile PDF ------------->
|
464 |
-
success = yield from 编译Latex(chatbot, history, main_file_original='merge',
|
465 |
-
main_file_modified='merge_translate_zh', mode='translate_zh',
|
466 |
-
work_folder_original=project_folder, work_folder_modified=project_folder,
|
467 |
-
work_folder=project_folder)
|
468 |
-
|
469 |
-
# <-------------- zip PDF ------------->
|
470 |
-
zip_res = zip_result(project_folder)
|
471 |
-
if success:
|
472 |
-
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
|
473 |
-
yield from update_ui(chatbot=chatbot, history=history);
|
474 |
-
time.sleep(1) # 刷新界面
|
475 |
-
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
476 |
-
else:
|
477 |
-
chatbot.append((f"失败了",
|
478 |
-
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
|
479 |
-
yield from update_ui(chatbot=chatbot, history=history);
|
480 |
-
time.sleep(1) # 刷新界面
|
481 |
-
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
482 |
-
|
483 |
-
# <-------------- we are done ------------->
|
484 |
-
return success
|
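A side note on the `--no-cache` handling in the deleted code above: `more_req.lstrip("--no-cache")` discards its return value, and `str.lstrip` strips a *character set* rather than a literal prefix, so the flag text survives inside `more_req` either way. A minimal sketch of the intended parsing (the helper name `strip_flag` is ours, not repo code):

def strip_flag(more_req: str, flag: str = "--no-cache"):
    # str.removeprefix (Python 3.9+) removes the literal prefix;
    # lstrip would instead strip any of the characters "-nocahe".
    no_cache = more_req.startswith(flag)
    if no_cache:
        more_req = more_req.removeprefix(flag).strip()
    return (not no_cache), more_req

allow_cache, more_req = strip_flag("--no-cache polish the wording")
# allow_cache == False, more_req == "polish the wording"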
crazy_functions/Latex输出PDF结果.py
CHANGED
@@ -1,11 +1,11 @@
 from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
-from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
+from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
 from functools import partial
 import glob, os, requests, time
 pj = os.path.join
 ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
 
-#
+# =================================== 工具函数 ===============================================
 # 专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
 def switch_prompt(pfg, mode, more_requirement):
     """
@@ -73,14 +73,13 @@ def move_project(project_folder, arxiv_id=None):
 
     # align subfolder if there is a folder wrapper
     items = glob.glob(pj(project_folder,'*'))
-    items = [item for item in items if os.path.basename(item)!='__MACOSX']
     if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
         if os.path.isdir(items[0]): project_folder = items[0]
 
     shutil.copytree(src=project_folder, dst=new_workfolder)
     return new_workfolder
 
-def arxiv_download(chatbot, history, txt, allow_cache=True):
+def arxiv_download(chatbot, history, txt):
     def check_cached_translation_pdf(arxiv_id):
         translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
         if not os.path.exists(translation_dir):
@@ -88,9 +87,6 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         target_file = pj(translation_dir, 'translate_zh.pdf')
         if os.path.exists(target_file):
             promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
-            target_file_compare = pj(translation_dir, 'comparison.pdf')
-            if os.path.exists(target_file_compare):
-                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
             return target_file
         return False
     def is_float(s):
@@ -120,7 +116,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         arxiv_id = url_.split('/abs/')[-1]
         if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
         cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
-        if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
+        if cached_translation_pdf: return cached_translation_pdf, arxiv_id
 
         url_tar = url_.replace('/abs/', '/e-print/')
         translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
@@ -133,7 +129,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
             yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
         else:
             yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
-            proxies = get_conf('proxies')
+            proxies, = get_conf('proxies')
             r = requests.get(url_tar, proxies=proxies)
             with open(dst, 'wb+') as f:
                 f.write(r.content)
@@ -142,7 +138,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         from toolbox import extract_archive
         extract_archive(file_path=dst, dest_dir=extract_dst)
         return extract_dst, arxiv_id
-#
+# ========================================= 插件主程序1 =====================================================
 
 
 @CatchException
@@ -175,12 +171,12 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
 
@@ -218,7 +214,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     # <-------------- we are done ------------->
     return success
 
-
+
+# ========================================= 插件主程序2 =====================================================
 
 @CatchException
 def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
@@ -231,9 +228,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- more requirements ------------->
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
     more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
-    if no_cache: more_req.lstrip("--no-cache")
-    allow_cache = not no_cache
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
 
     # <-------------- check deps ------------->
@@ -250,9 +244,9 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
 
     # <-------------- clear history and read input ------------->
     history = []
-    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
+    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
     if txt.endswith('.pdf'):
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
 
@@ -261,13 +255,13 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
 
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
 
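The recurring one-character edits in this diff, e.g. `proxies = get_conf('proxies')` becoming `proxies, = get_conf('proxies')`, track an older `get_conf` API that returned a tuple of values, one per requested key. A toy sketch of why the trailing comma matters (the config value is a placeholder of ours):

def get_conf(*keys):
    # stand-in for the older toolbox.get_conf, which always returned a tuple
    cfg = {'proxies': {'https': 'http://127.0.0.1:7890'}}   # value is an assumption
    return tuple(cfg[k] for k in keys)

proxies  = get_conf('proxies')   # ({'https': ...},) -- a 1-tuple, the wrong type for requests
proxies, = get_conf('proxies')   # {'https': ...}    -- unpacked dict, what requests.get(proxies=...) expects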
crazy_functions/agent_fns/auto_agent.py
DELETED
@@ -1,23 +0,0 @@
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
-from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-from crazy_functions.agent_fns.general import AutoGenGeneral
-
-
-
-class AutoGenMath(AutoGenGeneral):
-
-    def define_agents(self):
-        from autogen import AssistantAgent, UserProxyAgent
-        return [
-            {
-                "name": "assistant",            # name of the agent.
-                "cls":  AssistantAgent,         # class of the agent.
-            },
-            {
-                "name": "user_proxy",           # name of the agent.
-                "cls":  UserProxyAgent,         # class of the agent.
-                "human_input_mode": "ALWAYS",   # always ask for human input.
-                "llm_config": False,            # disables llm-based auto reply.
-            },
-        ]
crazy_functions/agent_fns/echo_agent.py
DELETED
@@ -1,19 +0,0 @@
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-
-class EchoDemo(PluginMultiprocessManager):
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ 子进程
-        self.child_conn = child_conn
-        while True:
-            msg = self.child_conn.recv() # PipeCom
-            if msg.cmd == "user_input":
-                # wait futher user input
-                self.child_conn.send(PipeCom("show", msg.content))
-                wait_success = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
-                if not wait_success:
-                    # wait timeout, terminate this subprocess_worker
-                    break
-            elif msg.cmd == "terminate":
-                self.child_conn.send(PipeCom("done", ""))
-                break
-        print('[debug] subprocess_worker terminated')
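EchoDemo above is the smallest consumer of the PipeCom protocol: the parent process sends commands over a multiprocessing Pipe and the child answers with "show"/"done" messages. A self-contained sketch of that round trip, written by us and independent of the deleted framework:

import time
from multiprocessing import Process, Pipe

class PipeCom:
    # same two-field message shape as the deleted agent_fns code
    def __init__(self, cmd, content):
        self.cmd, self.content = cmd, content

def worker(child_conn):
    while True:
        msg = child_conn.recv()                            # blocks until a PipeCom arrives
        if msg.cmd == "user_input":
            child_conn.send(PipeCom("show", msg.content))  # echo the payload back
        elif msg.cmd == "terminate":
            child_conn.send(PipeCom("done", ""))
            break

if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    p = Process(target=worker, args=(child_conn,), daemon=True)
    p.start()
    parent_conn.send(PipeCom("user_input", "hello"))
    print(parent_conn.recv().content)                      # -> "hello"
    parent_conn.send(PipeCom("terminate", ""))
    parent_conn.recv()
    p.join()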
crazy_functions/agent_fns/general.py
DELETED
@@ -1,138 +0,0 @@
-from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-from request_llms.bridge_all import predict_no_ui_long_connection
-import time
-
-def gpt_academic_generate_oai_reply(
-    self,
-    messages,
-    sender,
-    config,
-):
-    llm_config = self.llm_config if config is None else config
-    if llm_config is False:
-        return False, None
-    if messages is None:
-        messages = self._oai_messages[sender]
-
-    inputs = messages[-1]['content']
-    history = []
-    for message in messages[:-1]:
-        history.append(message['content'])
-    context=messages[-1].pop("context", None)
-    assert context is None, "预留参数 context 未实现"
-
-    reply = predict_no_ui_long_connection(
-        inputs=inputs,
-        llm_kwargs=llm_config,
-        history=history,
-        sys_prompt=self._oai_system_message[0]['content'],
-        console_slience=True
-    )
-    assumed_done = reply.endswith('\nTERMINATE')
-    return True, reply
-
-class AutoGenGeneral(PluginMultiprocessManager):
-    def gpt_academic_print_override(self, user_proxy, message, sender):
-        # ⭐⭐ run in subprocess
-        try:
-            print_msg = sender.name + "\n\n---\n\n" + message["content"]
-        except:
-            print_msg = sender.name + "\n\n---\n\n" + message
-        self.child_conn.send(PipeCom("show", print_msg))
-
-    def gpt_academic_get_human_input(self, user_proxy, message):
-        # ⭐⭐ run in subprocess
-        patience = 300
-        begin_waiting_time = time.time()
-        self.child_conn.send(PipeCom("interact", message))
-        while True:
-            time.sleep(0.5)
-            if self.child_conn.poll():
-                wait_success = True
-                break
-            if time.time() - begin_waiting_time > patience:
-                self.child_conn.send(PipeCom("done", ""))
-                wait_success = False
-                break
-        if wait_success:
-            return self.child_conn.recv().content
-        else:
-            raise TimeoutError("等待用户输入超时")
-
-    def define_agents(self):
-        raise NotImplementedError
-
-    def exe_autogen(self, input):
-        # ⭐⭐ run in subprocess
-        input = input.content
-        code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
-        agents = self.define_agents()
-        user_proxy = None
-        assistant = None
-        for agent_kwargs in agents:
-            agent_cls = agent_kwargs.pop('cls')
-            kwargs = {
-                'llm_config':self.llm_kwargs,
-                'code_execution_config':code_execution_config
-            }
-            kwargs.update(agent_kwargs)
-            agent_handle = agent_cls(**kwargs)
-            agent_handle._print_received_message = lambda a,b: self.gpt_academic_print_override(agent_kwargs, a, b)
-            for d in agent_handle._reply_func_list:
-                if hasattr(d['reply_func'],'__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
-                    d['reply_func'] = gpt_academic_generate_oai_reply
-            if agent_kwargs['name'] == 'user_proxy':
-                agent_handle.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
-                user_proxy = agent_handle
-            if agent_kwargs['name'] == 'assistant': assistant = agent_handle
-        try:
-            if user_proxy is None or assistant is None: raise Exception("用户代理或助理代理未定义")
-            with ProxyNetworkActivate("AutoGen"):
-                user_proxy.initiate_chat(assistant, message=input)
-        except Exception as e:
-            tb_str = '```\n' + trimmed_format_exc() + '```'
-            self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))
-
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ run in subprocess
-        self.child_conn = child_conn
-        while True:
-            msg = self.child_conn.recv() # PipeCom
-            self.exe_autogen(msg)
-
-
-class AutoGenGroupChat(AutoGenGeneral):
-    def exe_autogen(self, input):
-        # ⭐⭐ run in subprocess
-        import autogen
-
-        input = input.content
-        with ProxyNetworkActivate("AutoGen"):
-            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
-            agents = self.define_agents()
-            agents_instances = []
-            for agent_kwargs in agents:
-                agent_cls = agent_kwargs.pop("cls")
-                kwargs = {"code_execution_config": code_execution_config}
-                kwargs.update(agent_kwargs)
-                agent_handle = agent_cls(**kwargs)
-                agent_handle._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
-                agents_instances.append(agent_handle)
-                if agent_kwargs["name"] == "user_proxy":
-                    user_proxy = agent_handle
-                    user_proxy.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
-            try:
-                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
-                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
-                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
-                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
-                if user_proxy is None:
-                    raise Exception("user_proxy is not defined")
-                user_proxy.initiate_chat(manager, message=input)
-            except Exception:
-                tb_str = "```\n" + trimmed_format_exc() + "```"
-                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))
-
-    def define_group_chat_manager_config(self):
-        raise NotImplementedError
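One subtlety in `exe_autogen` above: the lambdas bound to `_print_received_message` inside the for-loop all close over the loop variable `agent_kwargs`, so after the loop every handler sees the *last* agent's kwargs (Python closures capture variables, not values). A minimal demonstration plus the usual default-argument fix, both ours:

# Python closures capture variables, not values: every handler below ends up
# seeing the last value of cfg once the loop has finished.
handlers = [lambda x: (cfg["name"], x) for cfg in ({"name": "a"}, {"name": "b"})]
print([h(1)[0] for h in handlers])        # ['b', 'b'] -- late binding

# A default argument freezes the value current at definition time:
handlers = [lambda x, cfg=cfg: (cfg["name"], x) for cfg in ({"name": "a"}, {"name": "b"})]
print([h(1)[0] for h in handlers])        # ['a', 'b']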
crazy_functions/agent_fns/persistent.py
DELETED
@@ -1,16 +0,0 @@
-from toolbox import Singleton
-@Singleton
-class GradioMultiuserManagerForPersistentClasses():
-    def __init__(self):
-        self.mapping = {}
-
-    def already_alive(self, key):
-        return (key in self.mapping) and (self.mapping[key].is_alive())
-
-    def set(self, key, x):
-        self.mapping[key] = x
-        return self.mapping[key]
-
-    def get(self, key):
-        return self.mapping[key]
-
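A usage sketch of the registry above, assuming the class (and toolbox.Singleton) are importable; the key format and StubWorker are placeholders of ours, not repo code:

class StubWorker:
    # stand-in for a PluginMultiprocessManager-like object
    def is_alive(self): return True

registry = GradioMultiuserManagerForPersistentClasses()
key = "user_123->autogen"                      # key format is our assumption
if registry.already_alive(key):
    worker = registry.get(key)                 # resume the live instance
else:
    worker = registry.set(key, StubWorker())   # create and register a fresh one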
crazy_functions/agent_fns/pipe.py
DELETED
@@ -1,194 +0,0 @@
-from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
-from crazy_functions.agent_fns.watchdog import WatchDog
-import time, os
-
-class PipeCom:
-    def __init__(self, cmd, content) -> None:
-        self.cmd = cmd
-        self.content = content
-
-
-class PluginMultiprocessManager:
-    def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-        # ⭐ run in main process
-        self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
-        self.previous_work_dir_files = {}
-        self.llm_kwargs = llm_kwargs
-        self.plugin_kwargs = plugin_kwargs
-        self.chatbot = chatbot
-        self.history = history
-        self.system_prompt = system_prompt
-        # self.user_request = user_request
-        self.alive = True
-        self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
-        self.last_user_input = ""
-        # create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
-        timeout_seconds = 5 * 60
-        self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
-        self.heartbeat_watchdog.begin_watch()
-
-    def feed_heartbeat_watchdog(self):
-        # feed this `dog`, so the dog will not `bark` (bark_fn will terminate the instance)
-        self.heartbeat_watchdog.feed()
-
-    def is_alive(self):
-        return self.alive
-
-    def launch_subprocess_with_pipe(self):
-        # ⭐ run in main process
-        from multiprocessing import Process, Pipe
-
-        parent_conn, child_conn = Pipe()
-        self.p = Process(target=self.subprocess_worker, args=(child_conn,))
-        self.p.daemon = True
-        self.p.start()
-        return parent_conn
-
-    def terminate(self):
-        self.p.terminate()
-        self.alive = False
-        print("[debug] instance terminated")
-
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ run in subprocess
-        raise NotImplementedError
-
-    def send_command(self, cmd):
-        # ⭐ run in main process
-        repeated = False
-        if cmd == self.last_user_input:
-            repeated = True
-            cmd = ""
-        else:
-            self.last_user_input = cmd
-        self.parent_conn.send(PipeCom("user_input", cmd))
-        return repeated, cmd
-
-    def immediate_showoff_when_possible(self, fp):
-        # ⭐ 主进程
-        # 获取fp的拓展名
-        file_type = fp.split('.')[-1]
-        # 如果是文本文件, 则直接显示文本内容
-        if file_type.lower() in ['png', 'jpg']:
-            image_path = os.path.abspath(fp)
-            self.chatbot.append([
-                '检测到新生图像:',
-                f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-            ])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-
-    def overwatch_workdir_file_change(self):
-        # ⭐ 主进程 Docker 外挂文件夹监控
-        path_to_overwatch = self.autogen_work_dir
-        change_list = []
-        # 扫描路径下的所有文件, 并与self.previous_work_dir_files中所记录的文件进行对比,
-        # 如果有新文件出现,或者文件的修改时间发生变化,则更新self.previous_work_dir_files中
-        # 把新文件和发生变化的文件的路径记录到 change_list 中
-        for root, dirs, files in os.walk(path_to_overwatch):
-            for file in files:
-                file_path = os.path.join(root, file)
-                if file_path not in self.previous_work_dir_files.keys():
-                    last_modified_time = os.stat(file_path).st_mtime
-                    self.previous_work_dir_files.update({file_path: last_modified_time})
-                    change_list.append(file_path)
-                else:
-                    last_modified_time = os.stat(file_path).st_mtime
-                    if last_modified_time != self.previous_work_dir_files[file_path]:
-                        self.previous_work_dir_files[file_path] = last_modified_time
-                        change_list.append(file_path)
-        if len(change_list) > 0:
-            file_links = ""
-            for f in change_list:
-                res = promote_file_to_downloadzone(f)
-                file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
-                yield from self.immediate_showoff_when_possible(f)
-
-            self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-        return change_list
-
-
-    def main_process_ui_control(self, txt, create_or_resume) -> str:
-        # ⭐ 主进程
-        if create_or_resume == 'create':
-            self.cnt = 1
-            self.parent_conn = self.launch_subprocess_with_pipe() # ⭐⭐⭐
-        repeated, cmd_to_autogen = self.send_command(txt)
-        if txt == 'exit':
-            self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-            self.terminate()
-            return "terminate"
-
-        # patience = 10
-
-        while True:
-            time.sleep(0.5)
-            if not self.alive:
-                # the heartbeat watchdog might have it killed
-                self.terminate()
-                return "terminate"
-            if self.parent_conn.poll():
-                self.feed_heartbeat_watchdog()
-                if "[GPT-Academic] 等待中" in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                if "等待您的进一步指令" in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                if '[GPT-Academic] 等待中' in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                msg = self.parent_conn.recv() # PipeCom
-                if msg.cmd == "done":
-                    self.chatbot.append([f"结束", msg.content])
-                    self.cnt += 1
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                    self.terminate()
-                    break
-                if msg.cmd == "show":
-                    yield from self.overwatch_workdir_file_change()
-                    notice = ""
-                    if repeated: notice = "(自动忽略重复的输入)"
-                    self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
-                    self.cnt += 1
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                if msg.cmd == "interact":
-                    yield from self.overwatch_workdir_file_change()
-                    self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
-                                         "\n\n等待您的进一步指令." +
-                                         "\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
-                                         "\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
-                                         "\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
-                                         ])
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                    # do not terminate here, leave the subprocess_worker instance alive
-                    return "wait_feedback"
-            else:
-                self.feed_heartbeat_watchdog()
-                if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
-                    # begin_waiting_time = time.time()
-                    self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
-                self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
-                yield from update_ui(chatbot=self.chatbot, history=self.history)
-                # if time.time() - begin_waiting_time > patience:
-                #     self.chatbot.append([f"结束", "等待超时, 终止AutoGen程序。"])
-                #     yield from update_ui(chatbot=self.chatbot, history=self.history)
-                #     self.terminate()
-                #     return "terminate"
-
-        self.terminate()
-        return "terminate"
-
-    def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
-        # ⭐⭐ run in subprocess
-        patience = 5 * 60
-        begin_waiting_time = time.time()
-        self.child_conn.send(PipeCom("interact", wait_msg))
-        while True:
-            time.sleep(0.5)
-            if self.child_conn.poll():
-                wait_success = True
-                break
-            if time.time() - begin_waiting_time > patience:
-                self.child_conn.send(PipeCom("done", ""))
-                wait_success = False
-                break
-        return wait_success
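The `overwatch_workdir_file_change` method above is an mtime-polling file watcher: new files and files whose modification time changed since the last scan are reported. A self-contained sketch of the same technique (the function name `scan_changes` is ours):

import os

def scan_changes(root_dir, seen):
    # record new files and files whose mtime changed since the previous scan;
    # `seen` maps path -> last observed mtime, exactly like previous_work_dir_files
    changed = []
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            path = os.path.join(root, name)
            mtime = os.stat(path).st_mtime
            if seen.get(path) != mtime:     # missing key or different mtime
                seen[path] = mtime
                changed.append(path)
    return changed

seen = {}
print(scan_changes(".", seen))   # first call: everything counts as "new"
print(scan_changes(".", seen))   # second call: [] unless something was written in between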
crazy_functions/agent_fns/watchdog.py
DELETED
@@ -1,28 +0,0 @@
-import threading, time
-
-class WatchDog():
-    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
-        self.last_feed = None
-        self.timeout = timeout
-        self.bark_fn = bark_fn
-        self.interval = interval
-        self.msg = msg
-        self.kill_dog = False
-
-    def watch(self):
-        while True:
-            if self.kill_dog: break
-            if time.time() - self.last_feed > self.timeout:
-                if len(self.msg) > 0: print(self.msg)
-                self.bark_fn()
-                break
-            time.sleep(self.interval)
-
-    def begin_watch(self):
-        self.last_feed = time.time()
-        th = threading.Thread(target=self.watch)
-        th.daemon = True
-        th.start()
-
-    def feed(self):
-        self.last_feed = time.time()
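For reference, a short usage sketch of the WatchDog class above (the 3-second timeout and print-based bark are arbitrary choices of ours):

import time
# assuming the WatchDog class above is importable

dog = WatchDog(timeout=3, bark_fn=lambda: print("no heartbeat -> bark"), interval=1)
dog.begin_watch()        # starts the daemon monitor thread

for _ in range(3):
    time.sleep(1)
    dog.feed()           # heartbeat: resets the timeout window

time.sleep(5)            # stop feeding; after ~3s the bark_fn fires once and the thread exits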
crazy_functions/chatglm微调工具.py
CHANGED
@@ -32,7 +32,7 @@ def string_to_options(arguments):
     return args
 
 @CatchException
-def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -40,7 +40,7 @@ def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
+    web_port        当前软件运行的端口号
     """
     history = []    # 清空历史,以免输入溢出
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
@@ -80,7 +80,7 @@ def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
 
 
 @CatchException
-def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -88,7 +88,7 @@ def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
+    web_port        当前软件运行的端口号
     """
     import subprocess
     history = []    # 清空历史,以免输入溢出
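Both signature edits here follow from the host app invoking every plugin with the same seven positional arguments, so the last parameter must be web_port throughout this build (newer builds pass user_request there instead, and the two must not be mixed). A stub of that contract, written by us for illustration (示例插件 is not a plugin in the repo):

from toolbox import update_ui

def 示例插件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # plugins are generators: the UI is refreshed by yielding, not by returning
    chatbot.append((txt, f"[Local Message] running on port {web_port}"))
    yield from update_ui(chatbot=chatbot, history=history)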
crazy_functions/crazy_utils.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1 |
-
from toolbox import update_ui, get_conf, trimmed_format_exc,
|
2 |
import threading
|
3 |
import os
|
4 |
import logging
|
5 |
|
6 |
def input_clipping(inputs, history, max_token_limit):
|
7 |
import numpy as np
|
8 |
-
from
|
9 |
enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
10 |
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
11 |
|
12 |
mode = 'input-and-history'
|
13 |
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
|
14 |
input_token_num = get_token_num(inputs)
|
15 |
-
if input_token_num < max_token_limit//2:
|
16 |
mode = 'only-history'
|
17 |
max_token_limit = max_token_limit - input_token_num
|
18 |
|
@@ -21,7 +21,7 @@ def input_clipping(inputs, history, max_token_limit):
|
|
21 |
n_token = get_token_num('\n'.join(everything))
|
22 |
everything_token = [get_token_num(e) for e in everything]
|
23 |
delta = max(everything_token) // 16 # 截断时的颗粒度
|
24 |
-
|
25 |
while n_token > max_token_limit:
|
26 |
where = np.argmax(everything_token)
|
27 |
encoded = enc.encode(everything[where], disallowed_special=())
|
@@ -38,9 +38,9 @@ def input_clipping(inputs, history, max_token_limit):
|
|
38 |
return inputs, history
|
39 |
|
40 |
def request_gpt_model_in_new_thread_with_ui_alive(
|
41 |
-
inputs, inputs_show_user, llm_kwargs,
|
42 |
chatbot, history, sys_prompt, refresh_interval=0.2,
|
43 |
-
handle_token_exceed=True,
|
44 |
retry_times_at_unknown_error=2,
|
45 |
):
|
46 |
"""
|
@@ -63,21 +63,18 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
|
63 |
"""
|
64 |
import time
|
65 |
from concurrent.futures import ThreadPoolExecutor
|
66 |
-
from
|
67 |
# 用户反馈
|
68 |
chatbot.append([inputs_show_user, ""])
|
69 |
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
70 |
executor = ThreadPoolExecutor(max_workers=16)
|
71 |
mutable = ["", time.time(), ""]
|
72 |
-
# 看门狗耐心
|
73 |
-
watch_dog_patience = 5
|
74 |
-
# 请求任务
|
75 |
def _req_gpt(inputs, history, sys_prompt):
|
76 |
retry_op = retry_times_at_unknown_error
|
77 |
exceeded_cnt = 0
|
78 |
while True:
|
79 |
# watchdog error
|
80 |
-
if len(mutable) >= 2 and (time.time()-mutable[1]) >
|
81 |
raise RuntimeError("检测到程序终止。")
|
82 |
try:
|
83 |
# 【第一种情况】:顺利完成
|
@@ -92,7 +89,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
|
92 |
# 【选择处理】 尝试计算比例,尽可能多地保留文本
|
93 |
from toolbox import get_reduce_token_percent
|
94 |
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
|
95 |
-
MAX_TOKEN =
|
96 |
EXCEED_ALLO = 512 + 512 * exceeded_cnt
|
97 |
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
|
98 |
mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
|
@@ -139,13 +136,11 @@ def can_multi_process(llm):
|
|
139 |
if llm.startswith('gpt-'): return True
|
140 |
if llm.startswith('api2d-'): return True
|
141 |
if llm.startswith('azure-'): return True
|
142 |
-
if llm.startswith('spark'): return True
|
143 |
-
if llm.startswith('zhipuai') or llm.startswith('glm-'): return True
|
144 |
return False
|
145 |
|
146 |
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
147 |
-
inputs_array, inputs_show_user_array, llm_kwargs,
|
148 |
-
chatbot, history_array, sys_prompt_array,
|
149 |
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
|
150 |
handle_token_exceed=True, show_user_at_complete=False,
|
151 |
retry_times_at_unknown_error=2,
|
@@ -179,17 +174,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
179 |
"""
|
180 |
import time, random
|
181 |
from concurrent.futures import ThreadPoolExecutor
|
182 |
-
from
|
183 |
assert len(inputs_array) == len(history_array)
|
184 |
assert len(inputs_array) == len(sys_prompt_array)
|
185 |
if max_workers == -1: # 读取配置文件
|
186 |
-
try: max_workers = get_conf('DEFAULT_WORKER_NUM')
|
187 |
except: max_workers = 8
|
188 |
if max_workers <= 0: max_workers = 3
|
189 |
# 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
|
190 |
if not can_multi_process(llm_kwargs['llm_model']):
|
191 |
max_workers = 1
|
192 |
-
|
193 |
executor = ThreadPoolExecutor(max_workers=max_workers)
|
194 |
n_frag = len(inputs_array)
|
195 |
# 用户反馈
|
@@ -198,35 +193,33 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
198 |
# 跨线程传递
|
199 |
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
|
200 |
|
201 |
-
# 看门狗耐心
|
202 |
-
watch_dog_patience = 5
|
203 |
-
|
204 |
# 子线程任务
|
205 |
def _req_gpt(index, inputs, history, sys_prompt):
|
206 |
gpt_say = ""
|
207 |
retry_op = retry_times_at_unknown_error
|
208 |
exceeded_cnt = 0
|
209 |
mutable[index][2] = "执行中"
|
210 |
-
detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
|
211 |
while True:
|
212 |
# watchdog error
|
213 |
-
if
|
|
|
214 |
try:
|
215 |
# 【第一种情况】:顺利完成
|
|
|
216 |
gpt_say = predict_no_ui_long_connection(
|
217 |
-
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
|
218 |
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
|
219 |
)
|
220 |
mutable[index][2] = "已成功"
|
221 |
return gpt_say
|
222 |
except ConnectionAbortedError as token_exceeded_error:
|
223 |
-
# 【第二种情况】:Token
|
224 |
if handle_token_exceed:
|
225 |
exceeded_cnt += 1
|
226 |
# 【选择处理】 尝试计算比例,尽可能多地保留文本
|
227 |
from toolbox import get_reduce_token_percent
|
228 |
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
|
229 |
-
MAX_TOKEN =
|
230 |
EXCEED_ALLO = 512 + 512 * exceeded_cnt
|
231 |
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
|
232 |
gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
|
@@ -241,12 +234,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
241 |
return gpt_say # 放弃
|
242 |
except:
|
243 |
# 【第三种情况】:其他错误
|
244 |
-
if detect_timeout(): raise RuntimeError("检测到程序终止。")
|
245 |
tb_str = '```\n' + trimmed_format_exc() + '```'
|
246 |
print(tb_str)
|
247 |
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
248 |
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
249 |
-
if retry_op > 0:
|
250 |
retry_op -= 1
|
251 |
wait = random.randint(5, 20)
|
252 |
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
|
@@ -258,7 +250,6 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
258 |
for i in range(wait):
|
259 |
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
|
260 |
# 开始重试
|
261 |
-
if detect_timeout(): raise RuntimeError("检测到程序终止。")
|
262 |
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
|
263 |
continue # 返回重试
|
264 |
else:
|
@@ -284,11 +275,12 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
284 |
# 在前端打印些好玩的东西
|
285 |
for thread_index, _ in enumerate(worker_done):
|
286 |
print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
|
287 |
-
replace('\n', '').replace('
|
|
|
288 |
observe_win.append(print_something_really_funny)
|
289 |
# 在前端打印些好玩的东西
|
290 |
-
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
|
291 |
-
if not done else f'`{mutable[thread_index][2]}`\n\n'
|
292 |
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
|
293 |
# 在前端打印些好玩的东西
|
294 |
chatbot[-1] = [chatbot[-1][0], f'多线���操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
|
@@ -302,17 +294,106 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
|
302 |
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
303 |
gpt_res = f.result()
|
304 |
gpt_response_collection.extend([inputs_show_user, gpt_res])
|
305 |
-
|
306 |
# 是否在结束时,在界面上显示结果
|
307 |
if show_user_at_complete:
|
308 |
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
309 |
gpt_res = f.result()
|
310 |
chatbot.append([inputs_show_user, gpt_res])
|
311 |
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
312 |
-
time.sleep(0.
|
313 |
return gpt_response_collection
|
314 |
|
315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
|
317 |
def read_and_clean_pdf_text(fp):
|
318 |
"""
|
@@ -352,7 +433,7 @@ def read_and_clean_pdf_text(fp):
|
|
352 |
if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
|
353 |
fsize_statiscs[wtf['size']] += len(wtf['text'])
|
354 |
return max(fsize_statiscs, key=fsize_statiscs.get)
|
355 |
-
|
356 |
def ffsize_same(a,b):
|
357 |
"""
|
358 |
提取字体大小是否近似相等
|
@@ -388,7 +469,7 @@ def read_and_clean_pdf_text(fp):
|
|
388 |
if index == 0:
|
389 |
page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
|
390 |
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
|
391 |
-
|
392 |
############################## <第 2 步,获取正文主字体> ##################################
|
393 |
try:
|
394 |
fsize_statiscs = {}
|
@@ -404,7 +485,7 @@ def read_and_clean_pdf_text(fp):
|
|
404 |
mega_sec = []
|
405 |
sec = []
|
406 |
for index, line in enumerate(meta_line):
|
407 |
-
if index == 0:
|
408 |
sec.append(line[fc])
|
409 |
continue
|
410 |
if REMOVE_FOOT_NOTE:
|
@@ -465,9 +546,6 @@ def read_and_clean_pdf_text(fp):
|
|
465 |
return True
|
466 |
else:
|
467 |
return False
|
468 |
-
# 对于某些PDF会有第一个段落就以小写字母开头,为了避免索引错误将其更改为大写
|
469 |
-
if starts_with_lowercase_word(meta_txt[0]):
|
470 |
-
meta_txt[0] = meta_txt[0].capitalize()
|
471 |
for _ in range(100):
|
472 |
for index, block_txt in enumerate(meta_txt):
|
473 |
if starts_with_lowercase_word(block_txt):
|
@@ -501,12 +579,12 @@ def get_files_from_everything(txt, type): # type='.md'
|
|
501 |
"""
|
502 |
这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
|
503 |
下面是对每个参数和返回值的说明:
|
504 |
-
参数
|
505 |
-
- txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
|
506 |
- type: 字符串,表示要搜索的文件类型。默认是.md。
|
507 |
-
返回值
|
508 |
-
- success: 布尔值,表示函数是否成功执行。
|
509 |
-
- file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
|
510 |
- project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
|
511 |
该函数详细注释已添加,请确认是否满足您的需要。
|
512 |
"""
|
@@ -518,7 +596,7 @@ def get_files_from_everything(txt, type): # type='.md'
|
|
518 |
import requests
|
519 |
from toolbox import get_conf
|
520 |
from toolbox import get_log_folder, gen_time_str
|
521 |
-
proxies = get_conf('proxies')
|
522 |
try:
|
523 |
r = requests.get(txt, proxies=proxies)
|
524 |
except:
|
@@ -546,6 +624,90 @@ def get_files_from_everything(txt, type): # type='.md'
|
|
546 |
|
547 |
|
548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
549 |
@Singleton
|
550 |
class nougat_interface():
|
551 |
def __init__(self):
|
@@ -553,10 +715,8 @@ class nougat_interface():
|
|
553 |
|
554 |
def nougat_with_timeout(self, command, cwd, timeout=3600):
|
555 |
import subprocess
|
556 |
-
from toolbox import ProxyNetworkActivate
|
557 |
logging.info(f'正在执行命令 {command}')
|
558 |
-
|
559 |
-
process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
|
560 |
try:
|
561 |
stdout, stderr = process.communicate(timeout=timeout)
|
562 |
except subprocess.TimeoutExpired:
|
@@ -570,7 +730,7 @@ class nougat_interface():
|
|
570 |
def NOUGAT_parse_pdf(self, fp, chatbot, history):
|
571 |
from toolbox import update_ui_lastest_msg
|
572 |
|
573 |
-
yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
|
574 |
chatbot=chatbot, history=history, delay=0)
|
575 |
self.threadLock.acquire()
|
576 |
import glob, threading, os
|
@@ -578,7 +738,7 @@ class nougat_interface():
|
|
578 |
dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
|
579 |
os.makedirs(dst)
|
580 |
|
581 |
-
yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
|
582 |
chatbot=chatbot, history=history, delay=0)
|
583 |
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
|
584 |
res = glob.glob(os.path.join(dst,'*.mmd'))
|
@@ -601,8 +761,49 @@ def try_install_deps(deps, reload_m=[]):
|
|
601 |
importlib.reload(__import__(m))
|
602 |
|
603 |
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
(new file, "+" marks added lines; "⋮" marks unchanged lines collapsed by the diff viewer)

+from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
 import threading
 import os
 import logging

 def input_clipping(inputs, history, max_token_limit):
     import numpy as np
+    from request_llm.bridge_all import model_info
     enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

     mode = 'input-and-history'
     # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
     input_token_num = get_token_num(inputs)
+    if input_token_num < max_token_limit//2:
         mode = 'only-history'
         max_token_limit = max_token_limit - input_token_num
⋮
     n_token = get_token_num('\n'.join(everything))
     everything_token = [get_token_num(e) for e in everything]
     delta = max(everything_token) // 16  # 截断时的颗粒度
+
     while n_token > max_token_limit:
         where = np.argmax(everything_token)
         encoded = enc.encode(everything[where], disallowed_special=())
⋮
     return inputs, history
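For orientation, a minimal usage sketch of input_clipping follows. It is illustrative only and not part of the diff: the sample strings are invented, and the 4096-token budget simply mirrors the MAX_TOKEN constant used further down in this file.

# Hypothetical example (not in the repository): trim an oversized request.
# When the inputs use less than half the budget, only the history is cut.
sample_inputs = "Summarize the following section: ..."          # assumed text
sample_history = ["earlier question", "earlier answer", "long context " * 500]
clipped_inputs, clipped_history = input_clipping(
    inputs=sample_inputs, history=sample_history,
    max_token_limit=4096 - 512)   # leave headroom for the model's reply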
 def request_gpt_model_in_new_thread_with_ui_alive(
+        inputs, inputs_show_user, llm_kwargs,
         chatbot, history, sys_prompt, refresh_interval=0.2,
+        handle_token_exceed=True,
         retry_times_at_unknown_error=2,
         ):
     """
⋮
     """
     import time
     from concurrent.futures import ThreadPoolExecutor
+    from request_llm.bridge_all import predict_no_ui_long_connection
     # 用户反馈
     chatbot.append([inputs_show_user, ""])
     yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
     executor = ThreadPoolExecutor(max_workers=16)
     mutable = ["", time.time(), ""]
⋮
     def _req_gpt(inputs, history, sys_prompt):
         retry_op = retry_times_at_unknown_error
         exceeded_cnt = 0
         while True:
             # watchdog error
+            if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
                 raise RuntimeError("检测到程序终止。")
             try:
                 # 【第一种情况】:顺利完成
⋮
                 # 【选择处理】 尝试计算比例,尽可能多地保留文本
                 from toolbox import get_reduce_token_percent
                 p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
+                MAX_TOKEN = 4096
                 EXCEED_ALLO = 512 + 512 * exceeded_cnt
                 inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                 mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
⋮
     if llm.startswith('gpt-'): return True
     if llm.startswith('api2d-'): return True
     if llm.startswith('azure-'): return True
     return False
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
142 |
+
inputs_array, inputs_show_user_array, llm_kwargs,
|
143 |
+
chatbot, history_array, sys_prompt_array,
|
144 |
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
|
145 |
handle_token_exceed=True, show_user_at_complete=False,
|
146 |
retry_times_at_unknown_error=2,
|
|
|
174 |
"""
|
175 |
import time, random
|
176 |
from concurrent.futures import ThreadPoolExecutor
|
177 |
+
from request_llm.bridge_all import predict_no_ui_long_connection
|
178 |
assert len(inputs_array) == len(history_array)
|
179 |
assert len(inputs_array) == len(sys_prompt_array)
|
180 |
if max_workers == -1: # 读取配置文件
|
181 |
+
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
|
182 |
except: max_workers = 8
|
183 |
if max_workers <= 0: max_workers = 3
|
184 |
# 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
|
185 |
if not can_multi_process(llm_kwargs['llm_model']):
|
186 |
max_workers = 1
|
187 |
+
|
188 |
executor = ThreadPoolExecutor(max_workers=max_workers)
|
189 |
n_frag = len(inputs_array)
|
190 |
# 用户反馈
|
|
|
193 |
# 跨线程传递
|
194 |
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
|
195 |
|
|
|
|
|
|
|
196 |
# 子线程任务
|
197 |
def _req_gpt(index, inputs, history, sys_prompt):
|
198 |
gpt_say = ""
|
199 |
retry_op = retry_times_at_unknown_error
|
200 |
exceeded_cnt = 0
|
201 |
mutable[index][2] = "执行中"
|
|
|
202 |
while True:
|
203 |
# watchdog error
|
204 |
+
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
|
205 |
+
raise RuntimeError("检测到程序终止。")
|
206 |
try:
|
207 |
# 【第一种情况】:顺利完成
|
208 |
+
# time.sleep(10); raise RuntimeError("测试")
|
209 |
gpt_say = predict_no_ui_long_connection(
|
210 |
+
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
|
211 |
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
|
212 |
)
|
213 |
mutable[index][2] = "已成功"
|
214 |
return gpt_say
|
215 |
except ConnectionAbortedError as token_exceeded_error:
|
216 |
+
# 【第二种情况】:Token溢出,
|
217 |
if handle_token_exceed:
|
218 |
exceeded_cnt += 1
|
219 |
# 【选择处理】 尝试计算比例,尽可能多地保留文本
|
220 |
from toolbox import get_reduce_token_percent
|
221 |
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
|
222 |
+
MAX_TOKEN = 4096
|
223 |
EXCEED_ALLO = 512 + 512 * exceeded_cnt
|
224 |
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
|
225 |
gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
|
|
|
234 |
return gpt_say # 放弃
|
235 |
except:
|
236 |
# 【第三种情况】:其他错误
|
|
|
237 |
tb_str = '```\n' + trimmed_format_exc() + '```'
|
238 |
print(tb_str)
|
239 |
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
240 |
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
241 |
+
if retry_op > 0:
|
242 |
retry_op -= 1
|
243 |
wait = random.randint(5, 20)
|
244 |
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
|
|
|
250 |
for i in range(wait):
|
251 |
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
|
252 |
# 开始重试
|
|
|
253 |
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
|
254 |
continue # 返回重试
|
255 |
else:
|
|
|
275 |
# 在前端打印些好玩的东西
|
276 |
for thread_index, _ in enumerate(worker_done):
|
277 |
print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
|
278 |
+
replace('\n', '').replace('```', '...').replace(
|
279 |
+
' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
|
280 |
observe_win.append(print_something_really_funny)
|
281 |
# 在前端打印些好玩的东西
|
282 |
+
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
|
283 |
+
if not done else f'`{mutable[thread_index][2]}`\n\n'
|
284 |
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
|
285 |
# 在前端打印些好玩的东西
|
286 |
chatbot[-1] = [chatbot[-1][0], f'多线���操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
|
|
|
294 |
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
295 |
gpt_res = f.result()
|
296 |
gpt_response_collection.extend([inputs_show_user, gpt_res])
|
297 |
+
|
298 |
# 是否在结束时,在界面上显示结果
|
299 |
if show_user_at_complete:
|
300 |
for inputs_show_user, f in zip(inputs_show_user_array, futures):
|
301 |
gpt_res = f.result()
|
302 |
chatbot.append([inputs_show_user, gpt_res])
|
303 |
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
304 |
+
time.sleep(0.3)
|
305 |
return gpt_response_collection
|
306 |
|
307 |
|
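The multi-thread variant is driven with one entry per text fragment; the sketch below mirrors the call in latex_actions.py later in this diff, with an assumed fragments list standing in for real inputs.

# Illustrative call (fragments, llm_kwargs and chatbot are assumed to exist).
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
    inputs_array=[f"Translate this text: {frag}" for frag in fragments],
    inputs_show_user_array=[f"fragment {i}" for i, _ in enumerate(fragments)],
    llm_kwargs=llm_kwargs, chatbot=chatbot,
    history_array=[[""] for _ in fragments],
    sys_prompt_array=["You are a translator." for _ in fragments])
# Replies come back flattened: [shown_0, reply_0, shown_1, reply_1, ...]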
+def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
+    def cut(txt_tocut, must_break_at_empty_line):  # 递归
+        if get_token_fn(txt_tocut) <= limit:
+            return [txt_tocut]
+        else:
+            lines = txt_tocut.split('\n')
+            estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
+            estimated_line_cut = int(estimated_line_cut)
+            for cnt in reversed(range(estimated_line_cut)):
+                if must_break_at_empty_line:
+                    if lines[cnt] != "":
+                        continue
+                print(cnt)
+                prev = "\n".join(lines[:cnt])
+                post = "\n".join(lines[cnt:])
+                if get_token_fn(prev) < limit:
+                    break
+            if cnt == 0:
+                raise RuntimeError("存在一行极长的文本!")
+            # print(len(post))
+            # 列表递归接龙
+            result = [prev]
+            result.extend(cut(post, must_break_at_empty_line))
+            return result
+    try:
+        return cut(txt, must_break_at_empty_line=True)
+    except RuntimeError:
+        return cut(txt, must_break_at_empty_line=False)

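A small self-contained sketch of how the splitter behaves; the whitespace token counter below is a stand-in for the tiktoken encoder that real callers pass in.

def toy_token_counter(txt):
    # crude stand-in: one token per whitespace-separated word
    return len(txt.split())

long_text = "\n\n".join("paragraph %d " % i + "word " * 120 for i in range(30))
pieces = breakdown_txt_to_satisfy_token_limit(long_text, toy_token_counter, limit=256)
# Each piece now fits the budget; empty lines were preferred as cut points.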
+def force_breakdown(txt, limit, get_token_fn):
+    """
+    当无法用标点、空行分割时,我们用最暴力的方法切割
+    """
+    for i in reversed(range(len(txt))):
+        if get_token_fn(txt[:i]) < limit:
+            return txt[:i], txt[i:]
+    return "Tiktoken未知错误", "Tiktoken未知错误"
+
+def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
+    # 递归
+    def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
+        if get_token_fn(txt_tocut) <= limit:
+            return [txt_tocut]
+        else:
+            lines = txt_tocut.split('\n')
+            estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
+            estimated_line_cut = int(estimated_line_cut)
+            cnt = 0
+            for cnt in reversed(range(estimated_line_cut)):
+                if must_break_at_empty_line:
+                    if lines[cnt] != "":
+                        continue
+                prev = "\n".join(lines[:cnt])
+                post = "\n".join(lines[cnt:])
+                if get_token_fn(prev) < limit:
+                    break
+            if cnt == 0:
+                if break_anyway:
+                    prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
+                else:
+                    raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
+            # print(len(post))
+            # 列表递归接龙
+            result = [prev]
+            result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
+            return result
+    try:
+        # 第1次尝试,将双空行(\n\n)作为切分点
+        return cut(txt, must_break_at_empty_line=True)
+    except RuntimeError:
+        try:
+            # 第2次尝试,将单空行(\n)作为切分点
+            return cut(txt, must_break_at_empty_line=False)
+        except RuntimeError:
+            try:
+                # 第3次尝试,将英文句号(.)作为切分点
+                res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
+                return [r.replace('。\n', '.') for r in res]
+            except RuntimeError as e:
+                try:
+                    # 第4次尝试,将中文句号(。)作为切分点
+                    res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False)
+                    return [r.replace('。。\n', '。') for r in res]
+                except RuntimeError as e:
+                    # 第5次尝试,没办法了,随便切一下敷衍吧
+                    return cut(txt, must_break_at_empty_line=False, break_anyway=True)

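The PDF variant above retries with progressively looser cut points: double newlines, single newlines, English periods, Chinese periods, and finally a brute-force character cut through force_breakdown. A hedged sketch follows (the length-based token estimate is an assumption, not the project's real tokenizer):

pdf_text = "A sentence extracted from a PDF with no line breaks. " * 400
chunks = breakdown_txt_to_satisfy_token_limit_for_pdf(
    pdf_text, get_token_fn=lambda s: len(s) // 4, limit=512)
# With no newlines available, the splitter falls through to the
# period-based attempts before resorting to force_breakdown.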
 def read_and_clean_pdf_text(fp):
     """
⋮
         if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
         fsize_statiscs[wtf['size']] += len(wtf['text'])
         return max(fsize_statiscs, key=fsize_statiscs.get)
+
     def ffsize_same(a,b):
         """
         提取字体大小是否近似相等
⋮
         if index == 0:
             page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                 '- ', '') for t in text_areas['blocks'] if 'lines' in t]
+
     ############################## <第 2 步,获取正文主字体> ##################################
     try:
         fsize_statiscs = {}
⋮
     mega_sec = []
     sec = []
     for index, line in enumerate(meta_line):
+        if index == 0:
             sec.append(line[fc])
             continue
         if REMOVE_FOOT_NOTE:
⋮
             return True
         else:
             return False
     for _ in range(100):
         for index, block_txt in enumerate(meta_txt):
             if starts_with_lowercase_word(block_txt):
⋮
     """
     这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
     下面是对每个参数和返回值的说明:
+    参数
+    - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
     - type: 字符串,表示要搜索的文件类型。默认是.md。
+    返回值
+    - success: 布尔值,表示函数是否成功执行。
+    - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
     - project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
     该函数详细注释已添加,请确认是否满足您的需要。
     """
⋮
     import requests
     from toolbox import get_conf
     from toolbox import get_log_folder, gen_time_str
+    proxies, = get_conf('proxies')
     try:
         r = requests.get(txt, proxies=proxies)
     except:
⋮

+
+def Singleton(cls):
+    _instance = {}
+
+    def _singleton(*args, **kargs):
+        if cls not in _instance:
+            _instance[cls] = cls(*args, **kargs)
+        return _instance[cls]
+
+    return _singleton
+
+
+@Singleton
+class knowledge_archive_interface():
+    def __init__(self) -> None:
+        self.threadLock = threading.Lock()
+        self.current_id = ""
+        self.kai_path = None
+        self.qa_handle = None
+        self.text2vec_large_chinese = None
+
+    def get_chinese_text2vec(self):
+        if self.text2vec_large_chinese is None:
+            # < -------------------预热文本向量化模组--------------- >
+            from toolbox import ProxyNetworkActivate
+            print('Checking Text2vec ...')
+            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            with ProxyNetworkActivate():    # 临时地激活代理网络
+                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
+
+        return self.text2vec_large_chinese
+
+
+    def feed_archive(self, file_manifest, id="default"):
+        self.threadLock.acquire()
+        # import uuid
+        self.current_id = id
+        from zh_langchain import construct_vector_store
+        self.qa_handle, self.kai_path = construct_vector_store(
+            vs_id=self.current_id,
+            files=file_manifest,
+            sentence_size=100,
+            history=[],
+            one_conent="",
+            one_content_segmentation="",
+            text2vec = self.get_chinese_text2vec(),
+        )
+        self.threadLock.release()
+
+    def get_current_archive_id(self):
+        return self.current_id
+
+    def get_loaded_file(self):
+        return self.qa_handle.get_loaded_file()
+
+    def answer_with_archive_by_id(self, txt, id):
+        self.threadLock.acquire()
+        if not self.current_id == id:
+            self.current_id = id
+            from zh_langchain import construct_vector_store
+            self.qa_handle, self.kai_path = construct_vector_store(
+                vs_id=self.current_id,
+                files=[],
+                sentence_size=100,
+                history=[],
+                one_conent="",
+                one_content_segmentation="",
+                text2vec = self.get_chinese_text2vec(),
+            )
+        VECTOR_SEARCH_SCORE_THRESHOLD = 0
+        VECTOR_SEARCH_TOP_K = 4
+        CHUNK_SIZE = 512
+        resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
+            query = txt,
+            vs_path = self.kai_path,
+            score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
+            vector_search_top_k=VECTOR_SEARCH_TOP_K,
+            chunk_conent=True,
+            chunk_size=CHUNK_SIZE,
+            text2vec = self.get_chinese_text2vec(),
+        )
+        self.threadLock.release()
+        return resp, prompt
+
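A quick demonstration of the Singleton decorator defined above: repeated "instantiation" of a decorated class returns one cached object, which is why the knowledge archive (and the nougat interface below) can safely hold a shared lock and warm caches. The Counter class is invented purely for illustration.

@Singleton
class Counter():
    def __init__(self):
        self.n = 0

a, b = Counter(), Counter()
a.n += 1
assert b.n == 1   # a and b are the same cached instance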
 @Singleton
 class nougat_interface():
     def __init__(self):
⋮
     def nougat_with_timeout(self, command, cwd, timeout=3600):
         import subprocess
         logging.info(f'正在执行命令 {command}')
+        process = subprocess.Popen(command, shell=True, cwd=cwd)
         try:
             stdout, stderr = process.communicate(timeout=timeout)
         except subprocess.TimeoutExpired:
⋮
     def NOUGAT_parse_pdf(self, fp, chatbot, history):
         from toolbox import update_ui_lastest_msg

+        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                          chatbot=chatbot, history=history, delay=0)
         self.threadLock.acquire()
         import glob, threading, os
⋮
         dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
         os.makedirs(dst)

+        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                          chatbot=chatbot, history=history, delay=0)
         self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
         res = glob.glob(os.path.join(dst,'*.mmd'))
⋮
         importlib.reload(__import__(m))

+HTML_CSS = """
+.row {
+  display: flex;
+  flex-wrap: wrap;
+}
+.column {
+  flex: 1;
+  padding: 10px;
+}
+.table-header {
+  font-weight: bold;
+  border-bottom: 1px solid black;
+}
+.table-row {
+  border-bottom: 1px solid lightgray;
+}
+.table-cell {
+  padding: 5px;
+}
+"""
+
+TABLE_CSS = """
+<div class="row table-row">
+    <div class="column table-cell">REPLACE_A</div>
+    <div class="column table-cell">REPLACE_B</div>
+</div>
+"""
+
+class construct_html():
+    def __init__(self) -> None:
+        self.css = HTML_CSS
+        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
+
+    def add_row(self, a, b):
+        tmp = TABLE_CSS
+        from toolbox import markdown_convertion
+        tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
+        tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
+        self.html_string += tmp
+
+    def save_file(self, file_name):
+        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
+            f.write(self.html_string.encode('utf-8', 'ignore').decode())
+        return os.path.join(get_log_folder(), file_name)
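For context, a short sketch of how construct_html is consumed by the translation plugins: paired original/translated paragraphs become table rows, and the page is written into the log folder (the file name here is an assumption).

ch = construct_html()
ch.add_row("original paragraph ...", "translated paragraph ...")
report_path = ch.save_file('translate_report.html')   # hypothetical name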
crazy_functions/diagram_fns/file_tree.py
DELETED
@@ -1,122 +0,0 @@
import os
from textwrap import indent

class FileNode:
    def __init__(self, name):
        self.name = name
        self.children = []
        self.is_leaf = False
        self.level = 0
        self.parenting_ship = []
        self.comment = ""
        self.comment_maxlen_show = 50

    @staticmethod
    def add_linebreaks_at_spaces(string, interval=10):
        return '\n'.join(string[i:i+interval] for i in range(0, len(string), interval))

    def sanitize_comment(self, comment):
        if len(comment) > self.comment_maxlen_show: suf = '...'
        else: suf = ''
        comment = comment[:self.comment_maxlen_show]
        comment = comment.replace('\"', '').replace('`', '').replace('\n', '').replace('`', '').replace('$', '')
        comment = self.add_linebreaks_at_spaces(comment, 10)
        return '`' + comment + suf + '`'

    def add_file(self, file_path, file_comment):
        directory_names, file_name = os.path.split(file_path)
        current_node = self
        level = 1
        if directory_names == "":
            new_node = FileNode(file_name)
            current_node.children.append(new_node)
            new_node.is_leaf = True
            new_node.comment = self.sanitize_comment(file_comment)
            new_node.level = level
            current_node = new_node
        else:
            dnamesplit = directory_names.split(os.sep)
            for i, directory_name in enumerate(dnamesplit):
                found_child = False
                level += 1
                for child in current_node.children:
                    if child.name == directory_name:
                        current_node = child
                        found_child = True
                        break
                if not found_child:
                    new_node = FileNode(directory_name)
                    current_node.children.append(new_node)
                    new_node.level = level - 1
                    current_node = new_node
            term = FileNode(file_name)
            term.level = level
            term.comment = self.sanitize_comment(file_comment)
            term.is_leaf = True
            current_node.children.append(term)

    def print_files_recursively(self, level=0, code="R0"):
        print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
        for j, child in enumerate(self.children):
            child.print_files_recursively(level=level+1, code=code+str(j))
            self.parenting_ship.extend(child.parenting_ship)
            p1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
            p2 = """ --> """
            p3 = f"""{code+str(j)}[\"🗎{child.name}\"]""" if child.is_leaf else f"""{code+str(j)}[[\"📁{child.name}\"]]"""
            edge_code = p1 + p2 + p3
            if edge_code in self.parenting_ship:
                continue
            self.parenting_ship.append(edge_code)
        if self.comment != "":
            pc1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
            pc2 = f""" -.-x """
            pc3 = f"""C{code}[\"{self.comment}\"]:::Comment"""
            edge_code = pc1 + pc2 + pc3
            self.parenting_ship.append(edge_code)


MERMAID_TEMPLATE = r"""
```mermaid
flowchart LR
    %% <gpt_academic_hide_mermaid_code> 一个特殊标记,用于在生成mermaid图表时隐藏代码块
    classDef Comment stroke-dasharray: 5 5
    subgraph {graph_name}
{relationship}
    end
```
"""

def build_file_tree_mermaid_diagram(file_manifest, file_comments, graph_name):
    # Create the root node
    file_tree_struct = FileNode("root")
    # Build the tree structure
    for file_path, file_comment in zip(file_manifest, file_comments):
        file_tree_struct.add_file(file_path, file_comment)
    file_tree_struct.print_files_recursively()
    cc = "\n".join(file_tree_struct.parenting_ship)
    ccc = indent(cc, prefix=" "*8)
    return MERMAID_TEMPLATE.format(graph_name=graph_name, relationship=ccc)

if __name__ == "__main__":
    # File manifest
    file_manifest = [
        "cradle_void_terminal.ipynb",
        "tests/test_utils.py",
        "tests/test_plugins.py",
        "tests/test_llms.py",
        "config.py",
        "build/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/model_weights_0.bin",
        "crazy_functions/latex_fns/latex_actions.py",
        "crazy_functions/latex_fns/latex_toolbox.py"
    ]
    file_comments = [
        "根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件",
        "包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器",
        "用于构建HTML报告的类和方法用于构建HTML报告的类和方法用于构建HTML报告的类和方法",
        "包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码",
        "用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数",
        "是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块",
        "用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
        "包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
    ]
    print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
crazy_functions/game_fns/game_ascii_art.py
DELETED
@@ -1,42 +0,0 @@
from toolbox import CatchException, update_ui, update_ui_lastest_msg
from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
import random


class MiniGame_ASCII_Art(GptAcademicGameBaseState):
    def step(self, prompt, chatbot, history):
        if self.step_cnt == 0:
            chatbot.append(["我画你猜(动物)", "请稍等..."])
        else:
            if prompt.strip() == 'exit':
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.)
                return
            chatbot.append([prompt, ""])
            yield from update_ui(chatbot=chatbot, history=history)

        if self.step_cnt == 0:
            self.lock_plugin(chatbot)
            self.cur_task = 'draw'

        if self.cur_task == 'draw':
            avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"]
            self.obj = random.choice(avail_obj)
            inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
                f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. "
            raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
            self.cur_task = 'identify user guess'
            res = get_code_block(raw_res)
            history += ['', f'the answer is {self.obj}', inputs, res]
            yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.)

        elif self.cur_task == 'identify user guess':
            if is_same_thing(self.obj, prompt, self.llm_kwargs):
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
            else:
                self.cur_task = 'identify user guess'
                yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.)
crazy_functions/game_fns/game_interactive_story.py
DELETED
@@ -1,212 +0,0 @@
prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。

- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:第一幕的字数少于300字,且少于2个段落。
"""

prompts_interact = """ 小说的前文回顾:
「
{previously_on_story}
」

你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都精明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。

输出格式例如:
1. 后续剧情发展1
2. 后续剧情发展2
3. 后续剧情发展3
4. 后续剧情发展4
"""


prompts_resume = """小说的前文回顾:
「
{previously_on_story}
」

你是一个作家,我们正在互相讨论,确定后续剧情的发展。
在以下的剧情发展中,
「
{choice}
」
我认为更合理的是:{user_choice}。
请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。

- 禁止杜撰不符合我选择的剧情。
- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 小说的下一幕字数少于300字,且少于2个段落。
"""


prompts_terminate = """小说的前文回顾:
「
{previously_on_story}
」

你是一个作家,我们正在互相讨论,确定后续剧情的发展。
现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。

请在前文的基础上(不要重复前文),编写小说的最后一幕。

- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:最后一幕的字数少于1000字。
"""


from toolbox import CatchException, update_ui, update_ui_lastest_msg
from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
import random


class MiniGame_ResumeStory(GptAcademicGameBaseState):
    story_headstart = [
        '先行者知道,他现在是全宇宙中唯一的一个人了。',
        '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。',
        '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。',
        '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。',
        '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。',
        '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。'
    ]


    def begin_game_step_0(self, prompt, chatbot, history):
        # init game at step 0
        self.headstart = random.choice(self.story_headstart)
        self.story = []
        chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"])
        self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。'


    def generate_story_image(self, story_paragraph):
        try:
            from crazy_functions.图片生成 import gen_image
            prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。')
            image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
            return f'<br/><div align="center"><img src="file={image_path}"></div>'
        except:
            return ''

    def step(self, prompt, chatbot, history):

        """
        首先,处理游戏初始化等特殊情况
        """
        if self.step_cnt == 0:
            self.begin_game_step_0(prompt, chatbot, history)
            self.lock_plugin(chatbot)
            self.cur_task = 'head_start'
        else:
            if prompt.strip() == 'exit' or prompt.strip() == '结束剧情':
                # should we terminate game here?
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg=f"游戏结束。", chatbot=chatbot, history=history, delay=0.)
                return
            if '剧情收尾' in prompt:
                self.cur_task = 'story_terminate'
            # # well, game resumes
            # chatbot.append([prompt, ""])
        # update ui, don't keep the user waiting
        yield from update_ui(chatbot=chatbot, history=history)


        """
        处理游戏的主体逻辑
        """
        if self.cur_task == 'head_start':
            """
            这是游戏的第一步
            """
            inputs_ = prompts_hs.format(headstart=self.headstart)
            history_ = []
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, '故事开头', self.llm_kwargs,
                chatbot, history_, self.sys_prompt_
            )
            self.story.append(story_paragraph)
            # # 配图
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)

            # # 构建后续剧情引导
            previously_on_story = ""
            for s in self.story:
                previously_on_story += s + '\n'
            inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
            history_ = []
            self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', self.llm_kwargs,
                chatbot,
                history_,
                self.sys_prompt_
            )
            self.cur_task = 'user_choice'


        elif self.cur_task == 'user_choice':
            """
            根据用户的提示,确定故事的下一步
            """
            if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1)
            previously_on_story = ""
            for s in self.story:
                previously_on_story += s + '\n'
            inputs_ = prompts_resume.format(previously_on_story=previously_on_story, choice=self.next_choices, user_choice=prompt)
            history_ = []
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, history_, self.sys_prompt_
            )
            self.story.append(story_paragraph)
            # # 配图
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)

            # # 构建后续剧情引导
            previously_on_story = ""
            for s in self.story:
                previously_on_story += s + '\n'
            inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
            history_ = []
            self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_,
                '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', self.llm_kwargs,
                chatbot,
                history_,
                self.sys_prompt_
            )
            self.cur_task = 'user_choice'


        elif self.cur_task == 'story_terminate':
            """
            根据用户的提示,确定故事的结局
            """
            previously_on_story = ""
            for s in self.story:
                previously_on_story += s + '\n'
            inputs_ = prompts_terminate.format(previously_on_story=previously_on_story, user_choice=prompt)
            history_ = []
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, history_, self.sys_prompt_
            )
            # # 配图
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)

            # terminate game
            self.delete_game = True
            return
crazy_functions/game_fns/game_utils.py
DELETED
@@ -1,35 +0,0 @@
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
from request_llms.bridge_all import predict_no_ui_long_connection

def get_code_block(reply):
    import re
    pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
    matches = re.findall(pattern, reply) # find all code blocks in text
    if len(matches) == 1:
        return "```" + matches[0] + "```" # code block
    raise RuntimeError("GPT is not generating proper code.")

def is_same_thing(a, b, llm_kwargs):
    from pydantic import BaseModel, Field
    class IsSameThing(BaseModel):
        is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False)

    def run_gpt_fn(inputs, sys_prompt, history=[]):
        return predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs,
            history=history, sys_prompt=sys_prompt, observe_window=[]
        )

    gpt_json_io = GptJsonIO(IsSameThing)
    inputs_01 = "Identity whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b)
    inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. cat and 猫 are the same thing."
    analyze_res_cot_01 = run_gpt_fn(inputs_01, "", [])

    inputs_02 = inputs_01 + gpt_json_io.format_instructions
    analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01])

    try:
        res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
        return res.is_same_thing
    except JsonStringError as e:
        return False
crazy_functions/gen_fns/gen_fns_shared.py
DELETED
@@ -1,70 +0,0 @@
import time
import importlib
from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
import multiprocessing

def get_class_name(class_string):
    import re
    # Use regex to extract the class name
    class_name = re.search(r'class (\w+)\(', class_string).group(1)
    return class_name

def try_make_module(code, chatbot):
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
    with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
    promote_file_to_downloadzone(fn_path, chatbot=chatbot)
    class_name = get_class_name(code)
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
    # only has 10 seconds to run
    p.start(); p.join(timeout=10)
    if p.is_alive(): p.terminate(); p.join()
    p.close()
    return return_dict["success"], return_dict['traceback']

# check is_function_successfully_generated
def is_function_successfully_generated(fn_path, class_name, return_dict):
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['success'] = True
        return
    except:
        return_dict['traceback'] = trimmed_format_exc()
        return

def subprocess_worker(code, file_path, return_dict):
    return_dict['result'] = None
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
        fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
        with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
        class_name = get_class_name(code)
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['result'] = instance.run(file_path)
        return_dict['success'] = True
    except:
        return_dict['traceback'] = trimmed_format_exc()
crazy_functions/ipc_fns/mp.py
DELETED
@@ -1,37 +0,0 @@
import platform
import pickle
import multiprocessing

def run_in_subprocess_wrapper_func(v_args):
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    import sys
    try:
        result = func(*args, **kwargs)
        return_dict['result'] = result
    except Exception as e:
        exc_info = sys.exc_info()
        exception_dict['exception'] = exc_info

def run_in_subprocess_with_timeout(func, timeout=60):
    if platform.system() == 'Linux':
        def wrapper(*args, **kwargs):
            return_dict = multiprocessing.Manager().dict()
            exception_dict = multiprocessing.Manager().dict()
            v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
            process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
            process.start()
            process.join(timeout)
            if process.is_alive():
                process.terminate()
                raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
            process.close()
            if 'exception' in exception_dict:
                # ooops, the subprocess ran into an exception
                exc_info = exception_dict['exception']
                raise exc_info[1].with_traceback(exc_info[2])
            if 'result' in return_dict.keys():
                # If the subprocess ran successfully, return the result
                return return_dict['result']
        return wrapper
    else:
        return func
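For reference, this wrapper was used as a guard around fragile callables. A minimal sketch follows (Linux only, since on other platforms the function is returned unchanged; the file name is hypothetical):

def risky_parse(path):
    # stand-in for a parser that may hang on malformed input
    with open(path, 'rb') as f:
        return len(f.read())

safe_parse = run_in_subprocess_with_timeout(risky_parse, timeout=5)
# result = safe_parse('some_file.pdf')   # raises TimeoutError after 5 seconds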
crazy_functions/latex_fns/latex_actions.py
CHANGED
(old file, "-" marks removed lines; "…" marks cells truncated by the diff viewer)

@@ -1,10 +1,9 @@
 from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
-from toolbox import …
 from .latex_toolbox import PRESERVE, TRANSFORM
 from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
 from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
 from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
-from .latex_toolbox import find_title_and_abs

 import os, shutil
 import re

@@ -91,18 +90,7 @@ class LatexPaperSplit():
         "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
     # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
     self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
-
-    self.abstract = "unknown"
-
-    def read_title_and_abstract(self, txt):
-        try:
-            title, abstract = find_title_and_abs(txt)
-            if title is not None:
-                self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
-            if abstract is not None:
-                self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
-        except:
-            pass

     def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
         """

@@ -175,8 +163,9 @@ class LatexPaperFileGroup():
     self.sp_file_contents = []
     self.sp_file_index = []
     self.sp_file_tag = []
     # count_token
-    from …
     enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     self.get_token_num = get_token_num

@@ -191,12 +180,13 @@ class LatexPaperFileGroup():
     self.sp_file_index.append(index)
     self.sp_file_tag.append(self.file_paths[index])
 else:
-    from …
-    segments = …
     for j, segment in enumerate(segments):
         self.sp_file_contents.append(segment)
         self.sp_file_index.append(index)
         self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")

 def merge_result(self):
     self.file_result = ["" for _ in range(len(self.file_paths))]

@@ -244,8 +234,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin…
     chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     lps = LatexPaperSplit()
-    lps.read_title_and_abstract(merged_content)
     res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
     # <-------- 拆分过长的latex片段 ---------->
     pfg = LatexPaperFileGroup()
     for index, r in enumerate(res):

@@ -266,19 +256,12 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin…
 else:
     # <-------- gpt 多线程请求 ---------->
-    history_array = [[""] for _ in range(n_split)]
-    # LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
-    # if LATEX_EXPERIMENTAL:
-    #     paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
-    #     paper_meta_max_len = 888
-    #     history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]
-
     gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         inputs_array=inputs_array,
         inputs_show_user_array=inputs_show_user_array,
         llm_kwargs=llm_kwargs,
         chatbot=chatbot,
-        history_array=…
         sys_prompt_array=sys_prompt_array,
         # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
         scroller_max_len = 40

@@ -402,7 +385,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f…
     result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
     promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
     if modified_pdf_success:
-        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, …
         result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
         origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
         if os.path.exists(pj(work_folder, '..', 'translation')):

@@ -414,11 +397,8 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f…
         from .latex_toolbox import merge_pdfs
         concat_pdf = pj(work_folder_modified, f'comparison.pdf')
         merge_pdfs(origin_pdf, result_pdf, concat_pdf)
-        if os.path.exists(pj(work_folder, '..', 'translation')):
-            shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
         promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
     except Exception as e:
-        print(e)
         pass
     return True # 成功啦
 else:

@@ -443,7 +423,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
     # write html
     try:
         import shutil
-        from …
         from toolbox import gen_time_str
         ch = construct_html()
         orig = ""
(new file, "+" marks added lines; "⋮" marks unchanged lines collapsed by the diff viewer)

 from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
+from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
 from .latex_toolbox import PRESERVE, TRANSFORM
 from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
 from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
 from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout

 import os, shutil
 import re
⋮
         "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
     # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
     self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
+
     def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
         """
⋮
     self.sp_file_contents = []
     self.sp_file_index = []
     self.sp_file_tag = []
+
     # count_token
+    from request_llm.bridge_all import model_info
     enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     self.get_token_num = get_token_num
⋮
     self.sp_file_index.append(index)
     self.sp_file_tag.append(self.file_paths[index])
 else:
+    from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+    segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
     for j, segment in enumerate(segments):
         self.sp_file_contents.append(segment)
         self.sp_file_index.append(index)
         self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
+    print('Segmentation: done')

 def merge_result(self):
     self.file_result = ["" for _ in range(len(self.file_paths))]
⋮
     chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     lps = LatexPaperSplit()
     res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
+
     # <-------- 拆分过长的latex片段 ---------->
     pfg = LatexPaperFileGroup()
     for index, r in enumerate(res):
⋮
 else:
     # <-------- gpt 多线程请求 ---------->
     gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         inputs_array=inputs_array,
         inputs_show_user_array=inputs_show_user_array,
         llm_kwargs=llm_kwargs,
         chatbot=chatbot,
+        history_array=[[""] for _ in range(n_split)],
         sys_prompt_array=sys_prompt_array,
         # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
         scroller_max_len = 40
⋮
     result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
     promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
     if modified_pdf_success:
+        yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
         result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
         origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
         if os.path.exists(pj(work_folder, '..', 'translation')):
⋮
         from .latex_toolbox import merge_pdfs
         concat_pdf = pj(work_folder_modified, f'comparison.pdf')
         merge_pdfs(origin_pdf, result_pdf, concat_pdf)
         promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
     except Exception as e:
         pass
     return True # 成功啦
 else:
⋮
     # write html
     try:
         import shutil
+        from ..crazy_utils import construct_html
         from toolbox import gen_time_str
         ch = construct_html()
         orig = ""
crazy_functions/latex_fns/latex_toolbox.py
CHANGED
(old file, "-" marks removed lines; "…" marks cells truncated by the diff viewer)

@@ -1,18 +1,15 @@
 import os, shutil
 import re
 import numpy as np
-
 PRESERVE = 0
 TRANSFORM = 1

 pj = os.path.join

-
-class LinkedListNode:
     """
     Linked List Node
     """
-
     def __init__(self, string, preserve=True) -> None:
         self.string = string
         self.preserve = preserve

@@ -21,47 +18,41 @@ class LinkedListNode:
     # self.begin_line = 0
     # self.begin_char = 0

-
 def convert_to_linklist(text, mask):
     root = LinkedListNode("", preserve=True)
     current_node = root
     for c, m, i in zip(text, mask, range(len(text))):
-        if (m…
-            m…
-        ):
             # add
             current_node.string += c
         else:
-            current_node.next = LinkedListNode(c, preserve=(m…
             current_node = current_node.next
     return root

-
 def post_process(root):
     # 修复括号
     node = root
     while True:
         string = node.string
-        if node.preserve:
             node = node.next
-            if node is None:
-                break
             continue
-
         def break_check(string):
-            str_stack = [""]
             for i, c in enumerate(string):
-                if c ==…
-                    str_stack.append(…
-                elif c ==…
                     if len(str_stack) == 1:
-                        print(…
                         return i
                     str_stack.pop(-1)
                 else:
                     str_stack[-1] += c
             return -1
-
         bp = break_check(string)

         if bp == -1:

@@ -78,66 +69,51 @@ def post_process(root):
             node.next = q

         node = node.next
-        if node is None:
-            break

     # 屏蔽空行和太短的句子
     node = root
     while True:
-        if len(node.string.strip(…
-
-        if len(node.string.strip("\n").strip("")) < 42:
-            node.preserve = True
         node = node.next
-        if node is None:
-            break
     node = root
     while True:
         if node.next and node.preserve and node.next.preserve:
             node.string += node.next.string
             node.next = node.next.next
         node = node.next
-        if node is None:
-            break

     # 将前后断行符脱离
     node = root
     prev_node = None
     while True:
         if not node.preserve:
-            lstriped_ = node.string.lstrip().lstrip(…
-            if (…
-
-                and (prev_node.preserve)
-                and (len(lstriped_) != len(node.string))
-            ):
-                prev_node.string += node.string[: -len(lstriped_)]
                 node.string = lstriped_
-            rstriped_ = node.string.rstrip().rstrip(…
-            if (…
-
-                and (node.next.preserve)
-                and (len(rstriped_) != len(node.string))
-            ):
-                node.next.string = node.string[len(rstriped_) :] + node.next.string
                 node.string = rstriped_
-        #
         prev_node = node
         node = node.next
-        if node is None:
-            break

     # 标注节点的行数范围
     node = root
     n_line = 0
     expansion = 2
     while True:
-        n_l = node.string.count(…
-        node.range = [n_line…
-        n_line = n_line…
         node = node.next
-        if node is None:
-            break
     return root

@@ -152,125 +128,97 @@ def set_forbidden_text(text, mask, pattern, flags=0):
     """
     Add a preserve text area in this paper
     e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
-    you can mask out (mask = PRESERVE so that text become untouchable for GPT)
     everything between "\begin{equation}" and "\end{equation}"
     """
-    if isinstance(pattern, list):
-        pattern = "|".join(pattern)
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
-        mask[res.span()[0]…
     return text, mask

-
 def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
     """
     Move area out of preserve area (make text editable for GPT)
-    count the number of the braces so as to catch compelete text area.
     e.g.
-    \begin{abstract} blablablablablabla. \end{abstract}
     """
-    if isinstance(pattern, list):
-        pattern = "|".join(pattern)
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
         if not forbid_wrapper:
-            mask[res.span()[0]…
         else:
-            mask[res.regs[0][0]…
-            mask[res.regs[1][0]…
-            mask[res.regs[1][1]…
     return text, mask

-
 def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
     """
     Add a preserve text area in this paper (text become untouchable for GPT).
-    count the number of the braces so as to catch compelete text area.
     e.g.
-    \caption{blablablablabla\texbf{blablabla}blablabla.}
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
         brace_level = -1
         p = begin = end = res.regs[0][0]
-        for _ in range(1024…
-            if text[p] ==…
-
-            elif text[p] ==…
-                brace_level -= 1
-            elif text[p] == "{":
-                brace_level += 1
             p += 1
-        end = p
         mask[begin:end] = PRESERVE
     return text, mask

-
-def reverse_forbidden_text_careful_brace(
-    text, mask, pattern, flags=0, forbid_wrapper=True
-):
     """
     Move area out of preserve area (make text editable for GPT)
-    count the number of the braces so as to catch compelete text area.
     e.g.
-    \caption{blablablablabla\texbf{blablabla}blablabla.}
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
         brace_level = 0
         p = begin = end = res.regs[1][0]
-        for _ in range(1024…
-            if text[p] ==…
-
-            elif text[p] ==…
-                brace_level -= 1
-            elif text[p] == "{":
-                brace_level += 1
             p += 1
         end = p
         mask[begin:end] = TRANSFORM
         if forbid_wrapper:
-            mask[res.regs[0][0]…
-            mask[end…
     return text, mask

-
 def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
     """
     Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
     Add it to preserve area
     """
     pattern_compile = re.compile(pattern, flags)
-
     def search_with_line_limit(text, mask):
         for res in pattern_compile.finditer(text):
             cmd = res.group(1)  # begin{what}
-            this = res.group(2)…
-            this_mask = mask[res.regs[2][0]…
-            white_list = […
-            …
-            …
-                "lemma",
-                "definition",
-                "sproof",
-                "em",
-                "emph",
-                "textit",
-                "textbf",
-                "itemize",
-                "enumerate",
-            ]
-            if (cmd in white_list) or this.count(
-                "\n"
-            ) >= limit_n_lines:  # use a magical number 42
                 this, this_mask = search_with_line_limit(this, this_mask)
-                mask[res.regs[2][0]…
             else:
-                mask[res.regs[0][0]…
         return text, mask

-    return search_with_line_limit(text, mask)


"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex Merge File
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""

-
def find_main_tex_file(file_manifest, mode):
    """
285 |
在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
|
@@ -287,36 +234,27 @@ def find_main_tex_file(file_manifest, mode):
|
|
287 |
"""
|
288 |
canidates = []
|
289 |
for texf in file_manifest:
|
290 |
-
if os.path.basename(texf).startswith(
|
291 |
continue
|
292 |
-
with open(texf,
|
293 |
file_content = f.read()
|
294 |
-
if r
|
295 |
canidates.append(texf)
|
296 |
else:
|
297 |
continue
|
298 |
|
299 |
if len(canidates) == 0:
|
300 |
-
raise RuntimeError(
|
301 |
elif len(canidates) == 1:
|
302 |
return canidates[0]
|
303 |
-
else:
|
304 |
canidates_score = []
|
305 |
# 给出一些判定模板文档的词作为扣分项
|
306 |
-
unexpected_words = [
|
307 |
-
|
308 |
-
"manuscript",
|
309 |
-
"Guidelines",
|
310 |
-
"font",
|
311 |
-
"citations",
|
312 |
-
"rejected",
|
313 |
-
"blind review",
|
314 |
-
"reviewers",
|
315 |
-
]
|
316 |
-
expected_words = ["\\input", "\\ref", "\\cite"]
|
317 |
for texf in canidates:
|
318 |
canidates_score.append(0)
|
319 |
-
with open(texf,
|
320 |
file_content = f.read()
|
321 |
file_content = rm_comments(file_content)
|
322 |
for uw in unexpected_words:
|
@@ -325,10 +263,9 @@ def find_main_tex_file(file_manifest, mode):
|
|
325 |
for uw in expected_words:
|
326 |
if uw in file_content:
|
327 |
canidates_score[-1] += 1
|
328 |
-
select = np.argmax(canidates_score)
|
329 |
return canidates[select]
|
330 |
-
|
331 |
-
|
332 |
def rm_comments(main_file):
|
333 |
new_file_remove_comment_lines = []
|
334 |
for l in main_file.splitlines():
|
@@ -337,39 +274,30 @@ def rm_comments(main_file):
|
|
337 |
pass
|
338 |
else:
|
339 |
new_file_remove_comment_lines.append(l)
|
340 |
-
main_file =
|
341 |
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
|
342 |
-
main_file = re.sub(r
|
343 |
return main_file
|
344 |
|
345 |
-
|
346 |
def find_tex_file_ignore_case(fp):
|
347 |
dir_name = os.path.dirname(fp)
|
348 |
base_name = os.path.basename(fp)
|
349 |
# 如果输入的文件路径是正确的
|
350 |
-
if os.path.
|
351 |
-
return pj(dir_name, base_name)
|
352 |
# 如果不正确,试着加上.tex后缀试试
|
353 |
-
if not base_name.endswith(
|
354 |
-
|
355 |
-
if os.path.isfile(pj(dir_name, base_name)):
|
356 |
-
return pj(dir_name, base_name)
|
357 |
# 如果还找不到,解除大小写限制,再试一次
|
358 |
import glob
|
359 |
-
|
360 |
-
for f in glob.glob(dir_name + "/*.tex"):
|
361 |
base_name_s = os.path.basename(fp)
|
362 |
base_name_f = os.path.basename(f)
|
363 |
-
if base_name_s.lower() == base_name_f.lower():
|
364 |
-
return f
|
365 |
# 试着加上.tex后缀试试
|
366 |
-
if not base_name_s.endswith(
|
367 |
-
|
368 |
-
if base_name_s.lower() == base_name_f.lower():
|
369 |
-
return f
|
370 |
return None
|
371 |
|
372 |
-
|
373 |
def merge_tex_files_(project_foler, main_file, mode):
|
374 |
"""
|
375 |
Merge Tex project recrusively
|
@@ -380,51 +308,13 @@ def merge_tex_files_(project_foler, main_file, mode):
|
|
380 |
fp = os.path.join(project_foler, f)
|
381 |
fp_ = find_tex_file_ignore_case(fp)
|
382 |
if fp_:
|
383 |
-
|
384 |
-
with open(fp_, "r", encoding="utf-8", errors="replace") as fx:
|
385 |
-
c = fx.read()
|
386 |
-
except:
|
387 |
-
c = f"\n\nWarning from GPT-Academic: LaTex source file is missing!\n\n"
|
388 |
else:
|
389 |
-
raise RuntimeError(f
|
390 |
c = merge_tex_files_(project_foler, c, mode)
|
391 |
-
main_file = main_file[:
|
392 |
return main_file
|
393 |
|
394 |
-
|
395 |
-
def find_title_and_abs(main_file):
|
396 |
-
def extract_abstract_1(text):
|
397 |
-
pattern = r"\\abstract\{(.*?)\}"
|
398 |
-
match = re.search(pattern, text, re.DOTALL)
|
399 |
-
if match:
|
400 |
-
return match.group(1)
|
401 |
-
else:
|
402 |
-
return None
|
403 |
-
|
404 |
-
def extract_abstract_2(text):
|
405 |
-
pattern = r"\\begin\{abstract\}(.*?)\\end\{abstract\}"
|
406 |
-
match = re.search(pattern, text, re.DOTALL)
|
407 |
-
if match:
|
408 |
-
return match.group(1)
|
409 |
-
else:
|
410 |
-
return None
|
411 |
-
|
412 |
-
def extract_title(string):
|
413 |
-
pattern = r"\\title\{(.*?)\}"
|
414 |
-
match = re.search(pattern, string, re.DOTALL)
|
415 |
-
|
416 |
-
if match:
|
417 |
-
return match.group(1)
|
418 |
-
else:
|
419 |
-
return None
|
420 |
-
|
421 |
-
abstract = extract_abstract_1(main_file)
|
422 |
-
if abstract is None:
|
423 |
-
abstract = extract_abstract_2(main_file)
|
424 |
-
title = extract_title(main_file)
|
425 |
-
return title, abstract
|
426 |
-
|
427 |
-
|
428 |
def merge_tex_files(project_foler, main_file, mode):
|
429 |
"""
|
430 |
Merge Tex project recrusively
|
@@ -434,105 +324,46 @@ def merge_tex_files(project_foler, main_file, mode):
|
|
434 |
main_file = merge_tex_files_(project_foler, main_file, mode)
|
435 |
main_file = rm_comments(main_file)
|
436 |
|
437 |
-
if mode ==
|
438 |
# find paper documentclass
|
439 |
-
pattern = re.compile(r
|
440 |
match = pattern.search(main_file)
|
441 |
assert match is not None, "Cannot find documentclass statement!"
|
442 |
position = match.end()
|
443 |
-
add_ctex =
|
444 |
-
add_url =
|
445 |
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
|
446 |
# fontset=windows
|
447 |
import platform
|
448 |
-
|
449 |
-
main_file = re.sub(
|
450 |
-
r"\\documentclass\[(.*?)\]{(.*?)}",
|
451 |
-
r"\\documentclass[\1,fontset=windows,UTF8]{\2}",
|
452 |
-
main_file,
|
453 |
-
)
|
454 |
-
main_file = re.sub(
|
455 |
-
r"\\documentclass{(.*?)}",
|
456 |
-
r"\\documentclass[fontset=windows,UTF8]{\1}",
|
457 |
-
main_file,
|
458 |
-
)
|
459 |
# find paper abstract
|
460 |
-
pattern_opt1 = re.compile(r
|
461 |
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
|
462 |
match_opt1 = pattern_opt1.search(main_file)
|
463 |
match_opt2 = pattern_opt2.search(main_file)
|
464 |
-
|
465 |
-
# "Cannot find paper abstract section!"
|
466 |
-
main_file = insert_abstract(main_file)
|
467 |
-
match_opt1 = pattern_opt1.search(main_file)
|
468 |
-
match_opt2 = pattern_opt2.search(main_file)
|
469 |
-
assert (match_opt1 is not None) or (
|
470 |
-
match_opt2 is not None
|
471 |
-
), "Cannot find paper abstract section!"
|
472 |
return main_file
|
473 |
|
474 |
|
475 |
-
insert_missing_abs_str = r"""
|
476 |
-
\begin{abstract}
|
477 |
-
The GPT-Academic program cannot find abstract section in this paper.
|
478 |
-
\end{abstract}
|
479 |
-
"""
|
480 |
-
|
481 |
-
|
482 |
-
def insert_abstract(tex_content):
|
483 |
-
if "\\maketitle" in tex_content:
|
484 |
-
# find the position of "\maketitle"
|
485 |
-
find_index = tex_content.index("\\maketitle")
|
486 |
-
# find the nearest ending line
|
487 |
-
end_line_index = tex_content.find("\n", find_index)
|
488 |
-
# insert "abs_str" on the next line
|
489 |
-
modified_tex = (
|
490 |
-
tex_content[: end_line_index + 1]
|
491 |
-
+ "\n\n"
|
492 |
-
+ insert_missing_abs_str
|
493 |
-
+ "\n\n"
|
494 |
-
+ tex_content[end_line_index + 1 :]
|
495 |
-
)
|
496 |
-
return modified_tex
|
497 |
-
elif r"\begin{document}" in tex_content:
|
498 |
-
# find the position of "\maketitle"
|
499 |
-
find_index = tex_content.index(r"\begin{document}")
|
500 |
-
# find the nearest ending line
|
501 |
-
end_line_index = tex_content.find("\n", find_index)
|
502 |
-
# insert "abs_str" on the next line
|
503 |
-
modified_tex = (
|
504 |
-
tex_content[: end_line_index + 1]
|
505 |
-
+ "\n\n"
|
506 |
-
+ insert_missing_abs_str
|
507 |
-
+ "\n\n"
|
508 |
-
+ tex_content[end_line_index + 1 :]
|
509 |
-
)
|
510 |
-
return modified_tex
|
511 |
-
else:
|
512 |
-
return tex_content
|
513 |
-
|
514 |
-
|
515 |
"""
|
516 |
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
517 |
Post process
|
518 |
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
519 |
"""
|
520 |
-
|
521 |
-
|
522 |
def mod_inbraket(match):
|
523 |
"""
|
524 |
-
为啥chatgpt会把cite里面的逗号换成中文逗号呀
|
525 |
"""
|
526 |
# get the matched string
|
527 |
cmd = match.group(1)
|
528 |
str_to_modify = match.group(2)
|
529 |
# modify the matched string
|
530 |
-
str_to_modify = str_to_modify.replace(
|
531 |
-
str_to_modify = str_to_modify.replace(
|
532 |
# str_to_modify = 'BOOM'
|
533 |
return "\\" + cmd + "{" + str_to_modify + "}"
|
534 |
|
535 |
-
|
536 |
def fix_content(final_tex, node_string):
|
537 |
"""
|
538 |
Fix common GPT errors to increase success rate
|
@@ -543,10 +374,10 @@ def fix_content(final_tex, node_string):
|
|
543 |
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
544 |
|
545 |
if "Traceback" in final_tex and "[Local Message]" in final_tex:
|
546 |
-
final_tex = node_string
|
547 |
-
if node_string.count(
|
548 |
-
final_tex = node_string
|
549 |
-
if node_string.count(
|
550 |
# walk and replace any _ without \
|
551 |
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
|
552 |
|
@@ -554,32 +385,24 @@ def fix_content(final_tex, node_string):
|
|
554 |
# this function count the number of { and }
|
555 |
brace_level = 0
|
556 |
for c in string:
|
557 |
-
if c == "{":
|
558 |
-
|
559 |
-
elif c == "}":
|
560 |
-
brace_level -= 1
|
561 |
return brace_level
|
562 |
-
|
563 |
def join_most(tex_t, tex_o):
|
564 |
# this function join translated string and original string when something goes wrong
|
565 |
p_t = 0
|
566 |
p_o = 0
|
567 |
-
|
568 |
def find_next(string, chars, begin):
|
569 |
p = begin
|
570 |
while p < len(string):
|
571 |
-
if string[p] in chars:
|
572 |
-
return p, string[p]
|
573 |
p += 1
|
574 |
return None, None
|
575 |
-
|
576 |
while True:
|
577 |
-
res1, char = find_next(tex_o, [
|
578 |
-
if res1 is None:
|
579 |
-
break
|
580 |
res2, char = find_next(tex_t, [char], p_t)
|
581 |
-
if res2 is None:
|
582 |
-
break
|
583 |
p_o = res1 + 1
|
584 |
p_t = res2 + 1
|
585 |
return tex_t[:p_t] + tex_o[p_o:]
|
@@ -588,14 +411,10 @@ def fix_content(final_tex, node_string):
|
|
588 |
# 出问题了,还原部分原文,保证括号正确
|
589 |
final_tex = join_most(final_tex, node_string)
|
590 |
return final_tex
|
591 |
-
|
592 |
-
|
593 |
def compile_latex_with_timeout(command, cwd, timeout=60):
|
594 |
import subprocess
|
595 |
-
|
596 |
-
process = subprocess.Popen(
|
597 |
-
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd
|
598 |
-
)
|
599 |
try:
|
600 |
stdout, stderr = process.communicate(timeout=timeout)
|
601 |
except subprocess.TimeoutExpired:
|
@@ -606,51 +425,15 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
|
|
606 |
return True
|
607 |
|
608 |
|
609 |
-
def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
|
610 |
-
import sys
|
611 |
-
|
612 |
-
try:
|
613 |
-
result = func(*args, **kwargs)
|
614 |
-
return_dict["result"] = result
|
615 |
-
except Exception as e:
|
616 |
-
exc_info = sys.exc_info()
|
617 |
-
exception_dict["exception"] = exc_info
|
618 |
-
|
619 |
-
|
620 |
-
def run_in_subprocess(func):
|
621 |
-
import multiprocessing
|
622 |
-
|
623 |
-
def wrapper(*args, **kwargs):
|
624 |
-
return_dict = multiprocessing.Manager().dict()
|
625 |
-
exception_dict = multiprocessing.Manager().dict()
|
626 |
-
process = multiprocessing.Process(
|
627 |
-
target=run_in_subprocess_wrapper_func,
|
628 |
-
args=(func, args, kwargs, return_dict, exception_dict),
|
629 |
-
)
|
630 |
-
process.start()
|
631 |
-
process.join()
|
632 |
-
process.close()
|
633 |
-
if "exception" in exception_dict:
|
634 |
-
# ooops, the subprocess ran into an exception
|
635 |
-
exc_info = exception_dict["exception"]
|
636 |
-
raise exc_info[1].with_traceback(exc_info[2])
|
637 |
-
if "result" in return_dict.keys():
|
638 |
-
# If the subprocess ran successfully, return the result
|
639 |
-
return return_dict["result"]
|
640 |
-
|
641 |
-
return wrapper
|
642 |
-
|
643 |
-
|
644 |
-
def _merge_pdfs(pdf1_path, pdf2_path, output_path):
|
645 |
-
import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
|
646 |
|
|
|
|
|
647 |
Percent = 0.95
|
648 |
-
# raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
|
649 |
# Open the first PDF file
|
650 |
-
with open(pdf1_path,
|
651 |
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
|
652 |
# Open the second PDF file
|
653 |
-
with open(pdf2_path,
|
654 |
pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
|
655 |
# Create a new PDF file to store the merged pages
|
656 |
output_writer = PyPDF2.PdfFileWriter()
|
@@ -670,25 +453,12 @@ def _merge_pdfs(pdf1_path, pdf2_path, output_path):
|
|
670 |
page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
|
671 |
# Create a new empty page with double width
|
672 |
new_page = PyPDF2.PageObject.createBlankPage(
|
673 |
-
width=int(
|
674 |
-
|
675 |
-
+ int(page2.mediaBox.getWidth()) * Percent
|
676 |
-
),
|
677 |
-
height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight()),
|
678 |
)
|
679 |
new_page.mergeTranslatedPage(page1, 0, 0)
|
680 |
-
new_page.mergeTranslatedPage(
|
681 |
-
page2,
|
682 |
-
int(
|
683 |
-
int(page1.mediaBox.getWidth())
|
684 |
-
- int(page2.mediaBox.getWidth()) * (1 - Percent)
|
685 |
-
),
|
686 |
-
0,
|
687 |
-
)
|
688 |
output_writer.addPage(new_page)
|
689 |
# Save the merged PDF file
|
690 |
-
with open(output_path,
|
691 |
output_writer.write(output_file)
|
692 |
-
|
693 |
-
|
694 |
-
merge_pdfs = run_in_subprocess(_merge_pdfs) # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
|
|
|
1 |
import os, shutil
|
2 |
import re
|
3 |
import numpy as np
|
|
|
4 |
PRESERVE = 0
|
5 |
TRANSFORM = 1
|
6 |
|
7 |
pj = os.path.join
|
8 |
|
9 |
+
class LinkedListNode():
|
|
|
10 |
"""
|
11 |
Linked List Node
|
12 |
"""
|
|
|
13 |
def __init__(self, string, preserve=True) -> None:
|
14 |
self.string = string
|
15 |
self.preserve = preserve
|
|
|
18 |
# self.begin_line = 0
|
19 |
# self.begin_char = 0
|
20 |
|
|
|
21 |
def convert_to_linklist(text, mask):
|
22 |
root = LinkedListNode("", preserve=True)
|
23 |
current_node = root
|
24 |
for c, m, i in zip(text, mask, range(len(text))):
|
25 |
+
if (m==PRESERVE and current_node.preserve) \
|
26 |
+
or (m==TRANSFORM and not current_node.preserve):
|
|
|
27 |
# add
|
28 |
current_node.string += c
|
29 |
else:
|
30 |
+
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
|
31 |
current_node = current_node.next
|
32 |
return root
|
33 |
|
|
|
34 |
def post_process(root):
|
35 |
# 修复括号
|
36 |
node = root
|
37 |
while True:
|
38 |
string = node.string
|
39 |
+
if node.preserve:
|
40 |
node = node.next
|
41 |
+
if node is None: break
|
|
|
42 |
continue
|
|
|
43 |
def break_check(string):
|
44 |
+
str_stack = [""] # (lv, index)
|
45 |
for i, c in enumerate(string):
|
46 |
+
if c == '{':
|
47 |
+
str_stack.append('{')
|
48 |
+
elif c == '}':
|
49 |
if len(str_stack) == 1:
|
50 |
+
print('stack fix')
|
51 |
return i
|
52 |
str_stack.pop(-1)
|
53 |
else:
|
54 |
str_stack[-1] += c
|
55 |
return -1
|
|
|
56 |
bp = break_check(string)
|
57 |
|
58 |
if bp == -1:
|
|
|
69 |
node.next = q
|
70 |
|
71 |
node = node.next
|
72 |
+
if node is None: break
|
|
|
73 |
|
74 |
# 屏蔽空行和太短的句子
|
75 |
node = root
|
76 |
while True:
|
77 |
+
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
78 |
+
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
|
|
|
|
|
79 |
node = node.next
|
80 |
+
if node is None: break
|
|
|
81 |
node = root
|
82 |
while True:
|
83 |
if node.next and node.preserve and node.next.preserve:
|
84 |
node.string += node.next.string
|
85 |
node.next = node.next.next
|
86 |
node = node.next
|
87 |
+
if node is None: break
|
|
|
88 |
|
89 |
# 将前后断行符脱离
|
90 |
node = root
|
91 |
prev_node = None
|
92 |
while True:
|
93 |
if not node.preserve:
|
94 |
+
lstriped_ = node.string.lstrip().lstrip('\n')
|
95 |
+
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
|
96 |
+
prev_node.string += node.string[:-len(lstriped_)]
|
|
|
|
|
|
|
|
|
97 |
node.string = lstriped_
|
98 |
+
rstriped_ = node.string.rstrip().rstrip('\n')
|
99 |
+
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
|
100 |
+
node.next.string = node.string[len(rstriped_):] + node.next.string
|
|
|
|
|
|
|
|
|
101 |
node.string = rstriped_
|
102 |
+
# =====
|
103 |
prev_node = node
|
104 |
node = node.next
|
105 |
+
if node is None: break
|
|
|
106 |
|
107 |
# 标注节点的行数范围
|
108 |
node = root
|
109 |
n_line = 0
|
110 |
expansion = 2
|
111 |
while True:
|
112 |
+
n_l = node.string.count('\n')
|
113 |
+
node.range = [n_line-expansion, n_line+n_l+expansion] # 失败时,扭转的范围
|
114 |
+
n_line = n_line+n_l
|
115 |
node = node.next
|
116 |
+
if node is None: break
|
|
|
117 |
return root
|
118 |
|
119 |
|
|
|
128 |
"""
|
129 |
Add a preserve text area in this paper
|
130 |
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
|
131 |
+
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
|
132 |
everything between "\begin{equation}" and "\end{equation}"
|
133 |
"""
|
134 |
+
if isinstance(pattern, list): pattern = '|'.join(pattern)
|
|
|
135 |
pattern_compile = re.compile(pattern, flags)
|
136 |
for res in pattern_compile.finditer(text):
|
137 |
+
mask[res.span()[0]:res.span()[1]] = PRESERVE
|
138 |
return text, mask
|
139 |
|
|
|
140 |
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
|
141 |
"""
|
142 |
Move area out of preserve area (make text editable for GPT)
|
143 |
+
count the number of the braces so as to catch compelete text area.
|
144 |
e.g.
|
145 |
+
\begin{abstract} blablablablablabla. \end{abstract}
|
146 |
"""
|
147 |
+
if isinstance(pattern, list): pattern = '|'.join(pattern)
|
|
|
148 |
pattern_compile = re.compile(pattern, flags)
|
149 |
for res in pattern_compile.finditer(text):
|
150 |
if not forbid_wrapper:
|
151 |
+
mask[res.span()[0]:res.span()[1]] = TRANSFORM
|
152 |
else:
|
153 |
+
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
|
154 |
+
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
|
155 |
+
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
|
156 |
return text, mask
|
157 |
|
|
|
158 |
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
|
159 |
"""
|
160 |
Add a preserve text area in this paper (text become untouchable for GPT).
|
161 |
+
count the number of the braces so as to catch compelete text area.
|
162 |
e.g.
|
163 |
+
\caption{blablablablabla\texbf{blablabla}blablabla.}
|
164 |
"""
|
165 |
pattern_compile = re.compile(pattern, flags)
|
166 |
for res in pattern_compile.finditer(text):
|
167 |
brace_level = -1
|
168 |
p = begin = end = res.regs[0][0]
|
169 |
+
for _ in range(1024*16):
|
170 |
+
if text[p] == '}' and brace_level == 0: break
|
171 |
+
elif text[p] == '}': brace_level -= 1
|
172 |
+
elif text[p] == '{': brace_level += 1
|
|
|
|
|
|
|
173 |
p += 1
|
174 |
+
end = p+1
|
175 |
mask[begin:end] = PRESERVE
|
176 |
return text, mask
|
177 |
|
178 |
+
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
|
|
|
|
|
|
|
179 |
"""
|
180 |
Move area out of preserve area (make text editable for GPT)
|
181 |
+
count the number of the braces so as to catch compelete text area.
|
182 |
e.g.
|
183 |
+
\caption{blablablablabla\texbf{blablabla}blablabla.}
|
184 |
"""
|
185 |
pattern_compile = re.compile(pattern, flags)
|
186 |
for res in pattern_compile.finditer(text):
|
187 |
brace_level = 0
|
188 |
p = begin = end = res.regs[1][0]
|
189 |
+
for _ in range(1024*16):
|
190 |
+
if text[p] == '}' and brace_level == 0: break
|
191 |
+
elif text[p] == '}': brace_level -= 1
|
192 |
+
elif text[p] == '{': brace_level += 1
|
|
|
|
|
|
|
193 |
p += 1
|
194 |
end = p
|
195 |
mask[begin:end] = TRANSFORM
|
196 |
if forbid_wrapper:
|
197 |
+
mask[res.regs[0][0]:begin] = PRESERVE
|
198 |
+
mask[end:res.regs[0][1]] = PRESERVE
|
199 |
return text, mask
|
200 |
|
|
|
201 |
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
|
202 |
"""
|
203 |
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
|
204 |
Add it to preserve area
|
205 |
"""
|
206 |
pattern_compile = re.compile(pattern, flags)
|
|
|
207 |
def search_with_line_limit(text, mask):
|
208 |
for res in pattern_compile.finditer(text):
|
209 |
cmd = res.group(1) # begin{what}
|
210 |
+
this = res.group(2) # content between begin and end
|
211 |
+
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
|
212 |
+
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
|
213 |
+
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
|
214 |
+
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
this, this_mask = search_with_line_limit(this, this_mask)
|
216 |
+
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
|
217 |
else:
|
218 |
+
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
|
219 |
return text, mask
|
220 |
+
return search_with_line_limit(text, mask)
|
221 |
|
|
|
222 |
|
223 |
|
224 |
"""
|
|
|
227 |
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
228 |
"""
|
229 |
|
|
|
230 |
def find_main_tex_file(file_manifest, mode):
|
231 |
"""
|
232 |
在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
|
|
|
234 |
"""
|
235 |
canidates = []
|
236 |
for texf in file_manifest:
|
237 |
+
if os.path.basename(texf).startswith('merge'):
|
238 |
continue
|
239 |
+
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
|
240 |
file_content = f.read()
|
241 |
+
if r'\documentclass' in file_content:
|
242 |
canidates.append(texf)
|
243 |
else:
|
244 |
continue
|
245 |
|
246 |
if len(canidates) == 0:
|
247 |
+
raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
|
248 |
elif len(canidates) == 1:
|
249 |
return canidates[0]
|
250 |
+
else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回
|
251 |
canidates_score = []
|
252 |
# 给出一些判定模板文档的词作为扣分项
|
253 |
+
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
|
254 |
+
expected_words = ['\input', '\ref', '\cite']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
for texf in canidates:
|
256 |
canidates_score.append(0)
|
257 |
+
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
|
258 |
file_content = f.read()
|
259 |
file_content = rm_comments(file_content)
|
260 |
for uw in unexpected_words:
|
|
|
263 |
for uw in expected_words:
|
264 |
if uw in file_content:
|
265 |
canidates_score[-1] += 1
|
266 |
+
select = np.argmax(canidates_score) # 取评分最高者返回
|
267 |
return canidates[select]
|
268 |
+
|
|
|
269 |
def rm_comments(main_file):
|
270 |
new_file_remove_comment_lines = []
|
271 |
for l in main_file.splitlines():
|
|
|
274 |
pass
|
275 |
else:
|
276 |
new_file_remove_comment_lines.append(l)
|
277 |
+
main_file = '\n'.join(new_file_remove_comment_lines)
|
278 |
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
|
279 |
+
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
|
280 |
return main_file
|
281 |
|
|
|
282 |
def find_tex_file_ignore_case(fp):
|
283 |
dir_name = os.path.dirname(fp)
|
284 |
base_name = os.path.basename(fp)
|
285 |
# 如果输入的文件路径是正确的
|
286 |
+
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
|
|
|
287 |
# 如果不正确,试着加上.tex后缀试试
|
288 |
+
if not base_name.endswith('.tex'): base_name+='.tex'
|
289 |
+
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
|
|
|
|
|
290 |
# 如果还找不到,解除大小写限制,再试一次
|
291 |
import glob
|
292 |
+
for f in glob.glob(dir_name+'/*.tex'):
|
|
|
293 |
base_name_s = os.path.basename(fp)
|
294 |
base_name_f = os.path.basename(f)
|
295 |
+
if base_name_s.lower() == base_name_f.lower(): return f
|
|
|
296 |
# 试着加上.tex后缀试试
|
297 |
+
if not base_name_s.endswith('.tex'): base_name_s+='.tex'
|
298 |
+
if base_name_s.lower() == base_name_f.lower(): return f
|
|
|
|
|
299 |
return None
|
300 |
|
|
|
301 |
def merge_tex_files_(project_foler, main_file, mode):
|
302 |
"""
|
303 |
Merge Tex project recrusively
|
|
|
308 |
fp = os.path.join(project_foler, f)
|
309 |
fp_ = find_tex_file_ignore_case(fp)
|
310 |
if fp_:
|
311 |
+
with open(fp_, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
|
|
|
|
|
|
|
|
|
312 |
else:
|
313 |
+
raise RuntimeError(f'找不到{fp},Tex源文件缺失!')
|
314 |
c = merge_tex_files_(project_foler, c, mode)
|
315 |
+
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
|
316 |
return main_file
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
def merge_tex_files(project_foler, main_file, mode):
|
319 |
"""
|
320 |
Merge Tex project recrusively
|
|
|
324 |
main_file = merge_tex_files_(project_foler, main_file, mode)
|
325 |
main_file = rm_comments(main_file)
|
326 |
|
327 |
+
if mode == 'translate_zh':
|
328 |
# find paper documentclass
|
329 |
+
pattern = re.compile(r'\\documentclass.*\n')
|
330 |
match = pattern.search(main_file)
|
331 |
assert match is not None, "Cannot find documentclass statement!"
|
332 |
position = match.end()
|
333 |
+
add_ctex = '\\usepackage{ctex}\n'
|
334 |
+
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
|
335 |
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
|
336 |
# fontset=windows
|
337 |
import platform
|
338 |
+
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
|
339 |
+
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
# find paper abstract
|
341 |
+
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
|
342 |
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
|
343 |
match_opt1 = pattern_opt1.search(main_file)
|
344 |
match_opt2 = pattern_opt2.search(main_file)
|
345 |
+
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
return main_file
|
347 |
|
348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
349 |
"""
|
350 |
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
351 |
Post process
|
352 |
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
353 |
"""
|
|
|
|
|
354 |
def mod_inbraket(match):
|
355 |
"""
|
356 |
+
为啥chatgpt会把cite里面的逗号换成中文逗号呀
|
357 |
"""
|
358 |
# get the matched string
|
359 |
cmd = match.group(1)
|
360 |
str_to_modify = match.group(2)
|
361 |
# modify the matched string
|
362 |
+
str_to_modify = str_to_modify.replace(':', ':') # 前面是中文冒号,后面是英文冒号
|
363 |
+
str_to_modify = str_to_modify.replace(',', ',') # 前面是中文逗号,后面是英文逗号
|
364 |
# str_to_modify = 'BOOM'
|
365 |
return "\\" + cmd + "{" + str_to_modify + "}"
|
366 |
|
|
|
367 |
def fix_content(final_tex, node_string):
|
368 |
"""
|
369 |
Fix common GPT errors to increase success rate
|
|
|
374 |
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
375 |
|
376 |
if "Traceback" in final_tex and "[Local Message]" in final_tex:
|
377 |
+
final_tex = node_string # 出问题了,还原原文
|
378 |
+
if node_string.count('\\begin') != final_tex.count('\\begin'):
|
379 |
+
final_tex = node_string # 出问题了,还原原文
|
380 |
+
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
|
381 |
# walk and replace any _ without \
|
382 |
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
|
383 |
|
|
|
385 |
# this function count the number of { and }
|
386 |
brace_level = 0
|
387 |
for c in string:
|
388 |
+
if c == "{": brace_level += 1
|
389 |
+
elif c == "}": brace_level -= 1
|
|
|
|
|
390 |
return brace_level
|
|
|
391 |
def join_most(tex_t, tex_o):
|
392 |
# this function join translated string and original string when something goes wrong
|
393 |
p_t = 0
|
394 |
p_o = 0
|
|
|
395 |
def find_next(string, chars, begin):
|
396 |
p = begin
|
397 |
while p < len(string):
|
398 |
+
if string[p] in chars: return p, string[p]
|
|
|
399 |
p += 1
|
400 |
return None, None
|
|
|
401 |
while True:
|
402 |
+
res1, char = find_next(tex_o, ['{','}'], p_o)
|
403 |
+
if res1 is None: break
|
|
|
404 |
res2, char = find_next(tex_t, [char], p_t)
|
405 |
+
if res2 is None: break
|
|
|
406 |
p_o = res1 + 1
|
407 |
p_t = res2 + 1
|
408 |
return tex_t[:p_t] + tex_o[p_o:]
|
|
|
411 |
# 出问题了,还原部分原文,保证括号正确
|
412 |
final_tex = join_most(final_tex, node_string)
|
413 |
return final_tex
|
414 |
+
|
|
|
415 |
def compile_latex_with_timeout(command, cwd, timeout=60):
|
416 |
import subprocess
|
417 |
+
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
|
|
|
|
|
|
|
418 |
try:
|
419 |
stdout, stderr = process.communicate(timeout=timeout)
|
420 |
except subprocess.TimeoutExpired:
|
|
|
425 |
return True
|
426 |
|
427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
|
429 |
+
def merge_pdfs(pdf1_path, pdf2_path, output_path):
|
430 |
+
import PyPDF2
|
431 |
Percent = 0.95
|
|
|
432 |
# Open the first PDF file
|
433 |
+
with open(pdf1_path, 'rb') as pdf1_file:
|
434 |
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
|
435 |
# Open the second PDF file
|
436 |
+
with open(pdf2_path, 'rb') as pdf2_file:
|
437 |
pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
|
438 |
# Create a new PDF file to store the merged pages
|
439 |
output_writer = PyPDF2.PdfFileWriter()
|
|
|
453 |
page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
|
454 |
# Create a new empty page with double width
|
455 |
new_page = PyPDF2.PageObject.createBlankPage(
|
456 |
+
width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
|
457 |
+
height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
|
|
|
|
|
|
|
458 |
)
|
459 |
new_page.mergeTranslatedPage(page1, 0, 0)
|
460 |
+
new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
output_writer.addPage(new_page)
|
462 |
# Save the merged PDF file
|
463 |
+
with open(output_path, 'wb') as output_file:
|
464 |
output_writer.write(output_file)
|
|
|
|
|
|
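The diff above compresses the masking machinery in `latex_toolbox.py` into one-liners without changing its core idea: every character of the merged TeX source carries a PRESERVE/TRANSFORM flag, and runs of equal flags become linked-list nodes that are either sent to GPT or copied through untouched. A minimal self-contained sketch of that idea follows; the regex and demo text are illustrative, not taken from the repository:

```python
import re
import numpy as np

PRESERVE = 0   # GPT must not touch these characters
TRANSFORM = 1  # GPT may rewrite these characters

text = r"Some prose. \begin{equation}E=mc^2\end{equation} More prose."
mask = np.full(len(text), TRANSFORM, dtype=np.uint8)

# Mark everything inside \begin{equation}...\end{equation} as untouchable,
# mirroring what set_forbidden_text does with its pattern argument.
for res in re.finditer(r"\\begin\{equation\}.*?\\end\{equation\}", text):
    mask[res.span()[0]:res.span()[1]] = PRESERVE

# Collapse runs of identical flags into segments, which is what
# convert_to_linklist does with LinkedListNode.
segments, last = [], None
for c, m in zip(text, mask):
    if segments and m == last:
        segments[-1][0] += c
    else:
        segments.append([c, m])
    last = m
for s, m in segments:
    print("PRESERVE " if m == PRESERVE else "TRANSFORM", repr(s))
```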
crazy_functions/live_audio/aliyunASR.py
CHANGED
@@ -1,106 +1,4 @@
-import time, logging, json
-import numpy as np
-from scipy.io.wavfile import WAVE_FORMAT
-
-def write_numpy_to_wave(filename, rate, data, add_header=False):
-    """
-    Write a NumPy array as a WAV file.
-    """
-    def _array_tofile(fid, data):
-        # ravel gives a c-contiguous buffer
-        fid.write(data.ravel().view('b').data)
-
-    if hasattr(filename, 'write'):
-        fid = filename
-    else:
-        fid = open(filename, 'wb')
-
-    fs = rate
-
-    try:
-        dkind = data.dtype.kind
-        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
-                                                 data.dtype.itemsize == 1)):
-            raise ValueError("Unsupported data type '%s'" % data.dtype)
-
-        header_data = b''
-
-        header_data += b'RIFF'
-        header_data += b'\x00\x00\x00\x00'
-        header_data += b'WAVE'
-
-        # fmt chunk
-        header_data += b'fmt '
-        if dkind == 'f':
-            format_tag = WAVE_FORMAT.IEEE_FLOAT
-        else:
-            format_tag = WAVE_FORMAT.PCM
-        if data.ndim == 1:
-            channels = 1
-        else:
-            channels = data.shape[1]
-        bit_depth = data.dtype.itemsize * 8
-        bytes_per_second = fs*(bit_depth // 8)*channels
-        block_align = channels * (bit_depth // 8)
-
-        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
-                                     bytes_per_second, block_align, bit_depth)
-        if not (dkind == 'i' or dkind == 'u'):
-            # add cbSize field for non-PCM files
-            fmt_chunk_data += b'\x00\x00'
-
-        header_data += struct.pack('<I', len(fmt_chunk_data))
-        header_data += fmt_chunk_data
-
-        # fact chunk (non-PCM files)
-        if not (dkind == 'i' or dkind == 'u'):
-            header_data += b'fact'
-            header_data += struct.pack('<II', 4, data.shape[0])
-
-        # check data size (needs to be immediately before the data chunk)
-        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
-            raise ValueError("Data exceeds wave file size limit")
-        if add_header:
-            fid.write(header_data)
-        # data chunk
-        fid.write(b'data')
-        fid.write(struct.pack('<I', data.nbytes))
-        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
-                                           sys.byteorder == 'big'):
-            data = data.byteswap()
-        _array_tofile(fid, data)
-
-        if add_header:
-            # Determine file size and place it in correct
-            # position at start of the file.
-            size = fid.tell()
-            fid.seek(4)
-            fid.write(struct.pack('<I', size-8))
-
-    finally:
-        if not hasattr(filename, 'write'):
-            fid.close()
-        else:
-            fid.seek(0)
-
-def is_speaker_speaking(vad, data, sample_rate):
-    # Function to detect if the speaker is speaking
-    # The WebRTC VAD only accepts 16-bit mono PCM audio,
-    # sampled at 8000, 16000, 32000 or 48000 Hz.
-    # A frame must be either 10, 20, or 30 ms in duration:
-    frame_duration = 30
-    n_bit_each = int(sample_rate * frame_duration / 1000)*2 # x2 because audio is 16 bit (2 bytes)
-    res_list = []
-    for t in range(len(data)):
-        if t!=0 and t % n_bit_each == 0:
-            res_list.append(vad.is_speech(data[t-n_bit_each:t], sample_rate))
-
-    info = ''.join(['^' if r else '.' for r in res_list])
-    info = info[:10]
-    if any(res_list):
-        return True, info
-    else:
-        return False, info
+import time, logging, json


 class AliyunASR():
@@ -168,22 +66,12 @@ class AliyunASR():
             on_close=self.test_on_close,
             callback_args=[uuid.hex]
         )

         r = sr.start(aformat="pcm",
-                     timeout=timeout_limit_second,
                      enable_intermediate_result=True,
                      enable_punctuation_prediction=True,
                      enable_inverse_text_normalization=True)

-        import webrtcvad
-        vad = webrtcvad.Vad()
-        vad.set_mode(1)
-
-        is_previous_frame_transmitted = False  # 上一帧是否有人说话
-        previous_frame_data = None
-        echo_cnt = 0            # 在没有声音之后,继续向服务器发送n次音频数据
-        echo_cnt_max = 4        # 在没有声音之后,继续向服务器发送n次音频数据
-        keep_alive_last_send_time = time.time()
         while not self.stop:
             # time.sleep(self.capture_interval)
             audio = rad.read(uuid.hex)
@@ -191,32 +79,12 @@ class AliyunASR():
                 # convert to pcm file
                 temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
                 dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
-                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
+                io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata)
                 # read pcm binary
                 with open(temp_file, "rb") as f: data = f.read()
-                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)
-
-                if is_speaking or echo_cnt > 0:
-                    # 如果话筒激活 / 如果处于回声收尾阶段
-                    echo_cnt -= 1
-                    if not is_previous_frame_transmitted:  # 上一帧没有人声,但是我们把上一帧同样加上
-                        if previous_frame_data is not None: data = previous_frame_data + data
-                    if is_speaking:
-                        echo_cnt = echo_cnt_max
-                    slices = zip(*(iter(data),) * 640)  # 640个字节为一组
-                    for i in slices: sr.send_audio(bytes(i))
-                    keep_alive_last_send_time = time.time()
-                    is_previous_frame_transmitted = True
-                else:
-                    is_previous_frame_transmitted = False
-                    echo_cnt = 0
-                    # 保持链接激活,即使没有声音,也根据时间间隔,发送一些音频片段给服务器
-                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
-                        slices = zip(*(iter(data),) * 640)  # 640个字节为一组
-                        for i in slices: sr.send_audio(bytes(i))
-                        keep_alive_last_send_time = time.time()
-                        is_previous_frame_transmitted = True
-                self.audio_shape = info
+                # print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640)
+                slices = zip(*(iter(data),) * 640) # 640个字节为一组
+                for i in slices: sr.send_audio(bytes(i))
             else:
                 time.sleep(0.1)
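On the `+` side the recognizer loop pushes raw PCM to the Aliyun service in fixed 640-byte frames using the `zip(*(iter(data),) * 640)` idiom. A short sketch of why that grouping works; the byte payload here is fabricated:

```python
# One iterator repeated 640 times inside zip() hands out consecutive,
# non-overlapping 640-byte frames; any tail shorter than 640 bytes is dropped.
data = bytes(2000)  # stand-in for 16-bit PCM audio read from the temp file
frames = [bytes(chunk) for chunk in zip(*(iter(data),) * 640)]
print(len(frames), len(frames[0]))  # -> 3 640  (80 trailing bytes discarded)
```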
crazy_functions/live_audio/audio_io.py
CHANGED
@@ -35,7 +35,7 @@ class RealtimeAudioDistribution():
     def read(self, uuid):
         if uuid in self.data:
             res = self.data.pop(uuid)
-            # print('\r read-', len(res), '-', max(res), end='', flush=True)
+            print('\r read-', len(res), '-', max(res), end='', flush=True)
         else:
             res = None
         return res
crazy_functions/multi_stage/multi_stage_utils.py
DELETED
@@ -1,93 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import List
-from toolbox import update_ui_lastest_msg, disable_auto_promotion
-from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
-from request_llms.bridge_all import predict_no_ui_long_connection
-from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
-import time
-import pickle
-
-def have_any_recent_upload_files(chatbot):
-    _5min = 5 * 60
-    if not chatbot: return False    # chatbot is None
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    if not most_recent_uploaded: return False   # most_recent_uploaded is None
-    if time.time() - most_recent_uploaded["time"] < _5min: return True # most_recent_uploaded is new
-    else: return False  # most_recent_uploaded is too old
-
-class GptAcademicState():
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        pass
-
-    def dump_state(self, chatbot):
-        chatbot._cookies['plugin_state'] = pickle.dumps(self)
-
-    def set_state(self, chatbot, key, value):
-        setattr(self, key, value)
-        chatbot._cookies['plugin_state'] = pickle.dumps(self)
-
-    def get_state(chatbot, cls=None):
-        state = chatbot._cookies.get('plugin_state', None)
-        if state is not None: state = pickle.loads(state)
-        elif cls is not None: state = cls()
-        else: state = GptAcademicState()
-        state.chatbot = chatbot
-        return state
-
-
-class GptAcademicGameBaseState():
-    """
-    1. first init: __init__ ->
-    """
-    def init_game(self, chatbot, lock_plugin):
-        self.plugin_name = None
-        self.callback_fn = None
-        self.delete_game = False
-        self.step_cnt = 0
-
-    def lock_plugin(self, chatbot):
-        if self.callback_fn is None:
-            raise ValueError("callback_fn is None")
-        chatbot._cookies['lock_plugin'] = self.callback_fn
-        self.dump_state(chatbot)
-
-    def get_plugin_name(self):
-        if self.plugin_name is None:
-            raise ValueError("plugin_name is None")
-        return self.plugin_name
-
-    def dump_state(self, chatbot):
-        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
-
-    def set_state(self, chatbot, key, value):
-        setattr(self, key, value)
-        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
-
-    @staticmethod
-    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
-        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
-        if state is not None:
-            state = pickle.loads(state)
-        else:
-            state = cls()
-            state.init_game(chatbot, lock_plugin)
-        state.plugin_name = plugin_name
-        state.llm_kwargs = llm_kwargs
-        state.chatbot = chatbot
-        state.callback_fn = callback_fn
-        return state
-
-    def continue_game(self, prompt, chatbot, history):
-        # 游戏主体
-        yield from self.step(prompt, chatbot, history)
-        self.step_cnt += 1
-        # 保存状态,收尾
-        self.dump_state(chatbot)
-        # 如果游戏结束,清理
-        if self.delete_game:
-            chatbot._cookies['lock_plugin'] = None
-            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
-        yield from update_ui(chatbot=chatbot, history=history)
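The deleted `multi_stage_utils.py` kept plugin state alive between calls by pickling the state object into the chatbot cookie dict. A minimal sketch of that round-trip; the plain dict stands in for the project's `chatbot._cookies`, and `DemoState` is hypothetical:

```python
import pickle

class DemoState:
    """Hypothetical stand-in for GptAcademicState."""
    def __init__(self):
        self.step_cnt = 0

cookies = {}  # stand-in for chatbot._cookies

state = DemoState()
state.step_cnt += 1
cookies['plugin_state'] = pickle.dumps(state)      # like dump_state()

restored = pickle.loads(cookies['plugin_state'])   # like get_state()
print(restored.step_cnt)  # -> 1: state survives across plugin invocations
```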
crazy_functions/pdf_fns/breakdown_txt.py
DELETED
@@ -1,125 +0,0 @@
-from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
-
-def force_breakdown(txt, limit, get_token_fn):
-    """ 当无法用标点、空行分割时,我们用最暴力的方法切割
-    """
-    for i in reversed(range(len(txt))):
-        if get_token_fn(txt[:i]) < limit:
-            return txt[:i], txt[i:]
-    return "Tiktoken未知错误", "Tiktoken未知错误"
-
-
-def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
-    """ 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage
-    当 remain_txt_to_cut < `_min` 时,我们再把 remain_txt_to_cut_storage 中的部分文字取出
-    """
-    _min = int(5e4)
-    _max = int(1e5)
-    # print(len(remain_txt_to_cut), len(remain_txt_to_cut_storage))
-    if len(remain_txt_to_cut) < _min and len(remain_txt_to_cut_storage) > 0:
-        remain_txt_to_cut = remain_txt_to_cut + remain_txt_to_cut_storage
-        remain_txt_to_cut_storage = ""
-    if len(remain_txt_to_cut) > _max:
-        remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage
-        remain_txt_to_cut = remain_txt_to_cut[:_max]
-    return remain_txt_to_cut, remain_txt_to_cut_storage
-
-
-def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
-    """ 文本切分
-    """
-    res = []
-    total_len = len(txt_tocut)
-    fin_len = 0
-    remain_txt_to_cut = txt_tocut
-    remain_txt_to_cut_storage = ""
-    # 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage
-    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
-
-    while True:
-        if get_token_fn(remain_txt_to_cut) <= limit:
-            # 如果剩余文本的token数小于限制,那么就不用切了
-            res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
-            break
-        else:
-            # 如果剩余文本的token数大于限制,那么就切
-            lines = remain_txt_to_cut.split('\n')
-
-            # 估计一个切分点
-            estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
-            estimated_line_cut = int(estimated_line_cut)
-
-            # 开始查找合适切分点的偏移(cnt)
-            cnt = 0
-            for cnt in reversed(range(estimated_line_cut)):
-                if must_break_at_empty_line:
-                    # 首先尝试用双空行(\n\n)作为切分点
-                    if lines[cnt] != "":
-                        continue
-                prev = "\n".join(lines[:cnt])
-                post = "\n".join(lines[cnt:])
-                if get_token_fn(prev) < limit:
-                    break
-
-            if cnt == 0:
-                # 如果没有找到合适的切分点
-                if break_anyway:
-                    # 是否允许暴力切分
-                    prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
-                else:
-                    # 不允许直接报错
-                    raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")
-
-            # 追加列表
-            res.append(prev); fin_len+=len(prev)
-            # 准备下一次迭代
-            remain_txt_to_cut = post
-            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
-            process = fin_len/total_len
-            print(f'正在文本切分 {int(process*100)}%')
-            if len(remain_txt_to_cut.strip()) == 0:
-                break
-    return res
-
-
-def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
-    """ 使用多种方式尝试切分文本,以满足 token 限制
-    """
-    from request_llms.bridge_all import model_info
-    enc = model_info[llm_model]['tokenizer']
-    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
-    try:
-        # 第1次尝试,将双空行(\n\n)作为切分点
-        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
-    except RuntimeError:
-        try:
-            # 第2次尝试,将单空行(\n)作为切分点
-            return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
-        except RuntimeError:
-            try:
-                # 第3次尝试,将英文句号(.)作为切分点
-                res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
-                return [r.replace('。\n', '.') for r in res]
-            except RuntimeError as e:
-                try:
-                    # 第4次尝试,将中文句号(。)作为切分点
-                    res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
-                    return [r.replace('。。\n', '。') for r in res]
-                except RuntimeError as e:
-                    # 第5次尝试,没办法了,随便切一下吧
-                    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
-
-breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)
-
-if __name__ == '__main__':
-    from crazy_functions.crazy_utils import read_and_clean_pdf_text
-    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")
-
-    from request_llms.bridge_all import model_info
-    for i in range(5):
-        file_content += file_content
-
-    print(len(file_content))
-    TOKEN_LIMIT_PER_FRAGMENT = 2500
-    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
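The deleted splitter retries with progressively weaker break points (blank line, newline, sentence period) before falling back to a hard cut. A runnable sketch of the same cascade, with plain `len()` standing in for the tiktoken-based `get_token_fn` so it needs no project imports:

```python
# Same cascade idea as the deleted breakdown_txt.py, with len() standing in
# for the tokenizer so the sketch runs without request_llms.
def split_with_separator(txt, limit, sep):
    pieces, buf = [], ""
    for part in txt.split(sep):
        candidate = buf + (sep if buf else "") + part
        if len(candidate) <= limit:
            buf = candidate
        else:
            if buf: pieces.append(buf)
            buf = part
    if buf: pieces.append(buf)
    if any(len(p) > limit for p in pieces):
        raise RuntimeError("a single fragment is still too long")
    return pieces

def breakdown(txt, limit):
    for sep in ("\n\n", "\n", ". "):      # weaker and weaker break points
        try:
            return split_with_separator(txt, limit, sep)
        except RuntimeError:
            continue
    # last resort: hard cut, like force_breakdown in the deleted file
    return [txt[i:i+limit] for i in range(0, len(txt), limit)]

print(breakdown("para one.\n\npara two.\n\npara three.", limit=15))
```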
crazy_functions/pdf_fns/parse_pdf.py
CHANGED
@@ -1,26 +1,16 @@
|
|
1 |
-
from functools import lru_cache
|
2 |
-
from toolbox import gen_time_str
|
3 |
-
from toolbox import promote_file_to_downloadzone
|
4 |
-
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
5 |
-
from toolbox import get_conf
|
6 |
-
from toolbox import ProxyNetworkActivate
|
7 |
-
from colorful import *
|
8 |
import requests
|
9 |
import random
|
10 |
-
import
|
11 |
-
import os
|
12 |
-
import math
|
13 |
-
|
14 |
class GROBID_OFFLINE_EXCEPTION(Exception): pass
|
15 |
|
16 |
def get_avail_grobid_url():
|
17 |
-
|
|
|
18 |
if len(GROBID_URLS) == 0: return None
|
19 |
try:
|
20 |
_grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
|
21 |
if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
|
22 |
-
|
23 |
-
res = requests.get(_grobid_url+'/api/isalive')
|
24 |
if res.text=='true': return _grobid_url
|
25 |
else: return None
|
26 |
except:
|
@@ -31,141 +21,10 @@ def parse_pdf(pdf_path, grobid_url):
|
|
31 |
import scipdf # pip install scipdf_parser
|
32 |
if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
|
33 |
try:
|
34 |
-
|
35 |
-
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
|
36 |
except GROBID_OFFLINE_EXCEPTION:
|
37 |
raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
|
38 |
except:
|
39 |
raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
|
40 |
return article_dict
|
41 |
|
42 |
-
|
43 |
-
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
|
44 |
-
# -=-=-=-=-=-=-=-= 写出第1个文件:翻译前后混合 -=-=-=-=-=-=-=-=
|
45 |
-
res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
|
46 |
-
promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
|
47 |
-
-    generated_conclusion_files.append(res_path)
-
-    # -=-=-=-=-=-=-=-= 写出第2个文件:仅翻译后的文本 -=-=-=-=-=-=-=-=
-    translated_res_array = []
-    # 记录当前的大章节标题:
-    last_section_name = ""
-    for index, value in enumerate(gpt_response_collection):
-        # 先挑选偶数序列号:
-        if index % 2 != 0:
-            # 先提取当前英文标题:
-            cur_section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
-            # 如果index是1的话,则直接使用first section name:
-            if cur_section_name != last_section_name:
-                cur_value = cur_section_name + '\n'
-                last_section_name = copy.deepcopy(cur_section_name)
-            else:
-                cur_value = ""
-            # 再做一个小修改:重新修改当前part的标题,默认用英文的
-            cur_value += value
-            translated_res_array.append(cur_value)
-    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + translated_res_array,
-                                     file_basename = f"{gen_time_str()}-translated_only.md",
-                                     file_fullname = None,
-                                     auto_caption = False)
-    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
-    generated_conclusion_files.append(res_path)
-    return res_path
-
-def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
-    from crazy_functions.pdf_fns.report_gen_html import construct_html
-    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
-    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
-
-    prompt = "以下是一篇学术论文的基本信息:\n"
-    # title
-    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
-    # authors
-    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
-    # abstract
-    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
-    # command
-    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
-    meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]
-
-    # 单线,获取文章meta信息
-    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=prompt,
-        inputs_show_user=prompt,
-        llm_kwargs=llm_kwargs,
-        chatbot=chatbot, history=[],
-        sys_prompt="You are an academic paper reader。",
-    )
-
-    # 多线,翻译
-    inputs_array = []
-    inputs_show_user_array = []
-
-    # get_token_num
-    from request_llms.bridge_all import model_info
-    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-
-    def break_down(txt):
-        raw_token_num = get_token_num(txt)
-        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
-            return [txt]
-        else:
-            # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
-            # find a smooth token limit to achieve even seperation
-            count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
-            token_limit_smooth = raw_token_num // count + count
-            return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])
-
-    for section in article_dict.get('sections'):
-        if len(section['text']) == 0: continue
-        section_frags = break_down(section['text'])
-        for i, fragment in enumerate(section_frags):
-            heading = section['heading']
-            if len(section_frags) > 1: heading += f' Part-{i+1}'
-            inputs_array.append(
-                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
-            )
-            inputs_show_user_array.append(
-                f"# {heading}\n\n{fragment}"
-            )
-
-    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
-        inputs_array=inputs_array,
-        inputs_show_user_array=inputs_show_user_array,
-        llm_kwargs=llm_kwargs,
-        chatbot=chatbot,
-        history_array=[meta for _ in inputs_array],
-        sys_prompt_array=[
-            "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
-    )
-    # -=-=-=-=-=-=-=-= 写出Markdown文件 -=-=-=-=-=-=-=-=
-    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)
-
-    # -=-=-=-=-=-=-=-= 写出HTML文件 -=-=-=-=-=-=-=-=
-    ch = construct_html()
-    orig = ""
-    trans = ""
-    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
-    for i,k in enumerate(gpt_response_collection_html):
-        if i%2==0:
-            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
-        else:
-            # 先提取当前英文标题:
-            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
-            cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
-            gpt_response_collection_html[i] = cur_value
-
-    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
-    final.extend(gpt_response_collection_html)
-    for i, k in enumerate(final):
-        if i%2==0:
-            orig = k
-        if i%2==1:
-            trans = k
-            ch.add_row(a=orig, b=trans)
-    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
-    html_file = ch.save_file(create_report_file_name)
-    generated_conclusion_files.append(html_file)
-    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
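One detail in the deleted break_down helper deserves a note: instead of cutting at the hard token limit, which tends to leave one small ragged tail fragment, it derives a smoothed per-fragment limit so the pieces come out roughly equal. A minimal sketch of that arithmetic, outside the plugin machinery:

import math

def smooth_limit(raw_token_num: int, hard_limit: int) -> int:
    # Fragments needed if we cut at the hard limit.
    count = math.ceil(raw_token_num / hard_limit)
    # Target roughly raw/count tokens per fragment; the '+ count' slack
    # absorbs integer rounding so no extra tiny fragment is produced.
    return raw_token_num // count + count

print(smooth_limit(2500, 1024))  # -> 836: three ~836-token pieces instead of 1024 + 1024 + 452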
  1     import requests
  2     import random
  3  +  from functools import lru_cache
  4     class GROBID_OFFLINE_EXCEPTION(Exception): pass
  5
  6     def get_avail_grobid_url():
  7  +      from toolbox import get_conf
  8  +      GROBID_URLS, = get_conf('GROBID_URLS')
  9         if len(GROBID_URLS) == 0: return None
 10         try:
 11             _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
 12             if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
 13  +          res = requests.get(_grobid_url+'/api/isalive')
 14             if res.text=='true': return _grobid_url
 15             else: return None
 16         except:
 21         import scipdf # pip install scipdf_parser
 22         if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
 23         try:
 24  +          article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
 25         except GROBID_OFFLINE_EXCEPTION:
 26             raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
 27         except:
 28             raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
 29         return article_dict
 30
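Taken together, the hunks above restore a GROBID round-trip: pick a live endpoint, then hand the PDF to scipdf. A minimal sketch of the calling pattern (the file name is a placeholder, and a reachable GROBID service is assumed):

# Minimal usage sketch for the two helpers above; 'paper.pdf' is a placeholder.
grobid_url = get_avail_grobid_url()     # random choice among GROBID_URLS + /api/isalive probe
if grobid_url is None:
    raise RuntimeError("no GROBID endpoint answered the liveness probe")
article_dict = parse_pdf('paper.pdf', grobid_url)  # title / authors / abstract / sections
print(article_dict['title'], len(article_dict['sections']))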
crazy_functions/pdf_fns/parse_word.py
DELETED
@@ -1,85 +0,0 @@
-from crazy_functions.crazy_utils import read_and_clean_pdf_text, get_files_from_everything
-import os
-import re
-def extract_text_from_files(txt, chatbot, history):
-    """
-    查找pdf/md/word并获取文本内容并返回状态以及文本
-
-    输入参数 Args:
-        chatbot: chatbot inputs and outputs (用户界面对话窗口句柄,用于数据流可视化)
-        history (list): List of chat history (历史,对话历史列表)
-
-    输出 Returns:
-        文件是否存在(bool)
-        final_result(list):文本内容
-        page_one(list):第一页内容/摘要
-        file_manifest(list):文件路径
-        excption(string):需要用户手动处理的信息,如没出错则保持为空
-    """
-
-    final_result = []
-    page_one = []
-    file_manifest = []
-    excption = ""
-
-    if txt == "":
-        final_result.append(txt)
-        return False, final_result, page_one, file_manifest, excption  # 如输入区内容不是文件则直接返回输入区内容
-
-    # 查找输入区内容中的文件
-    file_pdf, pdf_manifest, folder_pdf = get_files_from_everything(txt, '.pdf')
-    file_md, md_manifest, folder_md = get_files_from_everything(txt, '.md')
-    file_word, word_manifest, folder_word = get_files_from_everything(txt, '.docx')
-    file_doc, doc_manifest, folder_doc = get_files_from_everything(txt, '.doc')
-
-    if file_doc:
-        excption = "word"
-        return False, final_result, page_one, file_manifest, excption
-
-    file_num = len(pdf_manifest) + len(md_manifest) + len(word_manifest)
-    if file_num == 0:
-        final_result.append(txt)
-        return False, final_result, page_one, file_manifest, excption  # 如输入区内容不是文件则直接返回输入区内容
-
-    if file_pdf:
-        try:  # 尝试导入依赖,如果缺少依赖,则给出安装建议
-            import fitz
-        except:
-            excption = "pdf"
-            return False, final_result, page_one, file_manifest, excption
-        for index, fp in enumerate(pdf_manifest):
-            file_content, pdf_one = read_and_clean_pdf_text(fp)  # (尝试)按照章节切割PDF
-            file_content = file_content.encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
-            pdf_one = str(pdf_one).encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
-            final_result.append(file_content)
-            page_one.append(pdf_one)
-            file_manifest.append(os.path.relpath(fp, folder_pdf))
-
-    if file_md:
-        for index, fp in enumerate(md_manifest):
-            with open(fp, 'r', encoding='utf-8', errors='replace') as f:
-                file_content = f.read()
-            file_content = file_content.encode('utf-8', 'ignore').decode()
-            headers = re.findall(r'^#\s(.*)$', file_content, re.MULTILINE)  # 接下来提取md中的一级/二级标题作为摘要
-            if len(headers) > 0:
-                page_one.append("\n".join(headers))  # 合并所有的标题,以换行符分割
-            else:
-                page_one.append("")
-            final_result.append(file_content)
-            file_manifest.append(os.path.relpath(fp, folder_md))
-
-    if file_word:
-        try:  # 尝试导入依赖,如果缺少依赖,则给出安装建议
-            from docx import Document
-        except:
-            excption = "word_pip"
-            return False, final_result, page_one, file_manifest, excption
-        for index, fp in enumerate(word_manifest):
-            doc = Document(fp)
-            file_content = '\n'.join([p.text for p in doc.paragraphs])
-            file_content = file_content.encode('utf-8', 'ignore').decode()
-            page_one.append(file_content[:200])
-            final_result.append(file_content)
-            file_manifest.append(os.path.relpath(fp, folder_word))
-
-    return True, final_result, page_one, file_manifest, excption
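The five-value return protocol above is easy to misread, so here is a hypothetical caller showing how each branch is meant to be handled (the function and the remediation hints are illustrative, not part of the project):

def handle_user_input(user_input, chatbot, history):
    # Assumes the deleted module above; illustrates the 5-tuple protocol only.
    ok, texts, summaries, manifest, excption = extract_text_from_files(user_input, chatbot, history)
    if ok:
        return texts, summaries, manifest            # files found and parsed
    if excption == "word":
        raise RuntimeError("legacy .doc detected, convert it to .docx first")
    if excption == "pdf":
        raise RuntimeError("missing PDF dependency, try: pip install pymupdf")
    if excption == "word_pip":
        raise RuntimeError("missing docx dependency, try: pip install python-docx")
    return [user_input], [], []                      # input was plain text, echoed back in texts[0]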
crazy_functions/pdf_fns/report_gen_html.py
DELETED
@@ -1,58 +0,0 @@
-from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
-import os
-
-
-
-
-class construct_html():
-    def __init__(self) -> None:
-        self.html_string = ""
-
-    def add_row(self, a, b):
-        from toolbox import markdown_convertion
-        template = """
-            {
-                primary_col: {
-                    header: String.raw`__PRIMARY_HEADER__`,
-                    msg: String.raw`__PRIMARY_MSG__`,
-                },
-                secondary_rol: {
-                    header: String.raw`__SECONDARY_HEADER__`,
-                    msg: String.raw`__SECONDARY_MSG__`,
-                }
-            },
-        """
-        def std(str):
-            str = str.replace(r'`', r'&#96;')
-            if str.endswith("\\"): str += ' '
-            if str.endswith("}"): str += ' '
-            if str.endswith("$"): str += ' '
-            return str
-
-        template_ = template
-        a_lines = a.split('\n')
-        b_lines = b.split('\n')
-
-        if len(a_lines) == 1 or len(a_lines[0]) > 50:
-            template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
-            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
-        else:
-            template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
-            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))
-
-        if len(b_lines) == 1 or len(b_lines[0]) > 50:
-            template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
-            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
-        else:
-            template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
-            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
-        self.html_string += template_
-
-    def save_file(self, file_name):
-        from toolbox import get_log_folder
-        with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
-            html_template = f.read()
-        html_template = html_template.replace("__TF_ARR__", self.html_string)
-        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
-            f.write(html_template.encode('utf-8', 'ignore').decode())
-        return os.path.join(get_log_folder(), file_name)
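construct_html is plain placeholder substitution: each add_row call appends one entry of a JS array literal, and save_file splices the accumulated string into the HTML template. A stripped-down sketch of the same pattern (the template below is made up for illustration; the real one is report_template.html):

# Standalone sketch of the __TF_ARR__ substitution pattern used above.
TEMPLATE = "<html><body><script>const rows = [__TF_ARR__];</script></body></html>"

rows = ""
for left, right in [("Section 1", "第一节"), ("Section 2", "第二节")]:
    rows += "{a: String.raw`%s`, b: String.raw`%s`},\n" % (left, right)

with open("report.html", "w", encoding="utf8") as f:
    f.write(TEMPLATE.replace("__TF_ARR__", rows))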
crazy_functions/pdf_fns/report_template.html
DELETED
The diff for this file is too large to render. See the raw diff.

crazy_functions/vector_fns/__init__.py
DELETED
File without changes
crazy_functions/vector_fns/general_file_loader.py
DELETED
@@ -1,70 +0,0 @@
-# From project chatglm-langchain
-
-
-from langchain.document_loaders import UnstructuredFileLoader
-from langchain.text_splitter import CharacterTextSplitter
-import re
-from typing import List
-
-class ChineseTextSplitter(CharacterTextSplitter):
-    def __init__(self, pdf: bool = False, sentence_size: int = None, **kwargs):
-        super().__init__(**kwargs)
-        self.pdf = pdf
-        self.sentence_size = sentence_size
-
-    def split_text1(self, text: str) -> List[str]:
-        if self.pdf:
-            text = re.sub(r"\n{3,}", "\n", text)
-            text = re.sub('\s', ' ', text)
-            text = text.replace("\n\n", "")
-        sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))')  # del :;
-        sent_list = []
-        for ele in sent_sep_pattern.split(text):
-            if sent_sep_pattern.match(ele) and sent_list:
-                sent_list[-1] += ele
-            elif ele:
-                sent_list.append(ele)
-        return sent_list
-
-    def split_text(self, text: str) -> List[str]:   ##此处需要进一步优化逻辑
-        if self.pdf:
-            text = re.sub(r"\n{3,}", r"\n", text)
-            text = re.sub('\s', " ", text)
-            text = re.sub("\n\n", "", text)
-
-        text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text)  # 单字符断句符
-        text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text)  # 英文省略号
-        text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text)  # 中文省略号
-        text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text)
-        # 如果双引号前有终止符,那么双引号才是句子的终点,把分句符\n放到双引号后,注意前面的几句都小心保留了双引号
-        text = text.rstrip()  # 段尾如果有多余的\n就去掉它
-        # 很多规则中会考虑分号;,但是这里我把它忽略不计,破折号、英文双引号等同样忽略,需要的再做些简单调整即可。
-        ls = [i for i in text.split("\n") if i]
-        for ele in ls:
-            if len(ele) > self.sentence_size:
-                ele1 = re.sub(r'([,,.]["’”」』]{0,2})([^,,.])', r'\1\n\2', ele)
-                ele1_ls = ele1.split("\n")
-                for ele_ele1 in ele1_ls:
-                    if len(ele_ele1) > self.sentence_size:
-                        ele_ele2 = re.sub(r'([\n]{1,}| {2,}["’”」』]{0,2})([^\s])', r'\1\n\2', ele_ele1)
-                        ele2_ls = ele_ele2.split("\n")
-                        for ele_ele2 in ele2_ls:
-                            if len(ele_ele2) > self.sentence_size:
-                                ele_ele3 = re.sub('( ["’”」』]{0,2})([^ ])', r'\1\n\2', ele_ele2)
-                                ele2_id = ele2_ls.index(ele_ele2)
-                                ele2_ls = ele2_ls[:ele2_id] + [i for i in ele_ele3.split("\n") if i] + ele2_ls[
-                                                                                                       ele2_id + 1:]
-                        ele_id = ele1_ls.index(ele_ele1)
-                        ele1_ls = ele1_ls[:ele_id] + [i for i in ele2_ls if i] + ele1_ls[ele_id + 1:]
-
-                id = ls.index(ele)
-                ls = ls[:id] + [i for i in ele1_ls if i] + ls[id + 1:]
-        return ls
-
-def load_file(filepath, sentence_size):
-    loader = UnstructuredFileLoader(filepath, mode="elements")
-    textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
-    docs = loader.load_and_split(text_splitter=textsplitter)
-    # write_check_file(filepath, docs)
-    return docs
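For orientation, load_file is the only consumer of the splitter above; a minimal standalone sketch of the same splitting step (assumes the class definition above is importable):

# Split raw Chinese text directly, without going through UnstructuredFileLoader.
splitter = ChineseTextSplitter(pdf=False, sentence_size=100)
for sentence in splitter.split_text("第一句话。第二句话!第三句话?"):
    print(sentence)
# prints the three sentences on separate lines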
crazy_functions/vector_fns/vector_database.py
DELETED
@@ -1,338 +0,0 @@
-# From project chatglm-langchain
-
-import threading
-from toolbox import Singleton
-import os
-import shutil
-import os
-import uuid
-import tqdm
-from langchain.vectorstores import FAISS
-from langchain.docstore.document import Document
-from typing import List, Tuple
-import numpy as np
-from crazy_functions.vector_fns.general_file_loader import load_file
-
-embedding_model_dict = {
-    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
-    "ernie-base": "nghuyong/ernie-3.0-base-zh",
-    "text2vec-base": "shibing624/text2vec-base-chinese",
-    "text2vec": "GanymedeNil/text2vec-large-chinese",
-}
-
-# Embedding model name
-EMBEDDING_MODEL = "text2vec"
-
-# Embedding running device
-EMBEDDING_DEVICE = "cpu"
-
-# 基于上下文的prompt模版,请务必保留"{question}"和"{context}"
-PROMPT_TEMPLATE = """已知信息:
-{context}
-
-根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
-
-# 文本分句长度
-SENTENCE_SIZE = 100
-
-# 匹配后单段上下文长度
-CHUNK_SIZE = 250
-
-# LLM input history length
-LLM_HISTORY_LEN = 3
-
-# return top-k text chunk from vector store
-VECTOR_SEARCH_TOP_K = 5
-
-# 知识检索内容相关度 Score, 数值范围约为0-1100,如果为0,则不生效,经测试设置为小于500时,匹配结果更精准
-VECTOR_SEARCH_SCORE_THRESHOLD = 0
-
-NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
-
-FLAG_USER_NAME = uuid.uuid4().hex
-
-# 是否开启跨域,默认为False,如果需要开启,请设置为True
-# is open cross domain
-OPEN_CROSS_DOMAIN = False
-
-def similarity_search_with_score_by_vector(
-        self, embedding: List[float], k: int = 4
-) -> List[Tuple[Document, float]]:
-
-    def seperate_list(ls: List[int]) -> List[List[int]]:
-        lists = []
-        ls1 = [ls[0]]
-        for i in range(1, len(ls)):
-            if ls[i - 1] + 1 == ls[i]:
-                ls1.append(ls[i])
-            else:
-                lists.append(ls1)
-                ls1 = [ls[i]]
-        lists.append(ls1)
-        return lists
-
-    scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k)
-    docs = []
-    id_set = set()
-    store_len = len(self.index_to_docstore_id)
-    for j, i in enumerate(indices[0]):
-        if i == -1 or 0 < self.score_threshold < scores[0][j]:
-            # This happens when not enough docs are returned.
-            continue
-        _id = self.index_to_docstore_id[i]
-        doc = self.docstore.search(_id)
-        if not self.chunk_conent:
-            if not isinstance(doc, Document):
-                raise ValueError(f"Could not find document for id {_id}, got {doc}")
-            doc.metadata["score"] = int(scores[0][j])
-            docs.append(doc)
-            continue
-        id_set.add(i)
-        docs_len = len(doc.page_content)
-        for k in range(1, max(i, store_len - i)):
-            break_flag = False
-            for l in [i + k, i - k]:
-                if 0 <= l < len(self.index_to_docstore_id):
-                    _id0 = self.index_to_docstore_id[l]
-                    doc0 = self.docstore.search(_id0)
-                    if docs_len + len(doc0.page_content) > self.chunk_size:
-                        break_flag = True
-                        break
-                    elif doc0.metadata["source"] == doc.metadata["source"]:
-                        docs_len += len(doc0.page_content)
-                        id_set.add(l)
-            if break_flag:
-                break
-    if not self.chunk_conent:
-        return docs
-    if len(id_set) == 0 and self.score_threshold > 0:
-        return []
-    id_list = sorted(list(id_set))
-    id_lists = seperate_list(id_list)
-    for id_seq in id_lists:
-        for id in id_seq:
-            if id == id_seq[0]:
-                _id = self.index_to_docstore_id[id]
-                doc = self.docstore.search(_id)
-            else:
-                _id0 = self.index_to_docstore_id[id]
-                doc0 = self.docstore.search(_id0)
-                doc.page_content += " " + doc0.page_content
-        if not isinstance(doc, Document):
-            raise ValueError(f"Could not find document for id {_id}, got {doc}")
-        doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]])
-        doc.metadata["score"] = int(doc_score)
-        docs.append(doc)
-    return docs
-
-
-class LocalDocQA:
-    llm: object = None
-    embeddings: object = None
-    top_k: int = VECTOR_SEARCH_TOP_K
-    chunk_size: int = CHUNK_SIZE
-    chunk_conent: bool = True
-    score_threshold: int = VECTOR_SEARCH_SCORE_THRESHOLD
-
-    def init_cfg(self,
-                 top_k=VECTOR_SEARCH_TOP_K,
-                 ):
-
-        self.llm = None
-        self.top_k = top_k
-
-    def init_knowledge_vector_store(self,
-                                    filepath,
-                                    vs_path: str or os.PathLike = None,
-                                    sentence_size=SENTENCE_SIZE,
-                                    text2vec=None):
-        loaded_files = []
-        failed_files = []
-        if isinstance(filepath, str):
-            if not os.path.exists(filepath):
-                print("路径不存在")
-                return None
-            elif os.path.isfile(filepath):
-                file = os.path.split(filepath)[-1]
-                try:
-                    docs = load_file(filepath, SENTENCE_SIZE)
-                    print(f"{file} 已成功加载")
-                    loaded_files.append(filepath)
-                except Exception as e:
-                    print(e)
-                    print(f"{file} 未能成功加载")
-                    return None
-            elif os.path.isdir(filepath):
-                docs = []
-                for file in tqdm(os.listdir(filepath), desc="加载文件"):
-                    fullfilepath = os.path.join(filepath, file)
-                    try:
-                        docs += load_file(fullfilepath, SENTENCE_SIZE)
-                        loaded_files.append(fullfilepath)
-                    except Exception as e:
-                        print(e)
-                        failed_files.append(file)
-
-                if len(failed_files) > 0:
-                    print("以下文件未能成功加载:")
-                    for file in failed_files:
-                        print(f"{file}\n")
-
-        else:
-            docs = []
-            for file in filepath:
-                docs += load_file(file, SENTENCE_SIZE)
-                print(f"{file} 已成功加载")
-                loaded_files.append(file)
-
-        if len(docs) > 0:
-            print("文件加载完毕,正在生成向量库")
-            if vs_path and os.path.isdir(vs_path):
-                try:
-                    self.vector_store = FAISS.load_local(vs_path, text2vec)
-                    self.vector_store.add_documents(docs)
-                except:
-                    self.vector_store = FAISS.from_documents(docs, text2vec)
-            else:
-                self.vector_store = FAISS.from_documents(docs, text2vec)  # docs 为Document列表
-
-            self.vector_store.save_local(vs_path)
-            return vs_path, loaded_files
-        else:
-            raise RuntimeError("文件加载失败,请检查文件格式是否正确")
-
-    def get_loaded_file(self, vs_path):
-        ds = self.vector_store.docstore
-        return set([ds._dict[k].metadata['source'].split(vs_path)[-1] for k in ds._dict])
-
-
-    # query              查询内容
-    # vs_path            知识库路径
-    # chunk_conent       是否启用上下文关联
-    # score_threshold    搜索匹配score阈值
-    # vector_search_top_k   搜索知识库内容条数,默认搜索5条结果
-    # chunk_sizes        匹配单段内容的连接上下文长度
-    def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent,
-                                        score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
-                                        vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE,
-                                        text2vec=None):
-        self.vector_store = FAISS.load_local(vs_path, text2vec)
-        self.vector_store.chunk_conent = chunk_conent
-        self.vector_store.score_threshold = score_threshold
-        self.vector_store.chunk_size = chunk_size
-
-        embedding = self.vector_store.embedding_function.embed_query(query)
-        related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k)
-
-        if not related_docs_with_score:
-            response = {"query": query,
-                        "source_documents": []}
-            return response, ""
-        # prompt = f"{query}. You should answer this question using information from following documents: \n\n"
-        prompt = f"{query}. 你必须利用以下文档中包含的信息回答这个问题: \n\n---\n\n"
-        prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
-        prompt += "\n\n---\n\n"
-        prompt = prompt.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
-        # print(prompt)
-        response = {"query": query, "source_documents": related_docs_with_score}
-        return response, prompt
-
-
-
-
-def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec):
-    for file in files:
-        assert os.path.exists(file), "输入文件不存在:" + file
-    import nltk
-    if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
-    local_doc_qa = LocalDocQA()
-    local_doc_qa.init_cfg()
-    filelist = []
-    if not os.path.exists(os.path.join(vs_path, vs_id)):
-        os.makedirs(os.path.join(vs_path, vs_id))
-    for file in files:
-        file_name = file.name if not isinstance(file, str) else file
-        filename = os.path.split(file_name)[-1]
-        shutil.copyfile(file_name, os.path.join(vs_path, vs_id, filename))
-        filelist.append(os.path.join(vs_path, vs_id, filename))
-    vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, os.path.join(vs_path, vs_id), sentence_size, text2vec)
-
-    if len(loaded_files):
-        file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问"
-    else:
-        pass
-        # file_status = "文件未成功加载,请重新上传文件"
-        # print(file_status)
-    return local_doc_qa, vs_path
-
-@Singleton
-class knowledge_archive_interface():
-    def __init__(self) -> None:
-        self.threadLock = threading.Lock()
-        self.current_id = ""
-        self.kai_path = None
-        self.qa_handle = None
-        self.text2vec_large_chinese = None
-
-    def get_chinese_text2vec(self):
-        if self.text2vec_large_chinese is None:
-            # < -------------------预热文本向量化模组--------------- >
-            from toolbox import ProxyNetworkActivate
-            print('Checking Text2vec ...')
-            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-            with ProxyNetworkActivate('Download_LLM'):    # 临时地激活代理网络
-                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
-
-        return self.text2vec_large_chinese
-
-
-    def feed_archive(self, file_manifest, vs_path, id="default"):
-        self.threadLock.acquire()
-        # import uuid
-        self.current_id = id
-        self.qa_handle, self.kai_path = construct_vector_store(
-            vs_id=self.current_id,
-            vs_path=vs_path,
-            files=file_manifest,
-            sentence_size=100,
-            history=[],
-            one_conent="",
-            one_content_segmentation="",
-            text2vec = self.get_chinese_text2vec(),
-        )
-        self.threadLock.release()
-
-    def get_current_archive_id(self):
-        return self.current_id
-
-    def get_loaded_file(self, vs_path):
-        return self.qa_handle.get_loaded_file(vs_path)
-
-    def answer_with_archive_by_id(self, txt, id, vs_path):
-        self.threadLock.acquire()
-        if not self.current_id == id:
-            self.current_id = id
-            self.qa_handle, self.kai_path = construct_vector_store(
-                vs_id=self.current_id,
-                vs_path=vs_path,
-                files=[],
-                sentence_size=100,
-                history=[],
-                one_conent="",
-                one_content_segmentation="",
-                text2vec = self.get_chinese_text2vec(),
-            )
-        VECTOR_SEARCH_SCORE_THRESHOLD = 0
-        VECTOR_SEARCH_TOP_K = 4
-        CHUNK_SIZE = 512
-        resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
-            query = txt,
-            vs_path = self.kai_path,
-            score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
-            vector_search_top_k=VECTOR_SEARCH_TOP_K,
-            chunk_conent=True,
-            chunk_size=CHUNK_SIZE,
-            text2vec = self.get_chinese_text2vec(),
-        )
-        self.threadLock.release()
-        return resp, prompt
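The knowledge_archive_interface singleton is the module's public surface; everything above it is plumbing. A sketch of the intended call sequence (paths and the question are placeholders):

kai = knowledge_archive_interface()
kai.feed_archive(file_manifest=["docs/a.md", "docs/b.pdf"], vs_path="gpt_log/vs", id="default")
resp, prompt = kai.answer_with_archive_by_id("这个项目如何配置代理?", "default", "gpt_log/vs")
# 'prompt' packs the question plus the top-k retrieved chunks; 'resp'
# carries the matching source documents for display in the UI.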
crazy_functions/vt_fns/vt_call_plugin.py
CHANGED
@@ -1,7 +1,7 @@
 from pydantic import BaseModel, Field
 from typing import List
 from toolbox import update_ui_lastest_msg, disable_auto_promotion
-from
+from request_llm.bridge_all import predict_no_ui_long_connection
 from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
 import copy, json, pickle, os, sys, time
crazy_functions/vt_fns/vt_modify_config.py
CHANGED
@@ -1,13 +1,13 @@
 from pydantic import BaseModel, Field
 from typing import List
 from toolbox import update_ui_lastest_msg, get_conf
-from
+from request_llm.bridge_all import predict_no_ui_long_connection
 from crazy_functions.json_fns.pydantic_io import GptJsonIO
 import copy, json, pickle, os, sys


 def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
-    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
+    ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
     if not ALLOW_RESET_CONFIG:
         yield from update_ui_lastest_msg(
             lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
@@ -66,7 +66,7 @@ def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     )

 def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
-    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
+    ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
     if not ALLOW_RESET_CONFIG:
         yield from update_ui_lastest_msg(
             lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
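The change that repeats across this file and several below is the get_conf return shape: the pinned version returns a tuple even for a single key, so call sites unpack with a trailing comma. A toy stand-in (not the real get_conf) makes the difference visible:

# Toy stand-in for toolbox.get_conf, for illustration only.
def get_conf(*keys):
    conf = {'proxies': {'https': 'http://127.0.0.1:7890'}, 'ALLOW_RESET_CONFIG': False}
    return tuple(conf[k] for k in keys)

proxies = get_conf('proxies')    # a 1-tuple, not the dict; wrong to pass to requests
proxies, = get_conf('proxies')   # the dict itself, via single-element tuple unpacking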
crazy_functions/下载arxiv论文翻译摘要.py
CHANGED
@@ -1,6 +1,6 @@
 from toolbox import update_ui, get_log_folder
 from toolbox import write_history_to_file, promote_file_to_downloadzone
-from toolbox import CatchException,
+from toolbox import CatchException, report_execption, get_conf
 import re, requests, unicodedata, os
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 def download_arxiv_(url_pdf):
@@ -43,7 +43,7 @@ def download_arxiv_(url_pdf):
     file_path = download_dir+title_str

     print('下载中')
-    proxies = get_conf('proxies')
+    proxies, = get_conf('proxies')
     r = requests.get(requests_pdf_url, proxies=proxies)
     with open(file_path, 'wb+') as f:
         f.write(r.content)
@@ -77,7 +77,7 @@ def get_name(_url_):
     # print('在缓存中')
     # return arxiv_recall[_url_]

-    proxies = get_conf('proxies')
+    proxies, = get_conf('proxies')
     res = requests.get(_url_, proxies=proxies)

     bs = BeautifulSoup(res.text, 'html.parser')
@@ -130,7 +130,7 @@ def get_name(_url_):


 @CatchException
-def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt,
+def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

     CRAZY_FUNCTION_INFO = "下载arxiv论文并翻译摘要,函数插件作者[binary-husky]。正在提取摘要并下载PDF文档……"
     import glob
@@ -144,7 +144,7 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
     try:
         import bs4
     except:
-
+        report_execption(chatbot, history,
                          a = f"解析项目: {txt}",
                          b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -157,7 +157,7 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
     try:
         pdf_path, info = download_arxiv_(txt)
     except:
-
+        report_execption(chatbot, history,
                          a = f"解析项目: {txt}",
                          b = f"下载pdf文件未成功")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
crazy_functions/互动小游戏.py
DELETED
@@ -1,40 +0,0 @@
-from toolbox import CatchException, update_ui, update_ui_lastest_msg
-from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
-from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from request_llms.bridge_all import predict_no_ui_long_connection
-from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
-
-@CatchException
-def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    from crazy_functions.game_fns.game_interactive_story import MiniGame_ResumeStory
-    # 清空历史
-    history = []
-    # 选择游戏
-    cls = MiniGame_ResumeStory
-    # 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化
-    state = cls.sync_state(chatbot,
-                           llm_kwargs,
-                           cls,
-                           plugin_name='MiniGame_ResumeStory',
-                           callback_fn='crazy_functions.互动小游戏->随机小游戏',
-                           lock_plugin=True
-                           )
-    yield from state.continue_game(prompt, chatbot, history)
-
-
-@CatchException
-def 随机小游戏1(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    from crazy_functions.game_fns.game_ascii_art import MiniGame_ASCII_Art
-    # 清空历史
-    history = []
-    # 选择游戏
-    cls = MiniGame_ASCII_Art
-    # 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化
-    state = cls.sync_state(chatbot,
-                           llm_kwargs,
-                           cls,
-                           plugin_name='MiniGame_ASCII_Art',
-                           callback_fn='crazy_functions.互动小游戏->随机小游戏1',
-                           lock_plugin=True
-                           )
-    yield from state.continue_game(prompt, chatbot, history)
crazy_functions/交互功能函数模板.py
CHANGED
@@ -3,7 +3,7 @@ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive


 @CatchException
-def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt,
+def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
@@ -11,7 +11,7 @@ def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-
+    web_port        当前软件运行的端口号
     """
     history = []    # 清空历史,以免输入溢出
     chatbot.append(("这是什么功能?", "交互功能函数模板。在执行完成之后, 可以将自身的状态存储到cookie中, 等待用户的再次调用。"))
crazy_functions/函数动态生成.py
DELETED
@@ -1,252 +0,0 @@
-# 本源代码中, ⭐ = 关键步骤
-"""
-测试:
-    - 裁剪图像,保留下半部分
-    - 交换图像的蓝色通道和红色通道
-    - 将图像转为灰度图像
-    - 将csv文件转excel表格
-
-Testing:
-    - Crop the image, keeping the bottom half.
-    - Swap the blue channel and red channel of the image.
-    - Convert the image to grayscale.
-    - Convert the CSV file to an Excel spreadsheet.
-"""
-
-
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
-from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
-from .crazy_utils import input_clipping, try_install_deps
-from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
-from crazy_functions.gen_fns.gen_fns_shared import get_class_name
-from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker
-from crazy_functions.gen_fns.gen_fns_shared import try_make_module
-import os
-import time
-import glob
-import multiprocessing
-
-templete = """
-```python
-import ...  # Put dependencies here, e.g. import numpy as np.
-
-class TerminalFunction(object):  # Do not change the name of the class, The name of the class must be `TerminalFunction`
-
-    def run(self, path):  # The name of the function must be `run`, it takes only a positional argument.
-        # rewrite the function you have just written here
-        ...
-        return generated_file_path
-```
-"""
-
-def inspect_dependency(chatbot, history):
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-    return True
-
-def get_code_block(reply):
-    import re
-    pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
-    matches = re.findall(pattern, reply) # find all code blocks in text
-    if len(matches) == 1:
-        return matches[0].strip('python') # code block
-    for match in matches:
-        if 'class TerminalFunction' in match:
-            return match.strip('python') # code block
-    raise RuntimeError("GPT is not generating proper code.")
-
-def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
-    # 输入
-    prompt_compose = [
-        f'Your job:\n'
-        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
-        f"2. You should write this function to perform following task: " + txt + "\n",
-        f"3. Wrap the output python function with markdown codeblock."
-    ]
-    i_say = "".join(prompt_compose)
-    demo = []
-
-    # 第一步
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=i_say, inputs_show_user=i_say,
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
-        sys_prompt= r"You are a world-class programmer."
-    )
-    history.extend([i_say, gpt_say])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-    # 第二步
-    prompt_compose = [
-        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
-        templete
-    ]
-    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=i_say, inputs_show_user=inputs_show_user,
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-        sys_prompt= r"You are a programmer. You need to replace `...` with valid packages, do not give `...` in your answer!"
-    )
-    code_to_return = gpt_say
-    history.extend([i_say, gpt_say])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-    # # 第三步
-    # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
-    # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
-    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=i_say, inputs_show_user=inputs_show_user,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-    #     sys_prompt= r"You are a programmer."
-    # )
-
-    # # # 第三步
-    # i_say = "Show me how to use `pip` to install packages to run the code above. "
-    # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
-    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=i_say, inputs_show_user=i_say,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-    #     sys_prompt= r"You are a programmer."
-    # )
-    installation_advance = ""
-
-    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
-
-
-
-
-def for_immediate_show_off_when_possible(file_type, fp, chatbot):
-    if file_type in ['png', 'jpg']:
-        image_path = os.path.abspath(fp)
-        chatbot.append(['这是一张图片, 展示如下:',
-                        f'本地文件地址: <br/>`{image_path}`<br/>'+
-                        f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-                        ])
-    return chatbot
-
-
-def have_any_recent_upload_files(chatbot):
-    _5min = 5 * 60
-    if not chatbot: return False    # chatbot is None
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    if not most_recent_uploaded: return False   # most_recent_uploaded is None
-    if time.time() - most_recent_uploaded["time"] < _5min: return True  # most_recent_uploaded is new
-    else: return False  # most_recent_uploaded is too old
-
-def get_recent_file_prompt_support(chatbot):
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    path = most_recent_uploaded['path']
-    return path
-
-@CatchException
-def 函数动态生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    """
-    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
-    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
-    plugin_kwargs   插件模型的参数,暂时没有用武之地
-    chatbot         聊天显示框的句柄,用于显示给用户
-    history         聊天历史,前情提要
-    system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
-    """
-
-    # 清空历史
-    history = []
-
-    # 基本信息:功能、贡献者
-    chatbot.append(["正在启动: 插件动态生成插件", "插件动态生成, 执行开始, 作者Binary-Husky."])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # ⭐ 文件上传区是否有东西
-    # 1. 如果有文件: 作为函数参数
-    # 2. 如果没有文件:需要用GPT提取参数 (太懒了,以后再写,虚空终端已经实现了类似的代码)
-    file_list = []
-    if get_plugin_arg(plugin_kwargs, key="file_path_arg", default=False):
-        file_path = get_plugin_arg(plugin_kwargs, key="file_path_arg", default=None)
-        file_list.append(file_path)
-        yield from update_ui_lastest_msg(f"当前文件: {file_path}", chatbot, history, 1)
-    elif have_any_recent_upload_files(chatbot):
-        file_dir = get_recent_file_prompt_support(chatbot)
-        file_list = glob.glob(os.path.join(file_dir, '**/*'), recursive=True)
-        yield from update_ui_lastest_msg(f"当前文件处理列表: {file_list}", chatbot, history, 1)
-    else:
-        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
-        yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
-        return # 2. 如果没有文件
-    if len(file_list) == 0:
-        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
-        yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
-        return # 2. 如果没有文件
-
-    # 读取文件
-    file_type = file_list[0].split('.')[-1]
-
-    # 粗心检查
-    if is_the_upload_folder(txt):
-        yield from update_ui_lastest_msg(f"请在输入框内填写需求, 然后再次点击该插件! 至于您的文件,不用担心, 文件路径 {txt} 已经被记忆. ", chatbot, history, 1)
-        return
-
-    # 开始干正事
-    MAX_TRY = 3
-    for j in range(MAX_TRY):  # 最多重试5次
-        traceback = ""
-        try:
-            # ⭐ 开始啦 !
-            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
-                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
-            chatbot.append(["代码生成阶段结束", ""])
-            yield from update_ui_lastest_msg(f"正在验证上述代码的有效性 ...", chatbot, history, 1)
-            # ⭐ 分离代码块
-            code = get_code_block(code)
-            # ⭐ 检查模块
-            ok, traceback = try_make_module(code, chatbot)
-            # 搞定代码生成
-            if ok: break
-        except Exception as e:
-            if not traceback: traceback = trimmed_format_exc()
-        # 处理异常
-        if not traceback: traceback = trimmed_format_exc()
-        yield from update_ui_lastest_msg(f"第 {j+1}/{MAX_TRY} 次代码生成尝试, 失败了~ 别担心, 我们5秒后再试一次... \n\n此次我们的错误追踪是\n```\n{traceback}\n```\n", chatbot, history, 5)
-
-    # 代码生成结束, 开始执行
-    TIME_LIMIT = 15
-    yield from update_ui_lastest_msg(f"开始创建新进程并执行代码! 时间限制 {TIME_LIMIT} 秒. 请等待任务完成... ", chatbot, history, 1)
-    manager = multiprocessing.Manager()
-    return_dict = manager.dict()
-
-    # ⭐ 到最后一步了,开始逐个文件进行处理
-    for file_path in file_list:
-        if os.path.exists(file_path):
-            chatbot.append([f"正在处理文件: {file_path}", f"请稍等..."])
-            chatbot = for_immediate_show_off_when_possible(file_type, file_path, chatbot)
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-        else:
-            continue
-
-        # ⭐⭐⭐ subprocess_worker ⭐⭐⭐
-        p = multiprocessing.Process(target=subprocess_worker, args=(code, file_path, return_dict))
-        # ⭐ 开始执行,时间限制TIME_LIMIT
-        p.start(); p.join(timeout=TIME_LIMIT)
-        if p.is_alive(): p.terminate(); p.join()
-        p.close()
-        res = return_dict['result']
-        success = return_dict['success']
-        traceback = return_dict['traceback']
-        if not success:
-            if not traceback: traceback = trimmed_format_exc()
-            chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
-            # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-            return
-
-        # 顺利完成,收尾
-        res = str(res)
-        if os.path.exists(res):
-            chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
-            new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
-            chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-        else:
-            chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
-            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
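Worth isolating from the deleted plugin is its sandboxing step: generated code runs in a child process that is killed after a hard wall-clock limit. A minimal self-contained sketch of that pattern (the worker body is a stand-in for running the generated TerminalFunction):

import multiprocessing
import time

def subprocess_worker(return_dict):
    # Stand-in for executing the generated code on one file.
    time.sleep(1)
    return_dict['result'] = "done"
    return_dict['success'] = True

if __name__ == '__main__':
    TIME_LIMIT = 15                      # seconds, the same hard limit the plugin used
    manager = multiprocessing.Manager()
    return_dict = manager.dict(result=None, success=False)
    p = multiprocessing.Process(target=subprocess_worker, args=(return_dict,))
    p.start(); p.join(timeout=TIME_LIMIT)
    if p.is_alive():                     # still running past the limit: kill it
        p.terminate(); p.join()
    p.close()
    print(return_dict['success'], return_dict['result'])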
crazy_functions/命令行助手.py
CHANGED
@@ -4,7 +4,7 @@ from .crazy_utils import input_clipping
 import copy, json

 @CatchException
-def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt,
+def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
@@ -12,7 +12,7 @@ def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     chatbot         聊天显示框的句柄, 用于显示给用户
     history         聊天历史, 前情提要
     system_prompt   给gpt的静默提醒
-
+    web_port        当前软件运行的端口号
     """
     # 清空历史, 以免输入溢出
     history = []
crazy_functions/图片生成.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
|
2 |
-
from
|
|
|
3 |
|
4 |
|
5 |
-
def gen_image(llm_kwargs, prompt, resolution="
|
6 |
import requests, json, time, os
|
7 |
-
from
|
8 |
|
9 |
-
proxies = get_conf('proxies')
|
10 |
# Set up OpenAI API key and model
|
11 |
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
|
12 |
chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
|
@@ -22,13 +23,8 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
|
|
22 |
'prompt': prompt,
|
23 |
'n': 1,
|
24 |
'size': resolution,
|
25 |
-
'model': model,
|
26 |
'response_format': 'url'
|
27 |
}
|
28 |
-
if quality is not None:
|
29 |
-
data['quality'] = quality
|
30 |
-
if style is not None:
|
31 |
-
data['style'] = style
|
32 |
response = requests.post(url, headers=headers, json=data, proxies=proxies)
|
33 |
print(response.content)
|
34 |
try:
|
@@ -46,72 +42,23 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
|
|
46 |
return image_url, file_path+file_name
|
47 |
|
48 |
|
49 |
-
def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="dall-e-2"):
|
50 |
-
import requests, json, time, os
|
51 |
-
from request_llms.bridge_all import model_info
|
52 |
-
|
53 |
-
proxies = get_conf('proxies')
|
54 |
-
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
|
55 |
-
chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
|
56 |
-
# 'https://api.openai.com/v1/chat/completions'
|
57 |
-
img_endpoint = chat_endpoint.replace('chat/completions','images/edits')
|
58 |
-
# # Generate the image
|
59 |
-
url = img_endpoint
|
60 |
-
n = 1
|
61 |
-
headers = {
|
62 |
-
'Authorization': f"Bearer {api_key}",
|
63 |
-
}
|
64 |
-
make_transparent(image_path, image_path+'.tsp.png')
|
65 |
-
make_square_image(image_path+'.tsp.png', image_path+'.tspsq.png')
|
66 |
-
resize_image(image_path+'.tspsq.png', image_path+'.ready.png', max_size=1024)
|
67 |
-
image_path = image_path+'.ready.png'
|
68 |
-
with open(image_path, 'rb') as f:
|
69 |
-
file_content = f.read()
|
70 |
-
files = {
|
71 |
-
'image': (os.path.basename(image_path), file_content),
|
72 |
-
# 'mask': ('mask.png', open('mask.png', 'rb'))
|
73 |
-
'prompt': (None, prompt),
|
74 |
-
"n": (None, str(n)),
|
75 |
-
'size': (None, resolution),
|
76 |
-
}
|
77 |
-
|
78 |
-
response = requests.post(url, headers=headers, files=files, proxies=proxies)
|
79 |
-
print(response.content)
|
80 |
-
try:
|
81 |
-
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
|
82 |
-
except:
|
83 |
-
raise RuntimeError(response.content.decode())
|
84 |
-
# 文件保存到本地
|
85 |
-
r = requests.get(image_url, proxies=proxies)
|
86 |
-
file_path = f'{get_log_folder()}/image_gen/'
|
87 |
-
os.makedirs(file_path, exist_ok=True)
|
88 |
-
file_name = 'Image' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.png'
|
89 |
-
with open(file_path+file_name, 'wb+') as f: f.write(r.content)
|
90 |
-
|
91 |
-
|
92 |
-
return image_url, file_path+file_name
|
93 |
-
|
94 |
|
95 |
@CatchException
|
96 |
-
def 图片生成
|
97 |
"""
|
98 |
-
txt
|
99 |
-
llm_kwargs gpt
|
100 |
-
plugin_kwargs
|
101 |
-
chatbot
|
102 |
-
history
|
103 |
system_prompt 给gpt的静默提醒
|
104 |
-
|
105 |
"""
|
106 |
-
history = [] #
|
107 |
-
|
108 |
-
|
109 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
|
110 |
-
return
|
111 |
-
chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
|
112 |
-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
113 |
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
|
114 |
-
resolution = plugin_kwargs.get("advanced_arg", '
|
115 |
image_url, image_path = gen_image(llm_kwargs, prompt, resolution)
|
116 |
chatbot.append([prompt,
|
117 |
f'图像中转网址: <br/>`{image_url}`<br/>'+
|
@@ -119,158 +66,4 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
         f'本地文件地址: <br/>`{image_path}`<br/>'+
         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
     ])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-
-@CatchException
-def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    history = []    # 清空历史,以免输入溢出
-    if prompt.strip() == "":
-        chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-        return
-    chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
-    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-    resolution_arg = plugin_kwargs.get("advanced_arg", '1024x1024-standard-vivid').lower()
-    parts = resolution_arg.split('-')
-    resolution = parts[0]       # 解析分辨率
-    quality = 'standard'        # 质量与风格默认值
-    style = 'vivid'
-    # 遍历检查是否有额外参数
-    for part in parts[1:]:
-        if part in ['hd', 'standard']:
-            quality = part
-        elif part in ['vivid', 'natural']:
-            style = part
-    image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality, style=style)
-    chatbot.append([prompt,
-        f'图像中转网址: <br/>`{image_url}`<br/>'+
-        f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
-        f'本地文件地址: <br/>`{image_path}`<br/>'+
-        f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-    ])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-
-class ImageEditState(GptAcademicState):
-    # 尚未完成
-    def get_image_file(self, x):
-        import os, glob
-        if len(x) == 0: return False, None
-        if not os.path.exists(x): return False, None
-        if x.endswith('.png'): return True, x
-        file_manifest = [f for f in glob.glob(f'{x}/**/*.png', recursive=True)]
-        confirm = (len(file_manifest) >= 1 and file_manifest[0].endswith('.png') and os.path.exists(file_manifest[0]))
-        file = None if not confirm else file_manifest[0]
-        return confirm, file
-
-    def lock_plugin(self, chatbot):
-        chatbot._cookies['lock_plugin'] = 'crazy_functions.图片生成->图片修改_DALLE2'
-        self.dump_state(chatbot)
-
-    def unlock_plugin(self, chatbot):
-        self.reset()
-        chatbot._cookies['lock_plugin'] = None
-        self.dump_state(chatbot)
-
-    def get_resolution(self, x):
-        return (x in ['256x256', '512x512', '1024x1024']), x
-
-    def get_prompt(self, x):
-        confirm = (len(x)>=5) and (not self.get_resolution(x)[0]) and (not self.get_image_file(x)[0])
-        return confirm, x
-
-    def reset(self):
-        self.req = [
-            {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file},
-            {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024, 然后再次点击本插件', 'verify_fn': self.get_resolution},
-            {'value':None, 'description': '请输入修改需求,建议您使用英文提示词, 然后再次点击本插件', 'verify_fn': self.get_prompt},
-        ]
-        self.info = ""
-
-    def feed(self, prompt, chatbot):
-        for r in self.req:
-            if r['value'] is None:
-                confirm, res = r['verify_fn'](prompt)
-                if confirm:
-                    r['value'] = res
-                    self.dump_state(chatbot)
-                break
-        return self
-
-    def next_req(self):
-        for r in self.req:
-            if r['value'] is None:
-                return r['description']
-        return "已经收集到所有信息"
-
-    def already_obtained_all_materials(self):
-        return all([x['value'] is not None for x in self.req])
-
-@CatchException
-def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    # 尚未完成
-    history = []    # 清空历史
-    state = ImageEditState.get_state(chatbot, ImageEditState)
-    state = state.feed(prompt, chatbot)
-    state.lock_plugin(chatbot)
-    if not state.already_obtained_all_materials():
-        chatbot.append(["图片修改\n\n1. 上传图片(图片中需要修改的位置用橡皮擦擦除为纯白色,即RGB=255,255,255)\n2. 输入分辨率 \n3. 输入修改需求", state.next_req()])
-        yield from update_ui(chatbot=chatbot, history=history)
-        return
-
-    image_path = state.req[0]['value']
-    resolution = state.req[1]['value']
-    prompt = state.req[2]['value']
-    chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`<br/>分辨率:`{resolution}`<br/>修改需求:`{prompt}`"])
-    yield from update_ui(chatbot=chatbot, history=history)
-    image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution)
-    chatbot.append([prompt,
-        f'图像中转网址: <br/>`{image_url}`<br/>'+
-        f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
-        f'本地文件地址: <br/>`{image_path}`<br/>'+
-        f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-    ])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-    state.unlock_plugin(chatbot)
-
-def make_transparent(input_image_path, output_image_path):
-    from PIL import Image
-    image = Image.open(input_image_path)
-    image = image.convert("RGBA")
-    data = image.getdata()
-    new_data = []
-    for item in data:
-        if item[0] == 255 and item[1] == 255 and item[2] == 255:
-            new_data.append((255, 255, 255, 0))
-        else:
-            new_data.append(item)
-    image.putdata(new_data)
-    image.save(output_image_path, "PNG")
-
-def resize_image(input_path, output_path, max_size=1024):
-    from PIL import Image
-    with Image.open(input_path) as img:
-        width, height = img.size
-        if width > max_size or height > max_size:
-            if width >= height:
-                new_width = max_size
-                new_height = int((max_size / width) * height)
-            else:
-                new_height = max_size
-                new_width = int((max_size / height) * width)
-
-            resized_img = img.resize(size=(new_width, new_height))
-            resized_img.save(output_path)
-        else:
-            img.save(output_path)
-
-def make_square_image(input_path, output_path):
-    from PIL import Image
-    with Image.open(input_path) as img:
-        width, height = img.size
-        size = max(width, height)
-        new_img = Image.new("RGBA", (size, size), color="black")
-        new_img.paste(img, ((size - width) // 2, (size - height) // 2))
-        new_img.save(output_path)
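The removed ImageEditState above is a small slot-filling state machine: each required input carries a verify_fn, every user turn is offered to the first unfilled slot, and lock_plugin keeps routing input back here until all slots validate. A minimal standalone sketch of the same loop (the function names and simplified verifiers are illustrative, not the plugin's API):

```python
# Minimal slot-filling loop in the style of ImageEditState.feed / next_req.
# Slot names and verifiers are illustrative simplifications.
def make_slots():
    return [
        {'value': None, 'ask': '请先上传图像(.png)', 'verify': lambda x: x.endswith('.png')},
        {'value': None, 'ask': '请输入分辨率',       'verify': lambda x: x in ('256x256', '512x512', '1024x1024')},
        {'value': None, 'ask': '请输入修改需求',     'verify': lambda x: len(x) >= 5},
    ]

def feed(slots, user_input):
    """Offer the input to the first unfilled slot; fill it only if it verifies."""
    for slot in slots:
        if slot['value'] is None:
            if slot['verify'](user_input):
                slot['value'] = user_input
            break      # exactly one slot attempt per user turn, as in the original
    return slots

def next_request(slots):
    """Ask for the first missing item, or report that collection is complete."""
    for slot in slots:
        if slot['value'] is None:
            return slot['ask']
    return '已经收集到所有信息'
```

This ordering is also why get_prompt explicitly rejects inputs that look like resolutions or image files: an out-of-order answer should fail the current slot rather than silently fill the wrong one.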
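The three Pillow helpers at the end of the removed file (make_transparent, make_square_image, resize_image) always run as one fixed pre-processing chain inside edit_image. A condensed sketch of that chain in a single function; prepare_for_edit is an illustrative name, and the relevant API detail is that the DALL-E 2 edit endpoint treats fully transparent pixels as the editable region when no separate mask is uploaded:

```python
from PIL import Image

def prepare_for_edit(src_path, dst_path, max_size=1024):
    """White -> transparent, pad to square, cap the side length at max_size."""
    img = Image.open(src_path).convert("RGBA")
    # 1. Pure-white pixels become transparent; alpha=0 marks the area to redraw.
    img.putdata([(255, 255, 255, 0) if p[:3] == (255, 255, 255) else p
                 for p in img.getdata()])
    # 2. Pad to a square canvas, since the edit endpoint expects square PNGs.
    side = max(img.size)
    canvas = Image.new("RGBA", (side, side), color="black")
    canvas.paste(img, ((side - img.width) // 2, (side - img.height) // 2))
    # 3. Downscale so the image stays within the API's size limit.
    if side > max_size:
        canvas = canvas.resize((max_size, max_size))
    canvas.save(dst_path, "PNG")
    return dst_path
```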
After:

 from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import datetime


+def gen_image(llm_kwargs, prompt, resolution="256x256"):
     import requests, json, time, os
+    from request_llm.bridge_all import model_info

+    proxies, = get_conf('proxies')
     # Set up OpenAI API key and model
     api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
     chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
 ... (lines 14-22 unchanged)
         'prompt': prompt,
         'n': 1,
         'size': resolution,
         'response_format': 'url'
     }
     response = requests.post(url, headers=headers, json=data, proxies=proxies)
     print(response.content)
     try:
 ... (lines 31-41 unchanged)
     return image_url, file_path+file_name


 @CatchException
+def 图片生成(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
+    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
+    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
+    plugin_kwargs   插件模型的参数,暂时没有用武之地
+    chatbot         聊天显示框的句柄,用于显示给用户
+    history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
     """
+    history = []    # 清空历史,以免输入溢出
+    chatbot.append(("这是什么功能?", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文效果不理想, 请尝试英文Prompt。正在处理中 ....."))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    resolution = plugin_kwargs.get("advanced_arg", '256x256')
     image_url, image_path = gen_image(llm_kwargs, prompt, resolution)
     chatbot.append([prompt,
         f'图像中转网址: <br/>`{image_url}`<br/>'+
 ... (line 65 unchanged)
         f'本地文件地址: <br/>`{image_path}`<br/>'+
         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
     ])
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
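In the version being added back, gen_image reduces to one JSON POST plus a download of the returned URL. A standalone sketch of that flow, assuming the stock OpenAI endpoint (the elided lines 14-22 build the real URL from model_info[...]['endpoint']) and an illustrative generate_image name:

```python
import os
import time
import requests

def generate_image(api_key, prompt, resolution="256x256", proxies=None):
    url = "https://api.openai.com/v1/images/generations"   # assumed default endpoint
    headers = {"Content-Type": "application/json",
               "Authorization": f"Bearer {api_key}"}
    data = {"prompt": prompt, "n": 1, "size": resolution, "response_format": "url"}
    response = requests.post(url, headers=headers, json=data, proxies=proxies)
    response.raise_for_status()        # fail loudly instead of print(response.content)
    image_url = response.json()["data"][0]["url"]
    # Mirror the plugin: download the hosted image and keep a timestamped local copy.
    os.makedirs("image_gen", exist_ok=True)
    local_file = os.path.join("image_gen", time.strftime("Image%Y-%m-%d-%H-%M-%S.png"))
    with open(local_file, "wb") as f:
        f.write(requests.get(image_url, proxies=proxies).content)
    return image_url, local_file
```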
crazy_functions/多智能体.py
DELETED
@@ -1,101 +0,0 @@
-# 本源代码中, ⭐ = 关键步骤
-"""
-测试:
-    - show me the solution of $x^2=cos(x)$, solve this problem with figure, and plot and save image to t.jpg
-
-"""
-
-
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
-from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
-from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
-from crazy_functions.crazy_utils import input_clipping, try_install_deps
-from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
-from crazy_functions.agent_fns.auto_agent import AutoGenMath
-import time
-
-def remove_model_prefix(llm):
-    if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
-    if llm.startswith('azure-'): llm = llm.replace('azure-', '')
-    return llm
-
-
-@CatchException
-def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    """
-    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
-    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
-    plugin_kwargs   插件模型的参数
-    chatbot         聊天显示框的句柄,用于显示给用户
-    history         聊天历史,前情提要
-    system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
-    """
-    # 检查当前的模型是否符合要求
-    supported_llms = [
-        "gpt-3.5-turbo-16k",
-        'gpt-3.5-turbo-1106',
-        "gpt-4",
-        "gpt-4-32k",
-        'gpt-4-1106-preview',
-        "azure-gpt-3.5-turbo-16k",
-        "azure-gpt-3.5-16k",
-        "azure-gpt-4",
-        "azure-gpt-4-32k",
-    ]
-    from request_llms.bridge_all import model_info
-    if model_info[llm_kwargs['llm_model']]["max_token"] < 8000: # 至少是8k上下文的模型
-        chatbot.append([f"处理任务: {txt}", f"当前插件只支持{str(supported_llms)}, 当前模型{llm_kwargs['llm_model']}的最大上下文长度太短, 不能支撑AutoGen运行。"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-    if model_info[llm_kwargs['llm_model']]["endpoint"] is not None: # 如果不是本地模型,加载API_KEY
-        llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
-
-    # 尝试导入依赖,如果缺少依赖,则给出安装建议
-    try:
-        import autogen
-        if get_conf("AUTOGEN_USE_DOCKER"):
-            import docker
-    except:
-        chatbot.append([ f"处理任务: {txt}",
-            f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pyautogen docker```。"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 尝试导入依赖,如果缺少依赖,则给出安装建议
-    try:
-        import autogen
-        import glob, os, time, subprocess
-        if get_conf("AUTOGEN_USE_DOCKER"):
-            subprocess.Popen(["docker", "--version"])
-    except:
-        chatbot.append([f"处理任务: {txt}", f"缺少docker运行环境!"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # 解锁插件
-    chatbot.get_cookies()['lock_plugin'] = None
-    persistent_class_multi_user_manager = GradioMultiuserManagerForPersistentClasses()
-    user_uuid = chatbot.get_cookies().get('uuid')
-    persistent_key = f"{user_uuid}->多智能体终端"
-    if persistent_class_multi_user_manager.already_alive(persistent_key):
-        # 当已经存在一个正在运行的多智能体终端时,直接将用户输入传递给它,而不是再次启动一个新的多智能体终端
-        print('[debug] feed new user input')
-        executor = persistent_class_multi_user_manager.get(persistent_key)
-        exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
-    else:
-        # 运行多智能体终端 (首次)
-        print('[debug] create new executor instance')
-        history = []
-        chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        executor = AutoGenMath(llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
-        persistent_class_multi_user_manager.set(persistent_key, executor)
-        exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="create")
-
-    if exit_reason == "wait_feedback":
-        # 当用户点击了“等待反馈”按钮时,将executor存储到cookie中,等待用户的再次调用
-        executor.chatbot.get_cookies()['lock_plugin'] = 'crazy_functions.多智能体->多智能体终端'
-    else:
-        executor.chatbot.get_cookies()['lock_plugin'] = None
-    yield from update_ui(chatbot=executor.chatbot, history=executor.history) # 更新状态
crazy_functions/对话历史存档.py
CHANGED
Before:
@@ -1,8 +1,7 @@
-from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
 import re

-f_prefix = 'GPT-Academic对话存档'
-
 def write_chat_to_file(chatbot, history=None, file_name=None):
     """
     将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
@@ -10,8 +9,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
     import os
     import time
     if file_name is None:
-        file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
-        fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
     with open(fp, 'w', encoding='utf8') as f:
         from themes.theme import advanced_css
         f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
@@ -69,7 +68,7 @@ def read_file_to_chat(chatbot, history, file_name):
     return chatbot, history

 @CatchException
-def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -77,11 +76,11 @@ def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
     """

     chatbot.append(("保存当前对话",
-        f"[Local Message] {write_chat_to_file(chatbot, history)}…
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新

 def hide_cwd(str):
@@ -91,7 +90,7 @@ def hide_cwd(str):
     return str.replace(current_path, replace_path)

 @CatchException
-def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -99,7 +98,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
     """
     from .crazy_utils import get_files_from_everything
     success, file_manifest, _ = get_files_from_everything(txt, type='.html')
@@ -107,12 +106,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     if not success:
         if txt == "": txt = '空空如也的输入栏'
         import glob
-        local_history = "<br/>".join([
-            "`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
-            for f in glob.glob(
-                f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
-                recursive=True
-            )])
         chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
@@ -126,7 +120,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     return

 @CatchException
-def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -134,16 +128,12 @@ def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
     """

     import glob, os
-    local_history = "<br/>".join([
-        "`"+hide_cwd(f)+"`"
-        for f in glob.glob(
-            f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True
-        )])
-    for f in glob.glob(f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True):
         os.remove(f)
     chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
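In the version being removed, every function that touches an archive builds its path from the same two ingredients: the f_prefix constant and a per-user, per-plugin log folder. That is what keeps writing, listing, and deleting in sync. A minimal sketch of the convention, with a plain log_folder argument standing in for get_log_folder(get_user(chatbot), plugin_name='chat_history'):

```python
import glob
import os
import time

F_PREFIX = 'GPT-Academic对话存档'   # plays the role of the plugin's f_prefix

def archive_file_path(log_folder):
    """Writer: new archives always start with the shared prefix."""
    name = F_PREFIX + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    return os.path.join(log_folder, name)

def list_archives(log_folder):
    """Lister: the same prefix is the only thing globbed for."""
    return glob.glob(f'{log_folder}/**/{F_PREFIX}*.html', recursive=True)

def delete_archives(log_folder):
    """Deleter: reuses the lister, so it can never remove unrelated files."""
    removed = list_archives(log_folder)
    for f in removed:
        os.remove(f)
    return removed
```

The version being added below instead hard-codes the 'chatGPT对话历史' prefix and a global get_log_folder(), so archives are shared by every user of the Space, which is exactly the caveat the save message carries.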
After:

+from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 import re

 def write_chat_to_file(chatbot, history=None, file_name=None):
     """
     将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
 ... (line 8 unchanged)
     import os
     import time
     if file_name is None:
+        file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
+        fp = os.path.join(get_log_folder(), file_name)
     with open(fp, 'w', encoding='utf8') as f:
         from themes.theme import advanced_css
         f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
 ... (lines 17-67 unchanged)
     return chatbot, history

 @CatchException
+def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
 ... (line 75 unchanged)
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
     """

     chatbot.append(("保存当前对话",
+        f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用“载入对话历史存档”还原当下的对话。\n警告!被保存的对话历史可以被使用该系统的任何人查阅。"))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新

 def hide_cwd(str):
 ... (lines 87-89 unchanged)
     return str.replace(current_path, replace_path)

 @CatchException
+def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
 ... (line 97 unchanged)
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
     """
     from .crazy_utils import get_files_from_everything
     success, file_manifest, _ = get_files_from_everything(txt, type='.html')
 ... (line 105 unchanged)
     if not success:
         if txt == "": txt = '空空如也的输入栏'
         import glob
+        local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
         chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
 ... (lines 113-119 unchanged)
         return

 @CatchException
+def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
 ... (line 127 unchanged)
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
     """

     import glob, os
+    local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
+    for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True):
         os.remove(f)
     chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
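Both versions serialize the conversation into a single self-contained HTML file with the theme's CSS inlined, so the archive opens offline as-is. A minimal sketch of that serialization; the per-message markup below is invented, since the real body-writing loop (lines 17-67) is collapsed in this diff, and advanced_css is assumed to be an ordinary CSS string:

```python
import time

def write_history_html(chat_pairs, advanced_css, file_name=None):
    """chat_pairs: list of (question, answer) tuples, as stored in the chatbot."""
    file_name = file_name or time.strftime("chat-%Y-%m-%d-%H-%M-%S.html")
    with open(file_name, 'w', encoding='utf8') as f:
        # One header with inlined CSS keeps the archive viewable without the app.
        f.write(f'<!DOCTYPE html><head><meta charset="utf-8">'
                f'<title>对话历史</title><style>{advanced_css}</style></head>')
        for question, answer in chat_pairs:
            f.write(f'<div class="question">{question}</div>')   # invented markup
            f.write(f'<div class="answer">{answer}</div>')       # invented markup
    return file_name
```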