This view is limited to 50 files because the change set contains too many changes; see the raw diff for the remainder.

Files changed (50):
  1. .pre-commit-config.yaml +0 -32
  2. Dockerfile +11 -11
  3. README.md +139 -214
  4. app.py +89 -231
  5. check_proxy.py +17 -24
  6. config.py +27 -140
  7. core_functional.py +44 -121
  8. crazy_functional.py +246 -406
  9. crazy_functions/Langchain知识库.py +2 -2
  10. crazy_functions/Latex全文润色.py +17 -17
  11. crazy_functions/Latex全文翻译.py +12 -12
  12. crazy_functions/Latex输出PDF.py +0 -484
  13. crazy_functions/Latex输出PDF结果.py +14 -20
  14. crazy_functions/agent_fns/auto_agent.py +0 -23
  15. crazy_functions/agent_fns/echo_agent.py +0 -19
  16. crazy_functions/agent_fns/general.py +0 -138
  17. crazy_functions/agent_fns/persistent.py +0 -16
  18. crazy_functions/agent_fns/pipe.py +0 -194
  19. crazy_functions/agent_fns/watchdog.py +0 -28
  20. crazy_functions/chatglm微调工具.py +4 -4
  21. crazy_functions/crazy_utils.py +258 -57
  22. crazy_functions/diagram_fns/file_tree.py +0 -122
  23. crazy_functions/game_fns/game_ascii_art.py +0 -42
  24. crazy_functions/game_fns/game_interactive_story.py +0 -212
  25. crazy_functions/game_fns/game_utils.py +0 -35
  26. crazy_functions/gen_fns/gen_fns_shared.py +0 -70
  27. crazy_functions/ipc_fns/mp.py +0 -37
  28. crazy_functions/latex_fns/latex_actions.py +11 -31
  29. crazy_functions/latex_fns/latex_toolbox.py +115 -345
  30. crazy_functions/live_audio/aliyunASR.py +6 -138
  31. crazy_functions/live_audio/audio_io.py +1 -1
  32. crazy_functions/multi_stage/multi_stage_utils.py +0 -93
  33. crazy_functions/pdf_fns/breakdown_txt.py +0 -125
  34. crazy_functions/pdf_fns/parse_pdf.py +5 -146
  35. crazy_functions/pdf_fns/parse_word.py +0 -85
  36. crazy_functions/pdf_fns/report_gen_html.py +0 -58
  37. crazy_functions/pdf_fns/report_template.html +0 -0
  38. crazy_functions/vector_fns/__init__.py +0 -0
  39. crazy_functions/vector_fns/general_file_loader.py +0 -70
  40. crazy_functions/vector_fns/vector_database.py +0 -338
  41. crazy_functions/vt_fns/vt_call_plugin.py +1 -1
  42. crazy_functions/vt_fns/vt_modify_config.py +3 -3
  43. crazy_functions/下载arxiv论文翻译摘要.py +6 -6
  44. crazy_functions/互动小游戏.py +0 -40
  45. crazy_functions/交互功能函数模板.py +2 -2
  46. crazy_functions/函数动态生成.py +0 -252
  47. crazy_functions/命令行助手.py +2 -2
  48. crazy_functions/图片生成.py +17 -224
  49. crazy_functions/多智能体.py +0 -101
  50. crazy_functions/对话历史存档.py +14 -24
.pre-commit-config.yaml DELETED
@@ -1,32 +0,0 @@
- default_language_version:
-   python: python3
- exclude: 'dotnet'
- ci:
-   autofix_prs: true
-   autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
-   autoupdate_schedule: 'quarterly'
-
- repos:
-   - repo: https://github.com/pre-commit/pre-commit-hooks
-     rev: v4.4.0
-     hooks:
-       - id: check-ast
-       # - id: check-yaml
-       - id: check-toml
-       - id: check-json
-       - id: check-byte-order-marker
-         exclude: .gitignore
-       - id: check-merge-conflict
-       - id: detect-private-key
-       - id: trailing-whitespace
-       - id: end-of-file-fixer
-       - id: no-commit-to-branch
-   - repo: https://github.com/psf/black
-     rev: 23.3.0
-     hooks:
-       - id: black
- # - repo: https://github.com/charliermarsh/ruff-pre-commit
- #   rev: v0.0.261
- #   hooks:
- #     - id: ruff
- #       args: ["--fix"]
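With this config deleted, these checks no longer run on commit. For anyone who still wants them locally, a minimal sketch of the standard pre-commit CLI workflow that such a config assumes (generic `pre-commit` commands, nothing project-specific):

```sh
pip install pre-commit        # install the pre-commit tool
pre-commit install            # register the git hook in .git/hooks
pre-commit run --all-files    # run every configured hook once across the repo
pre-commit autoupdate         # bump each hook's `rev:` pin to its latest tag
```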
Dockerfile CHANGED
@@ -1,34 +1,34 @@
- # 此Dockerfile适用于“无本地模型”的迷你运行环境构建
- # 如果需要使用chatglm等本地模型或者latex运行依赖,请参考 docker-compose.yml
- # - 如何构建: 先修改 `config.py`, 然后 `docker build -t gpt-academic . `
- # - 如何运行(Linux下): `docker run --rm -it --net=host gpt-academic `
- # - 如何运行(其他操作系统,选择任意一个固定端口50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic `
  FROM python:3.11

- # 非必要步骤,更换pip源 (以下三行,可以删除)
  RUN echo '[global]' > /etc/pip.conf && \
      echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
      echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

- # 进入工作路径(必要)
  WORKDIR /gpt

- # 安装大部分依赖,利用Docker缓存加速以后的构建 (以下三行,可以删除)
  COPY requirements.txt ./
  RUN pip3 install -r requirements.txt

- # 装载项目文件,安装剩余依赖(必要)
  COPY . .
  RUN pip3 install -r requirements.txt

- # 非必要步骤,用于预热模块(可以删除)
  RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

- # 启动(必要)
  CMD ["python3", "-u", "main.py"]

+ # 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型或者latex运行依赖,请参考 docker-compose.yml
+ # 如何构建: 先修改 `config.py`, 然后 `docker build -t gpt-academic . `
+ # 如何运行(Linux下): `docker run --rm -it --net=host gpt-academic `
+ # 如何运行(其他操作系统,选择任意一个固定端口50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic `
  FROM python:3.11

+ # 非必要步骤,更换pip源
  RUN echo '[global]' > /etc/pip.conf && \
      echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
      echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

+ # 进入工作路径
  WORKDIR /gpt

+ # 安装大部分依赖,利用Docker缓存加速以后的构建
  COPY requirements.txt ./
+ COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
  RUN pip3 install -r requirements.txt

+ # 装载项目文件,安装剩余依赖
  COPY . .
  RUN pip3 install -r requirements.txt

+ # 非必要步骤,用于预热模块
  RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

+ # 启动
  CMD ["python3", "-u", "main.py"]
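Collected from the Dockerfile's own header comments, the intended build-and-run workflow is:

```sh
# Edit config.py first, then build the image:
docker build -t gpt-academic .

# Run on Linux (host networking is simplest):
docker run --rm -it --net=host gpt-academic

# Run on other OSes: pick a fixed port (e.g. 50923) and publish it:
docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
```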
README.md CHANGED
@@ -11,96 +11,73 @@ pinned: false

  # ChatGPT 学术优化
  > **Note**
- >
- > 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。
- >
- > 2023.12.26: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
-
- <br>
-
- <div align=center>
- <h1 aligh="center">
- <img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)
- </h1>
-
- [![Github][Github-image]][Github-url]
- [![License][License-image]][License-url]
- [![Releases][Releases-image]][Releases-url]
- [![Installation][Installation-image]][Installation-url]
- [![Wiki][Wiki-image]][Wiki-url]
- [![PR][PRs-image]][PRs-url]
-
- [Github-image]: https://img.shields.io/badge/github-12100E.svg?style=flat-square
- [License-image]: https://img.shields.io/github/license/binary-husky/gpt_academic?label=License&style=flat-square&color=orange
- [Releases-image]: https://img.shields.io/github/release/binary-husky/gpt_academic?label=Release&style=flat-square&color=blue
- [Installation-image]: https://img.shields.io/badge/dynamic/json?color=blue&url=https://raw.githubusercontent.com/binary-husky/gpt_academic/master/version&query=$.version&label=Installation&style=flat-square
- [Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square
- [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square
-
- [Github-url]: https://github.com/binary-husky/gpt_academic
- [License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE
- [Releases-url]: https://github.com/binary-husky/gpt_academic/releases
- [Installation-url]: https://github.com/binary-husky/gpt_academic#installation
- [Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki
- [PRs-url]: https://github.com/binary-husky/gpt_academic/pulls

- </div>
- <br>

- **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!**

- If you like this project, please give it a Star.
- Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
- <br>

- > [!NOTE]
- > 1.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。
- > [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki))
  >
- > 2.本项目兼容并鼓励尝试国内中文大语言基座模型如通义千问,智谱GLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交即可生效。

- <br><br>

  <div align="center">

  功能(⭐= 近期新增功能) | 描述
  --- | ---
- ⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱GLM4](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/)
- ⭐支持mermaid图像渲染 | 支持让GPT生成[流程图](https://www.bilibili.com/video/BV18c41147H9/)、状态转移图、甘特图、饼状图、GitGraph等等(3.7版本)
- ⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
- ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机
- ⭐AutoGen多智能体插件 | [插件] 借助微软AutoGen,探索多Agent的智能涌现可能!
- ⭐虚空终端插件 | [插件] 能够使用自然语言直接调度本项目其他插件
- 润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码
  [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键
- 模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
- [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键剖析Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW)
- 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读latex/pdf论文全文并生成摘要
- Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色latex论文
- 批量注释生成 | [插件] 一键批量生成函数注释
- Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?就是出自他的手笔
- [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [插件] PDF论文提取题目&摘要+翻译全文(多线程)
- [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
- Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
- [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
- 互联网信息聚合+GPT | [插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时
  公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
  启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
- [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)伺候的感觉一定会很不错吧?
  更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/)
  ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中)
  更多新功能展示 (图像生成等) …… | 见本文档结尾处 ……
  </div>

  - 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
  <div align="center">
- <img src="https://user-images.githubusercontent.com/96192199/279702205-d81137c3-affd-4cd1-bb5e-b15610389762.gif" width="700" >
  </div>

- - 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放剪贴板
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
  </div>
@@ -110,99 +87,66 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼
  <img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
  </div>

- - 如果输出包含公式,会以tex形式和渲染形式同时显示,方便复制和阅读
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
  </div>

- - 懒得看项目代码?直接把整个工程炫ChatGPT嘴里
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
  </div>

- - 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + GPT4)
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/232537274-deca0563-7aa6-4b5d-94a2-b7c453c47794.png" width="700" >
  </div>

- <br><br>
-
  # Installation
-
- ```mermaid
- flowchart TD
-     A{"安装方法"} --> W1("I. 🔑直接运行 (Windows, Linux or MacOS)")
-     W1 --> W11["1. Python pip包管理依赖"]
-     W1 --> W12["2. Anaconda包管理依赖(推荐⭐)"]
-
-     A --> W2["II. 🐳使用Docker (Windows, Linux or MacOS)"]
-
-     W2 --> k1["1. 部署项目全部能力的大镜像(推荐⭐)"]
-     W2 --> k2["2. 仅在线模型(GPT, GLM4等)镜像"]
-     W2 --> k3["3. 在线模型 + Latex的大镜像"]
-
-     A --> W4["IV. 🚀其他部署方法"]
-     W4 --> C1["1. Windows/MacOS 一键安装运行脚本(推荐⭐)"]
-     W4 --> C2["2. Huggingface, Sealos远程部署"]
-     W4 --> C4["3. ... 其他 ..."]
- ```
-
- ### 安装方法I:直接运行 (Windows, Linux or MacOS)

  1. 下载项目

- ```sh
- git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
- cd gpt_academic
- ```
-
- 2. 配置API_KEY等变量
-
- 在`config.py`中,配置API KEY等变量。[特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)、[Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。

- 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,从而确保自动更新时不会丢失配置 」。

- 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。

  3. 安装依赖
- ```sh
- # (选择I: 如熟悉python, python推荐版本 3.9 ~ 3.11)备注:使用官方pip源或者阿里pip源, 临时换源方法:python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
- python -m pip install -r requirements.txt

- # (选择II: 使用Anaconda)步骤也是类似的 (https://www.bilibili.com/video/BV1rc411W7Dr):
- conda create -n gptac_venv python=3.11  # 创建anaconda环境
- conda activate gptac_venv               # 激活anaconda环境
- python -m pip install -r requirements.txt  # 这个步骤和pip安装一样的步骤
- ```

  <details><summary>如果需要支持清华ChatGLM2/复旦MOSS/RWKV作为后端,请点击展开此处</summary>
  <p>

- 【可选步骤】如果需要支持清华ChatGLM3/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强):
-
  ```sh
- # 【可选步骤I】支持清华ChatGLM3。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
- python -m pip install -r request_llms/requirements_chatglm.txt

  # 【可选步骤II】支持复旦MOSS
- python -m pip install -r request_llms/requirements_moss.txt
- git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss  # 注意执行此行代码时,必须处于项目根路径

  # 【可选步骤III】支持RWKV Runner
  参考wiki:https://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner

  # 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型,目前支持的全部模型如下(jittorllms系列目前仅支持docker方案):
- AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
-
- # 【可选步骤V】支持本地模型INT8,INT4量化(这里所指的模型本身不是量化版本,目前deepseek-coder支持,后面测试后会加入更多模型量化选择)
- pip install bitsandbytes
- # windows用户安装bitsandbytes需要使用下面bitsandbytes-windows-webui
- python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui
- pip install -U git+https://github.com/huggingface/transformers.git
- pip install -U git+https://github.com/huggingface/accelerate.git
- pip install peft
  ```

  </p>
@@ -211,86 +155,102 @@ pip install peft

  4. 运行
- ```sh
- python main.py
- ```

  ### 安装方法II:使用Docker

- 0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法部署完整项目)
- [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml)

- ``` sh
- # 修改docker-compose.yml,保留方案0并删除其他方案。然后运行:
- docker-compose up
- ```
-
- 1. 仅ChatGPT + GLM4 + 文心一言+spark等在线模型(推荐大多数人选择)
  [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
  [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
  [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)

- ``` sh
- # 修改docker-compose.yml,保留方案1并删除其他方案。然后运行:
- docker-compose up
- ```
-
- P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用方案4或者方案0获取Latex功能。

- 2. ChatGPT + GLM3 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
  [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)

- ``` sh
- # 修改docker-compose.yml,保留方案2并删除其他方案。然后运行:
- docker-compose up
- ```

- ### 安装方法III:其他部署方法
- 1. **Windows一键运行脚本**。
- 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。

- 2. 使用第三方API、Azure等、文心一言、星火等,见[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)

- 3. 云服务器远程部署避坑指南。
- 请访问[云服务器远程部署wiki](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)

- 4. 在其他平台部署&二级网址部署
-   - 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。
-   - 使用WSL2(Windows Subsystem for Linux 子系统)。请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
-   - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI运行说明](docs/WithFastapi.md)

- <br><br>

  # Advanced Usage
  ### I:自定义新的便捷按钮(学术快捷键)
-
- 任意文本编辑器打开`core_functional.py`,添加如下条目,然后重启程序。(如果按钮已存在,那么可以直接修改(前缀、后缀都已支持热修改),无需重启程序即可生效。)
  例如
-
- ```python
  "超级英译中": {
      # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
-     "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
-
      # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来。
      "Suffix": "",
  },
  ```
-
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
  </div>

  ### II:自定义函数插件
  编写强大的函数插件来执行任何你想得到的和想不到的任务。
  本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。
  详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。

- <br><br>

- # Updates
- ### I:动态

  1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件,
  另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。
@@ -331,23 +291,28 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
  <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
  </div>

- 7. OpenAI图像生成

  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
  </div>

- 8. 基于mermaid的流图、脑图绘制
  <div align="center">
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/c518b82f-bd53-46e2-baf5-ad1b081c1da4" width="500" >
  </div>

- 9. Latex全文校对纠错
  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
  </div>

- 10. 语言、主题切换
  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/b6799499-b6fb-4f0c-9c8e-1b441872f4e8" width="500" >
  </div>
@@ -355,14 +320,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h

  ### II:版本:
- - version 3.80(TODO): 优化AutoGen插件主题并设计一系列衍生插件
- - version 3.70: 引入Mermaid绘图,实现GPT画脑图等功能
- - version 3.60: 引入AutoGen作为新一代插件的基石
- - version 3.57: 支持GLM3,星火v3,文心一言v4,修复本地模型的并发BUG
- - version 3.56: 支持动态追加基础功能按钮,新汇报PDF汇总页面
- - version 3.55: 重构前端界面,引入悬浮窗口与菜单栏
- - version 3.54: 新增动态代码解释器(Code Interpreter)(待完善)
- - version 3.53: 支持动态选择不同界面主题,提高稳定性&解决多用户冲突问题
  - version 3.50: 使用自然语言调用本项目的所有函数插件(虚空终端),支持插件分类,改进UI,设计新主题
  - version 3.49: 支持百度千帆平台和文心一言
  - version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火
@@ -376,58 +334,25 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
  - version 3.0: 对chatglm和其他小型llm的支持
  - version 2.6: 重构了插件结构,提高了交互性,加入更多插件
  - version 2.5: 自更新,解决总结大工程源代码时文本过长、token溢出的问题
- - version 2.4: 新增PDF全文翻译功能; 新增输入区切换位置的功能
  - version 2.3: 增强多线程交互性
  - version 2.2: 函数插件支持热重载
  - version 2.1: 可折叠式布局
  - version 2.0: 引入模块化函数插件
  - version 1.0: 基础功能

- GPT Academic开发者QQ群:`610599535`

  - 已知问题
    - 某些浏览器翻译插件干扰此软件前端的运行
-   - 官方Gradio目前有很多兼容性问题,请**务必使用`requirements.txt`安装Gradio**
-
- ```mermaid
- timeline LR
-     title GPT-Academic项目发展历程
-     section 2.x
-         1.0~2.2: 基础功能: 引入模块化函数插件: 可折叠式布局: 函数插件支持热重载
-         2.3~2.5: 增强多线程交互性: 新增PDF全文翻译功能: 新增输入区切换位置的功能: 自更新
-         2.6: 重构了插件结构: 提高了交互性: 加入更多插件
-     section 3.x
-         3.0~3.1: 对chatglm支持: 对其他小型llm支持: 支持同时问询多个gpt模型: 支持多个apikey负载均衡
-         3.2~3.3: 函数插件支持更多参数接口: 保存对话功能: 解读任意语言代码: 同时询问任意的LLM组合: 互联网信息综合功能
-         3.4: 加入arxiv论文翻译: 加入latex论文批改功能
-         3.44: 正式支持Azure: 优化界面易用性
-         3.46: 自定义ChatGLM2微调模型: 实时语音对话
-         3.49: 支持阿里达摩院通义千问: 上海AI-Lab书生: 讯飞星火: 支持百度千帆平台 & 文心一言
-         3.50: 虚空终端: 支持插件分类: 改进UI: 设计新主题
-         3.53: 动态选择不同界面主题: 提高稳定性: 解决多用户冲突问题
-         3.55: 动态代码解释器: 重构前端界面: 引入悬浮窗口与菜单栏
-         3.56: 动态追加基础功能按钮: 新汇报PDF汇总页面
-         3.57: GLM3, 星火v3: 支持文心一言v4: 修复本地模型的并发BUG
-         3.60: 引入AutoGen
-         3.70: 引入Mermaid绘图: 实现GPT画脑图等功能
-         3.80(TODO): 优化AutoGen插件主题: 设计衍生插件
- ```

  ### III:主题
  可以通过修改`THEME`选项(config.py)变更主题
  1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/)

- ### IV:本项目的开发分支
-
- 1. `master` 分支: 主分支,稳定版
- 2. `frontier` 分支: 开发分支,测试版
- 3. 如何[接入其他大模型](request_llms/README.md)
- 4. 访问GPT-Academic的[在线服务并支持我们](https://github.com/binary-husky/gpt_academic/wiki/online)
-
- ### V:参考与学习

  ```
  代码中参考了很多其他优秀项目中的设计,顺序不分先后:
  # ChatGPT 学术优化
  > **Note**
+ >
+ > 2023.7.8: Gradio, Pydantic依赖调整,已修改 `requirements.txt`。请及时**更新代码**,安装依赖时,请严格选择`requirements.txt`中**指定的版本**
+ >
+ > `pip install -r requirements.txt`

+ # <div align=center><img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)</div>

+ **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或函数插件,欢迎发pull requests!**

+ If you like this project, please give it a Star. If you've come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request. We also have a README in [English|](docs/README_EN.md)[日本語|](docs/README_JP.md)[한국어|](https://github.com/mldljyh/ko_gpt_academic)[Русский|](docs/README_RS.md)[Français](docs/README_FR.md) translated by this project itself.
+ To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).

+ > **Note**
  >
+ > 1.请注意只有 **高亮** 标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。
+ >
+ > 2.本项目中每个文件的功能都在[自译解报告`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题[`wiki`](https://github.com/binary-husky/gpt_academic/wiki)。[安装方法](#installation) | [配置说明](https://github.com/binary-husky/gpt_academic/wiki/%E9%A1%B9%E7%9B%AE%E9%85%8D%E7%BD%AE%E8%AF%B4%E6%98%8E)。
+ >
+ > 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM和Moss等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。
+

  <div align="center">

  功能(⭐= 近期新增功能) | 描述
  --- | ---
+ ⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, [通义千问](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
+ 一键润色 | 支持一键润色、一键查找论文语法错误
+ 一键中英互译 | 一键中英互译
+ 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释
  [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键
+ 模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
+ [自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码
+ [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树
+ 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [函数插件] 一键解读latex/pdf论文全文并生成摘要
+ Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [函数插件] 一键翻译或润色latex论文
+ 批量注释生成 | [函数插件] 一键批量生成函数注释
+ Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?
+ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
+ [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程)
+ [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
+ Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
+ [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
+ 互联网信息聚合+GPT | [函数插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时
+ ⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
+ ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [函数插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机
  公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
+ 多线程函数插件支持 | 支持多线程调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
  启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
+ [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧?
+ ⭐ChatGLM2微调模型 | 支持加载ChatGLM2微调模型,提供ChatGLM2微调辅助插件
  更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/)
  ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中)
+ ⭐虚空终端插件 | [函数插件] 用自然语言,直接调度本项目其他插件
  更多新功能展示 (图像生成等) …… | 见本文档结尾处 ……
  </div>

  - 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
  <div align="center">
+ <img src="https://user-images.githubusercontent.com/96192199/230361456-61078362-a966-4eb5-b49e-3c62ef18b860.gif" width="700" >
  </div>

+ - 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放粘贴板
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
  </div>

  <img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
  </div>

+ - 如果输出包含公式,会同时以tex形式和渲染形式显示,方便复制和阅读
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
  </div>

+ - 懒得看项目代码?整个工程直接给chatgpt炫嘴里
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
  </div>

+ - 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + [API2D](https://api2d.com/)-GPT4)
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/232537274-deca0563-7aa6-4b5d-94a2-b7c453c47794.png" width="700" >
  </div>

  # Installation
+ ### 安装方法I:直接运行 (Windows, Linux or MacOS)

  1. 下载项目
+ ```sh
+ git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
+ cd gpt_academic
+ ```

+ 2. 配置API_KEY

+ 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)

+ (P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。因此,如果您能理解我们的配置读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中(仅复制您修改过的配置条目即可)。`config_private.py`不受git管控,可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项,环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`)

  3. 安装依赖
+ ```sh
+ # (选择I: 如熟悉python)(python版本3.9以上,越新越好),备注:使用官方pip源或者阿里pip源,临时换源方法:python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
+ python -m pip install -r requirements.txt

+ # (选择II: 如不熟悉python)使用anaconda,步骤也是类似的 (https://www.bilibili.com/video/BV1rc411W7Dr):
+ conda create -n gptac_venv python=3.11  # 创建anaconda环境
+ conda activate gptac_venv               # 激活anaconda环境
+ python -m pip install -r requirements.txt  # 这个步骤和pip安装一样的步骤
+ ```

  <details><summary>如果需要支持清华ChatGLM2/复旦MOSS/RWKV作为后端,请点击展开此处</summary>
  <p>

+ 【可选步骤】如果需要支持清华ChatGLM2/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强):
  ```sh
+ # 【可选步骤I】支持清华ChatGLM2。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
+ python -m pip install -r request_llm/requirements_chatglm.txt

  # 【可选步骤II】支持复旦MOSS
+ python -m pip install -r request_llm/requirements_moss.txt
+ git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss  # 注意执行此行代码时,必须处于项目根路径

  # 【可选步骤III】支持RWKV Runner
  参考wiki:https://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner

  # 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型,目前支持的全部模型如下(jittorllms系列目前仅支持docker方案):
+ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
  ```

  </p>

  4. 运行
+ ```sh
+ python main.py
+ ```

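A note on step 2 above: the documented configuration precedence is environment variable > `config_private.py` > `config.py`. A minimal illustrative sketch of a loader honoring that order — an assumption for clarity, not gpt_academic's actual `get_conf` implementation:

```python
import importlib
import os

def read_conf(name: str, default=None):
    """Resolve a config item with the README's documented priority:
    environment variable > config_private.py > config.py."""
    # 1. An environment variable wins if present.
    if name in os.environ:
        return os.environ[name]
    # 2. Fall back to the untracked private override file, if it defines the name.
    try:
        private = importlib.import_module("config_private")
        if hasattr(private, name):
            return getattr(private, name)
    except ImportError:
        pass
    # 3. Finally read the tracked default config.
    public = importlib.import_module("config")
    return getattr(public, name, default)
```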
  ### 安装方法II:使用Docker

+ [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml)

+ 1. 仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1)
  [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
  [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
  [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)

+ ``` sh
+ git clone --depth=1 https://github.com/binary-husky/gpt_academic.git  # 下载项目
+ cd gpt_academic                 # 进入路径
+ nano config.py                  # 用任意文本编辑器编辑config.py, 配置 “Proxy”, “API_KEY” 以及 “WEB_PORT” (例如50923) 等
+ docker build -t gpt-academic .  # 安装

+ #(最后一步-Linux操作系统)用`--net=host`更方便快捷
+ docker run --rm -it --net=host gpt-academic
+ #(最后一步-MacOS/Windows操作系统)只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
+ docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
+ ```
+ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。
+
+ 2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
  [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)

+ ``` sh
+ # 修改docker-compose.yml,保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置,参考其中注释即可
+ docker-compose up
+ ```
+
+ 3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
+ [![jittorllms](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml)

+ ``` sh
+ # 修改docker-compose.yml,保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置,参考其中注释即可
+ docker-compose up
+ ```

+ ### 安装方法III:其他部署姿势
+ 1. 一键运行脚本。
+ 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。
+ 脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。
+
+ 2. 使用docker-compose运行。
+ 请阅读docker-compose.yml后,按照其中的提示操作即可

+ 3. 如何使用反代URL
+ 按照`config.py`中的说明配置API_URL_REDIRECT即可。

+ 4. 微软云AzureAPI
+ 按照`config.py`中的说明配置即可(AZURE_ENDPOINT等四个配置)

+ 5. 远程云服务器部署(需要云服务器知识与经验)。
+ 请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)

+ 6. 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。
+
+ 7. 使用WSL2(Windows Subsystem for Linux 子系统)。
+ 请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
+
+ 8. 如何在二级网址(如`http://localhost/subpath`)下运行。
+ 请访问[FastAPI运行说明](docs/WithFastapi.md)

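Regarding item 3 in the list above: the reverse-proxy redirect is a dict in `config.py` that maps an official endpoint to your mirror. The exact key/value shown here is an assumption based on the option name — check the comments in `config.py` for the authoritative format:

```python
# config.py — hypothetical example of the API_URL_REDIRECT option
API_URL_REDIRECT = {
    "https://api.openai.com/v1/chat/completions":
        "https://your-reverse-proxy.example.com/v1/chat/completions",  # your mirror URL
}
```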

  # Advanced Usage
  ### I:自定义新的便捷按钮(学术快捷键)
+ 任意文本编辑器打开`core_functional.py`,添加条目如下,然后重启程序即可。(如果按钮已经添加成功并可见,那么前缀、后缀都支持热修改,无需重启程序即可生效。)
  例如
+ ```
  "超级英译中": {
      # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
+     "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
+
      # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来。
      "Suffix": "",
  },
  ```
  <div align="center">
  <img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
  </div>

  ### II:自定义函数插件
+
  编写强大的函数插件来执行任何你想得到的和想不到的任务。
  本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。
  详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。

+ # Latest Update
+ ### I:新功能动态

  1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件,
  另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。

  <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
  </div>

+ 7. 新增MOSS大语言模型支持
+ <div align="center">
+ <img src="https://user-images.githubusercontent.com/96192199/236639178-92836f37-13af-4fdd-984d-b4450fe30336.png" width="500" >
+ </div>
+
+ 8. OpenAI图像生成
  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
  </div>

+ 9. OpenAI音频解析与总结
  <div align="center">
+ <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/709ccf95-3aee-498a-934a-e1c22d3d5d5b" width="500" >
  </div>

+ 10. Latex全文校对纠错
  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
  </div>

+ 11. 语言、主题切换
  <div align="center">
  <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/b6799499-b6fb-4f0c-9c8e-1b441872f4e8" width="500" >
  </div>

  ### II:版本:
+ - version 3.60(todo): 优化虚空终端,引入code interpreter和更多插件
  - version 3.50: 使用自然语言调用本项目的所有函数插件(虚空终端),支持插件分类,改进UI,设计新主题
  - version 3.49: 支持百度千帆平台和文心一言
  - version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火

  - version 3.0: 对chatglm和其他小型llm的支持
  - version 2.6: 重构了插件结构,提高了交互性,加入更多插件
  - version 2.5: 自更新,解决总结大工程源代码时文本过长、token溢出的问题
+ - version 2.4: (1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。
  - version 2.3: 增强多线程交互性
  - version 2.2: 函数插件支持热重载
  - version 2.1: 可折叠式布局
  - version 2.0: 引入模块化函数插件
  - version 1.0: 基础功能

+ gpt_academic开发者QQ群-2:610599535

  - 已知问题
    - 某些浏览器翻译插件干扰此软件前端的运行
+   - 官方Gradio目前有很多兼容性Bug,请务必使用`requirements.txt`安装Gradio

  ### III:主题
  可以通过修改`THEME`选项(config.py)变更主题
  1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/)

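Switching themes is a one-line change in `config.py`; a sketch assuming the option value matches the theme name listed above (check config.py's comments for the exact spelling):

```python
# config.py — pick the UI theme (hypothetical value)
THEME = "Chuanhu-Small-and-Beautiful"
```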

+ ### IV:参考与学习

  ```
  代码中参考了很多其他优秀项目中的设计,顺序不分先后:
app.py CHANGED
@@ -1,40 +1,24 @@
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染

- help_menu_description = \
- """Github源代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic),
- 感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors).
- </br></br>常见问题请查阅[项目Wiki](https://github.com/binary-husky/gpt_academic/wiki),
- 如遇到Bug请前往[Bug反馈](https://github.com/binary-husky/gpt_academic/issues).
- </br></br>普通对话使用说明: 1. 输入问题; 2. 点击提交
- </br></br>基础功能区使用说明: 1. 输入文本; 2. 点击任意基础功能区按钮
- </br></br>函数插件区使用说明: 1. 输入路径/问题, 或者上传文件; 2. 点击任意函数插件区按钮
- </br></br>虚空终端使用说明: 点击虚空终端, 然后根据提示输入指令, 再次点击虚空终端
- </br></br>如何保存对话: 点击保存当前的对话按钮
- </br></br>如何语音对话: 请阅读Wiki
- </br></br>如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)"""
-
  def main():
      import subprocess, sys
-     subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://public.agent-matrix.com/publish/gradio-3.32.8-py3-none-any.whl'])
      import gradio as gr
-     if gr.__version__ not in ['3.32.8']:
-         raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.")
-     from request_llms.bridge_all import predict
      from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
-     # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址
      proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
      CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
-     ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING, AVAIL_THEMES, THEME, ADD_WAIFU = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING', 'AVAIL_THEMES', 'THEME', 'ADD_WAIFU')
-     DARK_MODE, NUM_CUSTOM_BASIC_BTN, SSL_KEYFILE, SSL_CERTFILE = get_conf('DARK_MODE', 'NUM_CUSTOM_BASIC_BTN', 'SSL_KEYFILE', 'SSL_CERTFILE')
-     INIT_SYS_PROMPT = get_conf('INIT_SYS_PROMPT')

      # 如果WEB_PORT是-1, 则随机选取WEB端口
      PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
      from check_proxy import get_current_version
-     from themes.theme import adjust_theme, advanced_css, theme_declaration, js_code_clear, js_code_reset, js_code_show_or_hide, js_code_show_or_hide_group2
-     from themes.theme import js_code_for_css_changing, js_code_for_toggle_darkmode, js_code_for_persistent_cookie_init
-     from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, init_cookie
      title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"

      # 问询记录, python 版本建议3.9+(越新越好)
      import logging, uuid
@@ -51,7 +35,7 @@ def main():

      # 高级函数插件
      from crazy_functional import get_crazy_functions
-     DEFAULT_FN_GROUPS = get_conf('DEFAULT_FN_GROUPS')
      plugins = get_crazy_functions()
      all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')]))
      match_group = lambda tags, groups: any([g in groups for g in tags.split('|')])
@@ -67,19 +51,16 @@ def main():
      proxy_info = check_proxy(proxies)

      gr_L1 = lambda: gr.Row().style()
-     gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id, min_width=400)
      if LAYOUT == "TOP-DOWN":
          gr_L1 = lambda: DummyWith()
          gr_L2 = lambda scale, elem_id: gr.Row()
          CHATBOT_HEIGHT /= 2

      cancel_handles = []
-     customize_btns = {}
-     predefined_btns = {}
      with gr.Blocks(title="GPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
          gr.HTML(title_html)
          gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
-         secret_css, dark_mode, py_pickle_cookie = gr.Textbox(visible=False), gr.Textbox(DARK_MODE, visible=False), gr.Textbox(visible=False)
          cookies = gr.State(load_chat_cookies())
          with gr_L1():
              with gr_L2(scale=2, elem_id="gpt-chat"):
@@ -89,45 +70,37 @@ def main():
              with gr_L2(scale=1, elem_id="gpt-panel"):
                  with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
                      with gr.Row():
-                         txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持多个OpenAI密钥共存。").style(container=False)
                      with gr.Row():
-                         submitBtn = gr.Button("提交", elem_id="elem_submit", variant="primary")
                      with gr.Row():
-                         resetBtn = gr.Button("重置", elem_id="elem_reset", variant="secondary"); resetBtn.style(size="sm")
-                         stopBtn = gr.Button("停止", elem_id="elem_stop", variant="secondary"); stopBtn.style(size="sm")
-                         clearBtn = gr.Button("清除", elem_id="elem_clear", variant="secondary", visible=False); clearBtn.style(size="sm")
-                     if ENABLE_AUDIO:
                          with gr.Row():
-                             audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
                      with gr.Row():
                          status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
-
                  with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
                      with gr.Row():
-                         for k in range(NUM_CUSTOM_BASIC_BTN):
-                             customize_btn = gr.Button("自定义按钮" + str(k+1), visible=False, variant="secondary", info_str=f'基础功能区: 自定义按钮')
-                             customize_btn.style(size="sm")
-                             customize_btns.update({"自定义按钮" + str(k+1): customize_btn})
                          for k in functional:
                              if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
                              variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
-                             functional[k]["Button"] = gr.Button(k, variant=variant, info_str=f'基础功能区: {k}')
                              functional[k]["Button"].style(size="sm")
-                             predefined_btns.update({k: functional[k]["Button"]})
                  with gr.Accordion("函数插件区", open=True, elem_id="plugin-panel") as area_crazy_fn:
                      with gr.Row():
                          gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
                      with gr.Row(elem_id="input-plugin-group"):
-                         plugin_group_sel = gr.Dropdown(choices=all_plugin_groups, label='', show_label=False, value=DEFAULT_FN_GROUPS,
                              multiselect=True, interactive=True, elem_classes='normal_mut_select').style(container=False)
                      with gr.Row():
                          for k, plugin in plugins.items():
                              if not plugin.get("AsButton", True): continue
                              visible = True if match_group(plugin['Group'], DEFAULT_FN_GROUPS) else False
                              variant = plugins[k]["Color"] if "Color" in plugin else "secondary"
-                             info = plugins[k].get("Info", k)
-                             plugin['Button'] = plugins[k]['Button'] = gr.Button(k, variant=variant,
-                                 visible=visible, info_str=f'函数插件区: {info}').style(size="sm")
                      with gr.Row():
                          with gr.Accordion("更多函数插件", open=True):
                              dropdown_fn_list = []
@@ -138,143 +111,53 @@ def main():
                              with gr.Row():
                                  dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
                              with gr.Row():
-                                 plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
                                      placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
                              with gr.Row():
                                  switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary").style(size="sm")
                      with gr.Row():
-                         with gr.Accordion("点击展开“文件下载区”。", open=False) as area_file_up:
-                             file_upload = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload")
-
-         with gr.Floating(init_x="0%", init_y="0%", visible=True, width=None, drag="forbidden", elem_id="tooltip"):
-             with gr.Row():
-                 with gr.Tab("上传文件", elem_id="interact-panel"):
-                     gr.Markdown("请上传本地文件/压缩包供“函数插件区”功能调用。请注意: 上传文件后会自动把输入区修改为相应路径。")
-                     file_upload_2 = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload_float")
-
-                 with gr.Tab("更换模型", elem_id="interact-panel"):
-                     md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
                      top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                      temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
-                     max_length_sl = gr.Slider(minimum=256, maximum=1024*32, value=4096, step=128, interactive=True, label="Local LLM MaxLength",)
-                     system_prompt = gr.Textbox(show_label=True, lines=2, placeholder=f"System Prompt", label="System prompt", value=INIT_SYS_PROMPT)
-
-                 with gr.Tab("界面外观", elem_id="interact-panel"):
-                     theme_dropdown = gr.Dropdown(AVAIL_THEMES, value=THEME, label="更换UI主题").style(container=False)
-                     checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "浮动输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区", elem_id='cbs').style(container=False)
-                     opt = ["自定义菜单"]
-                     value=[]
-                     if ADD_WAIFU: opt += ["添加Live2D形象"]; value += ["添加Live2D形象"]
-                     checkboxes_2 = gr.CheckboxGroup(opt, value=value, label="显示/隐藏自定义菜单", elem_id='cbsc').style(container=False)
-                     dark_mode_btn = gr.Button("切换界面明暗 ☀", variant="secondary").style(size="sm")
-                     dark_mode_btn.click(None, None, None, _js=js_code_for_toggle_darkmode)
-                 with gr.Tab("帮助", elem_id="interact-panel"):
-                     gr.Markdown(help_menu_description)
-
-         with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_input_secondary:
-             with gr.Accordion("浮动输入区", open=True, elem_id="input-panel2"):
-                 with gr.Row() as row:
-                     row.style(equal_height=True)
-                     with gr.Column(scale=10):
-                         txt2 = gr.Textbox(show_label=False, placeholder="Input question here.",
-                             elem_id='user_input_float', lines=8, label="输入区2").style(container=False)
-                     with gr.Column(scale=1, min_width=40):
-                         submitBtn2 = gr.Button("提交", variant="primary"); submitBtn2.style(size="sm")
                          resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm")
                          stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
-                         clearBtn2 = gr.Button("清除", elem_id="elem_clear2", variant="secondary", visible=False); clearBtn2.style(size="sm")
-
-         with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_customize:
-             with gr.Accordion("自定义菜单", open=True, elem_id="edit-panel"):
-                 with gr.Row() as row:
-                     with gr.Column(scale=10):
-                         AVAIL_BTN = [btn for btn in customize_btns.keys()] + [k for k in functional]
-                         basic_btn_dropdown = gr.Dropdown(AVAIL_BTN, value="自定义按钮1", label="选择一个需要自定义基础功能区按钮").style(container=False)
-                         basic_fn_title = gr.Textbox(show_label=False, placeholder="输入新按钮名称", lines=1).style(container=False)
-                         basic_fn_prefix = gr.Textbox(show_label=False, placeholder="输入新提示前缀", lines=4).style(container=False)
-                         basic_fn_suffix = gr.Textbox(show_label=False, placeholder="输入新提示后缀", lines=4).style(container=False)
-                     with gr.Column(scale=1, min_width=70):
-                         basic_fn_confirm = gr.Button("确认并保存", variant="primary"); basic_fn_confirm.style(size="sm")
-                         basic_fn_clean = gr.Button("恢复默认", variant="primary"); basic_fn_clean.style(size="sm")
-                 def assign_btn(persistent_cookie_, cookies_, basic_btn_dropdown_, basic_fn_title, basic_fn_prefix, basic_fn_suffix, clean_up=False):
-                     ret = {}
-                     # 读取之前的自定义按钮
-                     customize_fn_overwrite_ = cookies_['customize_fn_overwrite']
-                     # 更新新的自定义按钮
-                     customize_fn_overwrite_.update({
-                         basic_btn_dropdown_:
-                             {
-                                 "Title": basic_fn_title,
-                                 "Prefix": basic_fn_prefix,
-                                 "Suffix": basic_fn_suffix,
-                             }
-                         }
-                     )
-                     if clean_up:
-                         customize_fn_overwrite_ = {}
-                     cookies_.update(customize_fn_overwrite_) # 更新cookie
-                     visible = (not clean_up) and (basic_fn_title != "")
-                     if basic_btn_dropdown_ in customize_btns:
-                         # 是自定义按钮,不是预定义按钮
-                         ret.update({customize_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
-                     else:
-                         # 是预定义按钮
-                         ret.update({predefined_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
-                     ret.update({cookies: cookies_})
-                     try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
-                     except: persistent_cookie_ = {}
-                     persistent_cookie_["custom_bnt"] = customize_fn_overwrite_ # dict update new value
-                     persistent_cookie_ = to_cookie_str(persistent_cookie_) # persistent cookie to dict
-                     ret.update({py_pickle_cookie: persistent_cookie_}) # write persistent cookie
-                     return ret
-
-                 # update btn
-                 h = basic_fn_confirm.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix],
-                                            [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
-                 h.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
-                 # clean up btn
-                 h2 = basic_fn_clean.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix, gr.State(True)],
-                                           [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
-                 h2.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
-
-         def persistent_cookie_reload(persistent_cookie_, cookies_):
-             ret = {}
-             for k in customize_btns:
-                 ret.update({customize_btns[k]: gr.update(visible=False, value="")})
-
-             try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
-             except: return ret
-
-             customize_fn_overwrite_ = persistent_cookie_.get("custom_bnt", {})
-             cookies_['customize_fn_overwrite'] = customize_fn_overwrite_
-             ret.update({cookies: cookies_})
-
-             for k,v in persistent_cookie_["custom_bnt"].items():
-                 if v['Title'] == "": continue
-                 if k in customize_btns: ret.update({customize_btns[k]: gr.update(visible=True, value=v['Title'])})
-                 else: ret.update({predefined_btns[k]: gr.update(visible=True, value=v['Title'])})
-             return ret

          # 功能区显示开关与功能区的互动
          def fn_area_visibility(a):
              ret = {}
-             ret.update({area_input_primary: gr.update(visible=("浮动输入区" not in a))})
-             ret.update({area_input_secondary: gr.update(visible=("浮动输入区" in a))})
              ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
-             if "浮动输入区" in a: ret.update({txt: gr.update(value="")})
-             return ret
-         checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, plugin_advanced_arg] )
-         checkboxes.select(None, [checkboxes], None, _js=js_code_show_or_hide)
-
-         # 功能区显示开关与功能区的互动
-         def fn_area_visibility_2(a):
-             ret = {}
-             ret.update({area_customize: gr.update(visible=("自定义菜单" in a))})
              return ret
-         checkboxes_2.select(fn_area_visibility_2, [checkboxes_2], [area_customize] )
-         checkboxes_2.select(None, [checkboxes_2], None, _js=js_code_show_or_hide_group2)
-
          # 整理反复出现的控件句柄组合
          input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
          output_combo = [cookies, chatbot, history, status]
@@ -284,28 +167,22 @@ def main():
284
  cancel_handles.append(txt2.submit(**predict_args))
285
  cancel_handles.append(submitBtn.click(**predict_args))
286
  cancel_handles.append(submitBtn2.click(**predict_args))
287
- resetBtn.click(None, None, [chatbot, history, status], _js=js_code_reset) # 先在前端快速清除chatbot&status
288
- resetBtn2.click(None, None, [chatbot, history, status], _js=js_code_reset) # 先在前端快速清除chatbot&status
289
- resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) # 再在后端清除history
290
- resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) # 再在后端清除history
291
- clearBtn.click(None, None, [txt, txt2], _js=js_code_clear)
292
- clearBtn2.click(None, None, [txt, txt2], _js=js_code_clear)
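These removed handlers implement a two-stage reset: the `_js` callback wipes the chatbot and status in the browser instantly, then a second back-end handler clears the Python-side `history` state. A minimal sketch of the same pattern (component names follow the diff; the inline JS stands in for the project's `js_code_reset`):

```python
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    history = gr.State([])
    status = gr.Markdown()
    resetBtn = gr.Button("重置")
    # stage 1: fn=None plus _js runs purely in the browser for instant visual feedback
    resetBtn.click(None, None, [chatbot, history, status],
                   _js="() => [[], [], '已重置']")
    # stage 2: the server-side handler clears the real conversation state
    resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
```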
293
  if AUTO_CLEAR_TXT:
294
- submitBtn.click(None, None, [txt, txt2], _js=js_code_clear)
295
- submitBtn2.click(None, None, [txt, txt2], _js=js_code_clear)
296
- txt.submit(None, None, [txt, txt2], _js=js_code_clear)
297
- txt2.submit(None, None, [txt, txt2], _js=js_code_clear)
298
  # 基础功能区的回调函数注册
299
  for k in functional:
300
  if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
301
  click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
302
  cancel_handles.append(click_handle)
303
- for btn in customize_btns.values():
304
- click_handle = btn.click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(btn.value)], outputs=output_combo)
305
- cancel_handles.append(click_handle)
306
  # 文件上传区,接收文件后与chatbot的互动
307
- file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
308
- file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
309
  # 函数插件-固定按钮区
310
  for k in plugins:
311
  if not plugins[k].get("AsButton", True): continue
@@ -315,34 +192,16 @@ def main():
315
  # 函数插件-下拉菜单与随变按钮的互动
316
  def on_dropdown_changed(k):
317
  variant = plugins[k]["Color"] if "Color" in plugins[k] else "secondary"
318
- info = plugins[k].get("Info", k)
319
- ret = {switchy_bt: gr.update(value=k, variant=variant, info_str=f'函数插件区: {info}')}
320
  if plugins[k].get("AdvancedArgs", False): # 是否唤起高级插件参数区
321
  ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + plugins[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))})
322
  else:
323
  ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
324
  return ret
325
  dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
326
-
327
  def on_md_dropdown_changed(k):
328
  return {chatbot: gr.update(label="当前模型:"+k)}
329
  md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
330
-
331
- def on_theme_dropdown_changed(theme, secret_css):
332
- adjust_theme, css_part1, _, adjust_dynamic_theme = load_dynamic_theme(theme)
333
- if adjust_dynamic_theme:
334
- css_part2 = adjust_dynamic_theme._get_theme_css()
335
- else:
336
- css_part2 = adjust_theme()._get_theme_css()
337
- return css_part2 + css_part1
338
-
339
- theme_handle = theme_dropdown.select(on_theme_dropdown_changed, [theme_dropdown, secret_css], [secret_css])
340
- theme_handle.then(
341
- None,
342
- [secret_css],
343
- None,
344
- _js=js_code_for_css_changing
345
- )
346
  # 随变按钮的回调函数注册
347
  def route(request: gr.Request, k, *args, **kwargs):
348
  if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
@@ -360,53 +219,52 @@ def main():
360
  if not group_list: # 处理特殊情况:没有选择任何插件组
361
  return [*[plugin['Button'].update(visible=False) for _, plugin in plugins_as_btn.items()], gr.Dropdown.update(choices=[])]
362
  for k, plugin in plugins.items():
363
- if plugin.get("AsButton", True):
364
  btn_list.append(plugin['Button'].update(visible=match_group(plugin['Group'], group_list))) # 刷新按钮
365
  if plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k) # 对于需要高级参数的插件,亦在下拉菜单中显示
366
  elif match_group(plugin['Group'], group_list): fns_list.append(k) # 刷新下拉列表
367
  return [*btn_list, gr.Dropdown.update(choices=fns_list)]
368
  plugin_group_sel.select(fn=on_group_change, inputs=[plugin_group_sel], outputs=[*[plugin['Button'] for name, plugin in plugins_as_btn.items()], dropdown])
369
- if ENABLE_AUDIO:
370
  from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
371
  rad = RealtimeAudioDistribution()
372
  def deal_audio(audio, cookies):
373
  rad.feed(cookies['uuid'].hex, audio)
374
  audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])
375
 
376
-
377
- demo.load(init_cookie, inputs=[cookies], outputs=[cookies])
378
- demo.load(persistent_cookie_reload, inputs = [py_pickle_cookie, cookies],
379
- outputs = [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()], _js=js_code_for_persistent_cookie_init)
380
- demo.load(None, inputs=[dark_mode], outputs=None, _js="""(dark_mode)=>{apply_cookie_for_checkbox(dark_mode);}""") # 配置暗色主题或亮色主题
381
- demo.load(None, inputs=[gr.Textbox(LAYOUT, visible=False)], outputs=None, _js='(LAYOUT)=>{GptAcademicJavaScriptInit(LAYOUT);}')
382
-
383
  # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
384
- def run_delayed_tasks():
385
  import threading, webbrowser, time
386
  print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
387
- if DARK_MODE: print(f"\t「暗色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
388
- else: print(f"\t「亮色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
389
-
390
- def auto_updates(): time.sleep(0); auto_update()
391
- def open_browser(): time.sleep(2); webbrowser.open_new_tab(f"http://localhost:{PORT}")
392
- def warm_up_mods(): time.sleep(6); warm_up_modules()
393
-
394
- threading.Thread(target=auto_updates, name="self-upgrade", daemon=True).start() # 查看自动更新
395
- threading.Thread(target=open_browser, name="open-browser", daemon=True).start() # 打开浏览器页面
396
- threading.Thread(target=warm_up_mods, name="warm-up", daemon=True).start() # 预热tiktoken模块
397
-
398
- run_delayed_tasks()
399
  demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
400
 
401
-
402
  # 如果需要在二级路径下运行
403
- # CUSTOM_PATH = get_conf('CUSTOM_PATH')
404
- # if CUSTOM_PATH != "/":
405
  # from toolbox import run_gradio_in_subpath
406
  # run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH)
407
- # else:
408
  # demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png",
409
- # blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile",f"{PATH_LOGGING}/admin"])
410
 
411
  if __name__ == "__main__":
412
  main()
 
1
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
2
 
 
 
3
  def main():
4
  import subprocess, sys
5
+ subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
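The `subprocess.check_call` line has the app install its pinned gradio fork at startup; invoking pip as `sys.executable -m pip` guarantees the package lands in the same interpreter that is running `app.py`, even inside a virtualenv. The general pattern, as a sketch:

```python
import subprocess, sys

def ensure_package(spec: str):
    # pip is a fast no-op when the requirement is already satisfied
    subprocess.check_call([sys.executable, "-m", "pip", "install", spec])
```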
6
  import gradio as gr
7
+ from request_llm.bridge_all import predict
 
 
8
  from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
9
+ # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
10
  proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
11
  CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
12
+ ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING')
 
 
13
 
14
  # 如果WEB_PORT是-1, 则随机选取WEB端口
15
  PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
16
  from check_proxy import get_current_version
17
+ from themes.theme import adjust_theme, advanced_css, theme_declaration
18
+ initial_prompt = "Serve me as a writing and programming assistant."
 
19
  title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
20
+ description = "代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic),"
21
+ description += "感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors)"
22
 
23
  # 问询记录, python 版本建议3.9+(越新越好)
24
  import logging, uuid
 
35
 
36
  # 高级函数插件
37
  from crazy_functional import get_crazy_functions
38
+ DEFAULT_FN_GROUPS, = get_conf('DEFAULT_FN_GROUPS')
39
  plugins = get_crazy_functions()
40
  all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')]))
41
  match_group = lambda tags, groups: any([g in groups for g in tags.split('|')])
 
51
  proxy_info = check_proxy(proxies)
52
 
53
  gr_L1 = lambda: gr.Row().style()
54
+ gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id)
55
  if LAYOUT == "TOP-DOWN":
56
  gr_L1 = lambda: DummyWith()
57
  gr_L2 = lambda scale, elem_id: gr.Row()
58
  CHATBOT_HEIGHT /= 2
59
 
60
  cancel_handles = []
 
 
61
  with gr.Blocks(title="GPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
62
  gr.HTML(title_html)
63
  gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
 
64
  cookies = gr.State(load_chat_cookies())
65
  with gr_L1():
66
  with gr_L2(scale=2, elem_id="gpt-chat"):
 
70
  with gr_L2(scale=1, elem_id="gpt-panel"):
71
  with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
72
  with gr.Row():
73
+ txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
74
  with gr.Row():
75
+ submitBtn = gr.Button("提交", variant="primary")
76
  with gr.Row():
77
+ resetBtn = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm")
78
+ stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
79
+ clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
80
+ if ENABLE_AUDIO:
81
  with gr.Row():
82
+ audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False).style(container=False)
83
  with gr.Row():
84
  status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
 
85
  with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
86
  with gr.Row():
 
 
 
 
87
  for k in functional:
88
  if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
89
  variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
90
+ functional[k]["Button"] = gr.Button(k, variant=variant)
91
  functional[k]["Button"].style(size="sm")
 
92
  with gr.Accordion("函数插件区", open=True, elem_id="plugin-panel") as area_crazy_fn:
93
  with gr.Row():
94
  gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
95
  with gr.Row(elem_id="input-plugin-group"):
96
+ plugin_group_sel = gr.Dropdown(choices=all_plugin_groups, label='', show_label=False, value=DEFAULT_FN_GROUPS,
97
  multiselect=True, interactive=True, elem_classes='normal_mut_select').style(container=False)
98
  with gr.Row():
99
  for k, plugin in plugins.items():
100
  if not plugin.get("AsButton", True): continue
101
  visible = True if match_group(plugin['Group'], DEFAULT_FN_GROUPS) else False
102
  variant = plugins[k]["Color"] if "Color" in plugin else "secondary"
103
+ plugin['Button'] = plugins[k]['Button'] = gr.Button(k, variant=variant, visible=visible).style(size="sm")
 
 
104
  with gr.Row():
105
  with gr.Accordion("更多函数插件", open=True):
106
  dropdown_fn_list = []
 
111
  with gr.Row():
112
  dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
113
  with gr.Row():
114
+ plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
115
  placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
116
  with gr.Row():
117
  switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary").style(size="sm")
118
  with gr.Row():
119
+ with gr.Accordion("点击展开“文件上传区”。上传本地文件/压缩包供函数插件调用。", open=False) as area_file_up:
120
+ file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
121
+ with gr.Accordion("更换模型 & SysPrompt & 交互界面布局", open=(LAYOUT == "TOP-DOWN"), elem_id="interact-panel"):
122
+ system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
 
 
 
 
123
  top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
124
  temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
125
+ max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
126
+ checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
127
+ md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
128
+ dark_mode_btn = gr.Button("Toggle Dark Mode ☀", variant="secondary").style(size="sm")
129
+ dark_mode_btn.click(None, None, None, _js="""() => {
130
+ if (document.querySelectorAll('.dark').length) {
131
+ document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark'));
132
+ } else {
133
+ document.querySelector('body').classList.add('dark');
134
+ }
135
+ }""",
136
+ )
137
+ gr.Markdown(description)
138
+ with gr.Accordion("备选输入区", open=True, visible=False, elem_id="input-panel2") as area_input_secondary:
139
+ with gr.Row():
140
+ txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False)
141
+ with gr.Row():
142
+ submitBtn2 = gr.Button("提交", variant="primary")
143
+ with gr.Row():
 
 
 
 
 
144
  resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm")
145
  stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
146
+ clearBtn2 = gr.Button("清除", variant="secondary", visible=False); clearBtn2.style(size="sm")
 
 
 
 
147
 
148
  # 功能区显示开关与功能区的互动
149
  def fn_area_visibility(a):
150
  ret = {}
151
+ ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))})
152
+ ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))})
153
+ ret.update({area_input_primary: gr.update(visible=("底部输入区" not in a))})
154
+ ret.update({area_input_secondary: gr.update(visible=("底部输入区" in a))})
155
+ ret.update({clearBtn: gr.update(visible=("输入清除键" in a))})
156
+ ret.update({clearBtn2: gr.update(visible=("输入清除键" in a))})
157
  ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
158
+ if "底部输入区" in a: ret.update({txt: gr.update(value="")})
 
 
 
 
159
  return ret
160
+ checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, clearBtn, clearBtn2, plugin_advanced_arg] )
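`fn_area_visibility` returns a dict that maps each output component to a `gr.update(...)`, so a single checkbox event can show or hide several panels at once while leaving untouched components alone. A self-contained sketch of the pattern:

```python
import gradio as gr

with gr.Blocks() as demo:
    checkboxes = gr.CheckboxGroup(["基础功能区"], value=["基础功能区"], label="显示/隐藏功能区")
    with gr.Accordion("基础功能区") as area_basic_fn:
        gr.Markdown("...")

    def fn_area_visibility(a):
        # only components named as keys are updated; everything else keeps its state
        return {area_basic_fn: gr.update(visible=("基础功能区" in a))}

    checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn])
```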
 
 
161
  # 整理反复出现的控件句柄组合
162
  input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
163
  output_combo = [cookies, chatbot, history, status]
 
167
  cancel_handles.append(txt2.submit(**predict_args))
168
  cancel_handles.append(submitBtn.click(**predict_args))
169
  cancel_handles.append(submitBtn2.click(**predict_args))
170
+ resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
171
+ resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
172
+ clearBtn.click(lambda: ("",""), None, [txt, txt2])
173
+ clearBtn2.click(lambda: ("",""), None, [txt, txt2])
 
 
174
  if AUTO_CLEAR_TXT:
175
+ submitBtn.click(lambda: ("",""), None, [txt, txt2])
176
+ submitBtn2.click(lambda: ("",""), None, [txt, txt2])
177
+ txt.submit(lambda: ("",""), None, [txt, txt2])
178
+ txt2.submit(lambda: ("",""), None, [txt, txt2])
179
  # 基础功能区的回调函数注册
180
  for k in functional:
181
  if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
182
  click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
183
  cancel_handles.append(click_handle)
 
 
 
184
  # 文件上传区,接收文件后与chatbot的互动
185
+ file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies])
 
186
  # 函数插件-固定按钮区
187
  for k in plugins:
188
  if not plugins[k].get("AsButton", True): continue
 
192
  # 函数插件-下拉菜单与随变按钮的互动
193
  def on_dropdown_changed(k):
194
  variant = plugins[k]["Color"] if "Color" in plugins[k] else "secondary"
195
+ ret = {switchy_bt: gr.update(value=k, variant=variant)}
 
196
  if plugins[k].get("AdvancedArgs", False): # 是否唤起高级插件参数区
197
  ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + plugins[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))})
198
  else:
199
  ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
200
  return ret
201
  dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
 
202
  def on_md_dropdown_changed(k):
203
  return {chatbot: gr.update(label="当前模型:"+k)}
204
  md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
 
 
 
 
205
  # 随变按钮的回调函数注册
206
  def route(request: gr.Request, k, *args, **kwargs):
207
  if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
 
219
  if not group_list: # 处理特殊情况:没有选择任何插件组
220
  return [*[plugin['Button'].update(visible=False) for _, plugin in plugins_as_btn.items()], gr.Dropdown.update(choices=[])]
221
  for k, plugin in plugins.items():
222
+ if plugin.get("AsButton", True):
223
  btn_list.append(plugin['Button'].update(visible=match_group(plugin['Group'], group_list))) # 刷新按钮
224
  if plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k) # 对于需要高级参数的插件,亦在下拉菜单中显示
225
  elif match_group(plugin['Group'], group_list): fns_list.append(k) # 刷新下拉列表
226
  return [*btn_list, gr.Dropdown.update(choices=fns_list)]
227
  plugin_group_sel.select(fn=on_group_change, inputs=[plugin_group_sel], outputs=[*[plugin['Button'] for name, plugin in plugins_as_btn.items()], dropdown])
228
+ if ENABLE_AUDIO:
229
  from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
230
  rad = RealtimeAudioDistribution()
231
  def deal_audio(audio, cookies):
232
  rad.feed(cookies['uuid'].hex, audio)
233
  audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])
234
 
235
+ def init_cookie(cookies, chatbot):
236
+ # 为每一位访问的用户赋予一个独一无二的uuid编码
237
+ cookies.update({'uuid': uuid.uuid4()})
238
+ return cookies
239
+ demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies])
240
+ demo.load(lambda: 0, inputs=None, outputs=None, _js='()=>{ChatBotHeight();}')
241
+
242
  # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
243
+ def auto_opentab_delay():
244
  import threading, webbrowser, time
245
  print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
246
+ print(f"\t(亮色主题): http://localhost:{PORT}")
247
+ print(f"\t(暗色主题): http://localhost:{PORT}/?__theme=dark")
248
+ def open():
249
+ time.sleep(2) # 打开浏览器
250
+ DARK_MODE, = get_conf('DARK_MODE')
251
+ if DARK_MODE: webbrowser.open_new_tab(f"http://localhost:{PORT}/?__theme=dark")
252
+ else: webbrowser.open_new_tab(f"http://localhost:{PORT}")
253
+ threading.Thread(target=open, name="open-browser", daemon=True).start()
254
+ threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
255
+ threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
256
+
257
+ auto_opentab_delay()
258
  demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
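`demo.queue(...).launch(...)` blocks the main thread, so everything that must happen after the server is up — opening the browser tab, the self-update check, the tokenizer warm-up — is started beforehand on daemon threads that sleep briefly and then run, as `auto_opentab_delay` does above. The core pattern in isolation (the port number here is illustrative):

```python
import threading, time, webbrowser

def open_browser(port: int, delay: float = 2.0):
    time.sleep(delay)  # give the server time to bind the port
    webbrowser.open_new_tab(f"http://localhost:{port}")

# daemon=True lets the process exit even if a helper is still sleeping
threading.Thread(target=open_browser, args=(7860,), daemon=True).start()
```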
259
 
 
260
  # 如果需要在二级路径下运行
261
+ # CUSTOM_PATH, = get_conf('CUSTOM_PATH')
262
+ # if CUSTOM_PATH != "/":
263
  # from toolbox import run_gradio_in_subpath
264
  # run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH)
265
+ # else:
266
  # demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png",
267
+ # blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
268
 
269
  if __name__ == "__main__":
270
  main()
check_proxy.py CHANGED
@@ -5,6 +5,7 @@ def check_proxy(proxies):
5
  try:
6
  response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
7
  data = response.json()
 
8
  if 'country_name' in data:
9
  country = data['country_name']
10
  result = f"代理配置 {proxies_https}, 代理所在地:{country}"
@@ -45,9 +46,9 @@ def backup_and_download(current_version, remote_version):
45
  return new_version_dir
46
  os.makedirs(new_version_dir)
47
  shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
48
- proxies = get_conf('proxies')
49
- try: r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
50
- except: r = requests.get('https://public.gpt-academic.top/publish/master.zip', proxies=proxies, stream=True)
51
  zip_file_path = backup_dir+'/master.zip'
52
  with open(zip_file_path, 'wb+') as f:
53
  f.write(r.content)
@@ -110,10 +111,11 @@ def auto_update(raise_error=False):
110
  try:
111
  from toolbox import get_conf
112
  import requests
 
113
  import json
114
- proxies = get_conf('proxies')
115
- try: response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
116
- except: response = requests.get("https://public.gpt-academic.top/publish/version", proxies=proxies, timeout=5)
117
  remote_json_data = json.loads(response.text)
118
  remote_version = remote_json_data['version']
119
  if remote_json_data["show_feature"]:
@@ -125,7 +127,8 @@ def auto_update(raise_error=False):
125
  current_version = json.loads(current_version)['version']
126
  if (remote_version - current_version) >= 0.01-1e-5:
127
  from colorful import print亮黄
128
- print亮黄(f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
 
129
  print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
130
  user_instruction = input('(2)是否一键更新代码(Y+回车=确认,输入其他/无输入+回车=不更新)?')
131
  if user_instruction in ['Y', 'y']:
@@ -151,26 +154,16 @@ def auto_update(raise_error=False):
151
  print(msg)
152
 
153
  def warm_up_modules():
154
- print('正在执行一些模块的预热 ...')
155
- from toolbox import ProxyNetworkActivate
156
- from request_llms.bridge_all import model_info
157
- with ProxyNetworkActivate("Warmup_Modules"):
158
- enc = model_info["gpt-3.5-turbo"]['tokenizer']
159
- enc.encode("模块预热", disallowed_special=())
160
- enc = model_info["gpt-4"]['tokenizer']
161
- enc.encode("模块预热", disallowed_special=())
162
-
163
- def warm_up_vectordb():
164
- print('正在执行一些模块的预热 ...')
165
- from toolbox import ProxyNetworkActivate
166
- with ProxyNetworkActivate("Warmup_Modules"):
167
- import nltk
168
- with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")
169
 
170
-
171
  if __name__ == '__main__':
172
  import os
173
  os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
174
  from toolbox import get_conf
175
- proxies = get_conf('proxies')
176
  check_proxy(proxies)
 
5
  try:
6
  response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
7
  data = response.json()
8
+ # print(f'查询代理的地理位置,返回的结果是{data}')
9
  if 'country_name' in data:
10
  country = data['country_name']
11
  result = f"代理配置 {proxies_https}, 代理所在地:{country}"
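`check_proxy` probes the configured proxy by asking ipapi.co where the egress IP is located and reports the country back to the UI. Reduced to its essentials (the proxies dict passed in is whatever `config.py` defines):

```python
import requests

def where_am_i(proxies: dict) -> str:
    response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
    data = response.json()
    # mirrors the 'country_name' check above; anything else counts as a failed probe
    return data.get("country_name", "未知")
```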
 
46
  return new_version_dir
47
  os.makedirs(new_version_dir)
48
  shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
49
+ proxies, = get_conf('proxies')
50
+ r = requests.get(
51
+ 'https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
52
  zip_file_path = backup_dir+'/master.zip'
53
  with open(zip_file_path, 'wb+') as f:
54
  f.write(r.content)
 
111
  try:
112
  from toolbox import get_conf
113
  import requests
114
+ import time
115
  import json
116
+ proxies, = get_conf('proxies')
117
+ response = requests.get(
118
+ "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
119
  remote_json_data = json.loads(response.text)
120
  remote_version = remote_json_data['version']
121
  if remote_json_data["show_feature"]:
 
127
  current_version = json.loads(current_version)['version']
128
  if (remote_version - current_version) >= 0.01-1e-5:
129
  from colorful import print亮黄
130
+ print亮黄(
131
+ f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
132
  print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
133
  user_instruction = input('(2)是否一键更新代码(Y+回车=确认,输入其他/无输入+回车=不更新)?')
134
  if user_instruction in ['Y', 'y']:
 
154
  print(msg)
155
 
156
  def warm_up_modules():
157
+ print('正在执行一些模块的预热...')
158
+ from request_llm.bridge_all import model_info
159
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
160
+ enc.encode("模块预热", disallowed_special=())
161
+ enc = model_info["gpt-4"]['tokenizer']
162
+ enc.encode("模块预热", disallowed_special=())
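The warm-up exists because the first use of a tiktoken-backed tokenizer downloads and caches its BPE files; a throwaway `encode` at startup moves that cost off the first real request. An equivalent standalone sketch — the model name is illustrative, and the project resolves tokenizers through `model_info` rather than calling tiktoken directly:

```python
import tiktoken

def warm_up_tokenizer():
    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")  # triggers the one-time cache fill
    enc.encode("模块预热", disallowed_special=())
```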
 
 
 
 
163
 
 
164
  if __name__ == '__main__':
165
  import os
166
  os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
167
  from toolbox import get_conf
168
+ proxies, = get_conf('proxies')
169
  check_proxy(proxies)
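Note the trailing comma in `proxies, = get_conf('proxies')`: this version of `toolbox.get_conf` returns a tuple even for a single key, and the comma unpacks the 1-tuple into the dict that `requests` expects. A sketch of the behavior being relied on, with `get_conf` stubbed out for illustration:

```python
def get_conf(*args):
    # stand-in: the real reader pulls from env vars / config_private.py / config.py
    fake_config = {'proxies': {'http': 'socks5h://localhost:11284'}}
    return tuple(fake_config[a] for a in args)

proxies, = get_conf('proxies')   # unpacks the 1-tuple -> dict
# proxies = get_conf('proxies') # would bind the whole tuple and break requests(proxies=...)
```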
config.py CHANGED
@@ -2,8 +2,8 @@
2
  以下所有配置也都支持利用环境变量覆写,环境变量配置格式见docker-compose.yml。
3
  读取优先级:环境变量 > config_private.py > config.py
4
  --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
5
- All the following configurations also support using environment variables to override,
6
- and the environment variable configuration format can be seen in docker-compose.yml.
7
  Configuration reading priority: environment variable > config_private.py > config.py
8
  """
9
 
@@ -19,13 +19,13 @@ API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗
19
  USE_PROXY = False
20
  if USE_PROXY:
21
  """
22
- 代理网络的地址,打开你的代理软件查看代理协议(socks5h / http)、地址(localhost)和端口(11284)
23
  填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改
24
  <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1
25
  [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http
26
- [地址] localhost或者127.0.0.1localhost意思是代理软件安装在本机上)
27
  [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
28
  """
 
29
  proxies = {
30
  # [协议]:// [地址] :[端口]
31
  "http": "socks5h://localhost:11284", # 再例如 "http": "http://127.0.0.1:7890",
@@ -37,7 +37,7 @@ else:
37
  # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
38
 
39
  # URL重定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
40
- # 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
41
  # 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
42
  API_URL_REDIRECT = {}
43
 
@@ -50,11 +50,6 @@ DEFAULT_WORKER_NUM = 3
50
  # 色彩主题, 可选 ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast"]
51
  # 更多主题, 请查阅Gradio主题商店: https://huggingface.co/spaces/gradio/theme-gallery 可选 ["Gstaff/Xkcd", "NoCrypt/Miku", ...]
52
  THEME = "Chuanhu-Small-and-Beautiful"
53
- AVAIL_THEMES = ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast", "Gstaff/Xkcd", "NoCrypt/Miku"]
54
-
55
-
56
- # 默认的系统提示词(system prompt)
57
- INIT_SYS_PROMPT = "Serve me as a writing and programming assistant."
58
 
59
 
60
  # 对话窗的高度 (仅在LAYOUT="TOP-DOWN"时生效)
@@ -67,10 +62,7 @@ CODE_HIGHLIGHT = True
67
 
68
  # 窗口布局
69
  LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
70
-
71
-
72
- # 暗色模式 / 亮色模式
73
- DARK_MODE = False
74
 
75
 
76
  # 发送请求到OpenAI后,等待多久判定为超时
@@ -89,41 +81,21 @@ LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
89
  AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]
90
 
91
  # 插件分类默认选项
92
- DEFAULT_FN_GROUPS = ['对话', '编程', '学术', '智能体']
93
 
94
 
95
  # 模型选择 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
96
- LLM_MODEL = "gpt-3.5-turbo-16k" # 可选 ↓↓↓
97
- AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
98
- "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
99
- "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
100
- "gemini-pro", "chatglm3", "claude-2"]
101
- # P.S. 其他可用的模型还包括 [
102
- # "moss", "qwen-turbo", "qwen-plus", "qwen-max"
103
- # "zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613",
104
- # "gpt-3.5-turbo-16k-0613", "gpt-3.5-random", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
105
- # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
106
- # ]
107
-
108
-
109
- # 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
110
- MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
111
-
112
-
113
- # 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用)
114
- # 如果你选择Qwen系列的模型,那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型
115
- # 也可以是具体的模型路径
116
- QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
117
-
118
-
119
- # 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
120
- DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY
121
 
122
 
123
  # 百度千帆(LLM_MODEL="qianfan")
124
  BAIDU_CLOUD_API_KEY = ''
125
  BAIDU_CLOUD_SECRET_KEY = ''
126
- BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot-4"(文心大模型4.0), "ERNIE-Bot"(文心一言), "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", "Llama-2-13B-Chat", "Llama-2-7B-Chat"
127
 
128
 
129
  # 如果使用ChatGLM2微调模型,请把 LLM_MODEL="chatglmft",并在此处指定模型路径
@@ -134,6 +106,7 @@ CHATGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b
134
  LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
135
  LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本
136
 
 
137
  # 设置gradio的并行线程数(不需要修改)
138
  CONCURRENT_COUNT = 100
139
 
@@ -143,7 +116,7 @@ AUTO_CLEAR_TXT = False
143
 
144
 
145
  # 加一个live2d装饰
146
- ADD_WAIFU = True
147
 
148
 
149
  # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
@@ -155,31 +128,22 @@ AUTHENTICATION = []
155
  CUSTOM_PATH = "/"
156
 
157
 
158
- # HTTPS 秘钥和证书(不需要修改)
159
- SSL_KEYFILE = ""
160
- SSL_CERTFILE = ""
161
-
162
-
163
  # 极少数情况下,openai的官方KEY需要伴随组织编码(格式如org-xxxxxxxxxxxxxxxxxxxxxxxx)使用
164
  API_ORG = ""
165
 
166
 
167
- # 如果需要使用Slack Claude,使用教程详情见 request_llms/README.md
168
- SLACK_CLAUDE_BOT_ID = ''
169
  SLACK_CLAUDE_USER_TOKEN = ''
170
 
171
 
172
- # 如果需要使用AZURE(方法一:单个azure模型部署)详情请见额外文档 docs\use_azure.md
173
  AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/"
174
  AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用
175
  AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md
176
 
177
 
178
- # 如果需要使用AZURE(方法二:多个azure模型部署+动态切换)详情请见额外文档 docs\use_azure.md
179
- AZURE_CFG_ARRAY = {}
180
-
181
-
182
- # 使用Newbing (不推荐使用,未来将删除)
183
  NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"]
184
  NEWBING_COOKIES = """
185
  put your new bing cookies here
@@ -200,79 +164,33 @@ XFYUN_API_SECRET = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
200
  XFYUN_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
201
 
202
 
203
- # 接入智谱大模型
204
- ZHIPUAI_API_KEY = ""
205
- ZHIPUAI_MODEL = "" # 此选项已废弃,不再需要填写
206
-
207
-
208
- # # 火山引擎YUNQUE大模型
209
- # YUNQUE_SECRET_KEY = ""
210
- # YUNQUE_ACCESS_KEY = ""
211
- # YUNQUE_MODEL = ""
212
-
213
-
214
  # Claude API KEY
215
  ANTHROPIC_API_KEY = ""
216
 
217
 
218
- # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号
219
- MATHPIX_APPID = ""
220
- MATHPIX_APPKEY = ""
221
-
222
-
223
  # 自定义API KEY格式
224
  CUSTOM_API_KEY_PATTERN = ""
225
 
226
 
227
- # Google Gemini API-Key
228
- GEMINI_API_KEY = ''
229
-
230
-
231
  # HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
232
- HUGGINGFACE_ACCESS_TOKEN = ""
233
 
234
 
235
  # GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
236
  # 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
237
  GROBID_URLS = [
238
  "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
239
- "https://qingxu98-grobid4.hf.space","https://qingxu98-grobid5.hf.space", "https://qingxu98-grobid6.hf.space",
240
- "https://qingxu98-grobid7.hf.space", "https://qingxu98-grobid8.hf.space",
241
  ]
242
 
243
 
244
  # 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性,默认关闭
245
  ALLOW_RESET_CONFIG = False
246
-
247
-
248
- # 在使用AutoGen插件时,是否使用Docker容器运行代码
249
- AUTOGEN_USE_DOCKER = False
250
-
251
-
252
  # 临时的上传文件夹位置,请勿修改
253
  PATH_PRIVATE_UPLOAD = "private_upload"
254
-
255
-
256
  # 日志文件夹的位置,请勿修改
257
  PATH_LOGGING = "gpt_log"
258
 
259
-
260
- # 除了连接OpenAI之外,还有哪些场合允许使用代理,请勿修改
261
- WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid",
262
- "Warmup_Modules", "Nougat_Download", "AutoGen"]
263
-
264
-
265
- # *实验性功能*: 自动检测并屏蔽失效的KEY,请勿使用
266
- BLOCK_INVALID_APIKEY = False
267
-
268
-
269
- # 启用插件热加载
270
- PLUGIN_HOT_RELOAD = False
271
-
272
-
273
- # 自定义按钮的最大数量限制
274
- NUM_CUSTOM_BASIC_BTN = 4
275
-
276
  """
277
  在线大模型配置关联关系示意图
278
 
@@ -282,16 +200,13 @@ NUM_CUSTOM_BASIC_BTN = 4
282
  │ ├── API_ORG(不常用)
283
  │ └── API_URL_REDIRECT(不常用)
284
 
285
- ├── "azure-gpt-3.5" 等azure模型(单个azure模型,不需要动态切换)
286
  │ ├── API_KEY
287
  │ ├── AZURE_ENDPOINT
288
  │ ├── AZURE_API_KEY
289
  │ ├── AZURE_ENGINE
290
  │ └── API_URL_REDIRECT
291
 
292
- ├── "azure-gpt-3.5" 等azure模型(多个azure模型,需要动态切换,高优先级)
293
- │ └── AZURE_CFG_ARRAY
294
-
295
  ├── "spark" 星火认知大模型 spark & sparkv2
296
  │ ├── XFYUN_APPID
297
  │ ├── XFYUN_API_SECRET
@@ -309,36 +224,11 @@ NUM_CUSTOM_BASIC_BTN = 4
309
  │ ├── BAIDU_CLOUD_API_KEY
310
  │ └── BAIDU_CLOUD_SECRET_KEY
311
 
312
- ├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型
313
- │ └── ZHIPUAI_API_KEY
314
-
315
- ├── "qwen-turbo" 等通义千问大模型
316
- │ └── DASHSCOPE_API_KEY
317
-
318
- ├── "Gemini"
319
- │ └── GEMINI_API_KEY
320
-
321
- └── "newbing" Newbing接口不再稳定,不推荐使用
322
  ├── NEWBING_STYLE
323
  └── NEWBING_COOKIES
324
 
325
-
326
- 本地大模型示意图
327
-
328
- ├── "chatglm3"
329
- ├── "chatglm"
330
- ├── "chatglm_onnx"
331
- ├── "chatglmft"
332
- ├── "internlm"
333
- ├── "moss"
334
- ├── "jittorllms_pangualpha"
335
- ├── "jittorllms_llama"
336
- ├── "deepseekcoder"
337
- ├── "qwen-local"
338
- ├── RWKV的支持见Wiki
339
- └── "llama2"
340
-
341
-
342
  用户图形界面布局依赖关系示意图
343
 
344
  ├── CHATBOT_HEIGHT 对话窗的高度
@@ -349,7 +239,7 @@ NUM_CUSTOM_BASIC_BTN = 4
349
  ├── THEME 色彩主题
350
  ├── AUTO_CLEAR_TXT 是否在提交时自动清空输入框
351
  ├── ADD_WAIFU 加一个live2d装饰
352
- └── ALLOW_RESET_CONFIG 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性
353
 
354
 
355
  插件在线服务配置依赖关系示意图
@@ -361,10 +251,7 @@ NUM_CUSTOM_BASIC_BTN = 4
361
  │ ├── ALIYUN_ACCESSKEY
362
  │ └── ALIYUN_SECRET
363
 
364
- └── PDF文档精准解析
365
- ├── GROBID_URLS
366
- ├── MATHPIX_APPID
367
- └── MATHPIX_APPKEY
368
-
369
 
370
  """
 
2
  以下所有配置也都支持利用环境变量覆写,环境变量配置格式见docker-compose.yml。
3
  读取优先级:环境变量 > config_private.py > config.py
4
  --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
5
+ All the following configurations also support using environment variables to override,
6
+ and the environment variable configuration format can be seen in docker-compose.yml.
7
  Configuration reading priority: environment variable > config_private.py > config.py
8
  """
9
 
 
19
  USE_PROXY = False
20
  if USE_PROXY:
21
  """
 
22
  填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改
23
  <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1
24
  [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http
25
+ [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上)
26
  [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
27
  """
28
+ # 代理网络的地址,打开你的*学*网软件查看代理的协议(socks5h / http)、地址(localhost)和端口(11284)
29
  proxies = {
30
  # [协议]:// [地址] :[端口]
31
  "http": "socks5h://localhost:11284", # 再例如 "http": "http://127.0.0.1:7890",
 
37
  # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
38
 
39
  # URL重定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
40
+ # 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
41
  # 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
42
  API_URL_REDIRECT = {}
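When populated, this map is consulted before each request so the official endpoint can be swapped for a mirror or reverse proxy. A hedged sketch of how such a lookup is typically applied — the project's actual resolution happens inside its request bridge and may differ:

```python
API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":
                    "https://reverse-proxy-url/v1/chat/completions"}

def resolve_endpoint(url: str) -> str:
    # fall back to the original URL when no redirect is configured
    return API_URL_REDIRECT.get(url, url)
```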
43
 
 
50
  # 色彩主题, 可选 ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast"]
51
  # 更多主题, 请查阅Gradio主题商店: https://huggingface.co/spaces/gradio/theme-gallery 可选 ["Gstaff/Xkcd", "NoCrypt/Miku", ...]
52
  THEME = "Chuanhu-Small-and-Beautiful"
 
 
 
 
 
53
 
54
 
55
  # 对话窗的高度 (仅在LAYOUT="TOP-DOWN"时生效)
 
62
 
63
  # 窗口布局
64
  LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
65
+ DARK_MODE = True # 暗色模式 / 亮色模式
 
 
 
66
 
67
 
68
  # 发送请求到OpenAI后,等待多久判定为超时
 
81
  AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]
82
 
83
  # 插件分类默认选项
84
+ DEFAULT_FN_GROUPS = ['对话', '编程', '学术']
85
 
86
 
87
  # 模型选择 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
88
+ LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
89
+ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo",
90
+ "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
91
+ # P.S. 其他可用的模型还包括 ["qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613",
92
+ # "spark", "sparkv2", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
 
 
 
 
93
 
94
 
95
  # 百度千帆(LLM_MODEL="qianfan")
96
  BAIDU_CLOUD_API_KEY = ''
97
  BAIDU_CLOUD_SECRET_KEY = ''
98
+ BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot"(文心一言), "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", "Llama-2-13B-Chat", "Llama-2-7B-Chat"
99
 
100
 
101
  # 如果使用ChatGLM2微调模型,请把 LLM_MODEL="chatglmft",并在此处指定模型路径
 
106
  LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
107
  LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本
108
 
109
+
110
  # 设置gradio的并行线程数(不需要修改)
111
  CONCURRENT_COUNT = 100
112
 
 
116
 
117
 
118
  # 加一个live2d装饰
119
+ ADD_WAIFU = False
120
 
121
 
122
  # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
 
128
  CUSTOM_PATH = "/"
129
 
130
 
 
 
 
 
 
131
  # 极少数情况下,openai的官方KEY需要伴随组织编码(格式如org-xxxxxxxxxxxxxxxxxxxxxxxx)使用
132
  API_ORG = ""
133
 
134
 
135
+ # 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md
136
+ SLACK_CLAUDE_BOT_ID = ''
137
  SLACK_CLAUDE_USER_TOKEN = ''
138
 
139
 
140
+ # 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
141
  AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/"
142
  AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用
143
  AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md
144
 
145
 
146
+ # 使用Newbing
 
 
 
 
147
  NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"]
148
  NEWBING_COOKIES = """
149
  put your new bing cookies here
 
164
  XFYUN_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
165
 
166
 
 
 
 
 
167
  # Claude API KEY
168
  ANTHROPIC_API_KEY = ""
169
 
170
 
 
 
 
 
 
171
  # 自定义API KEY格式
172
  CUSTOM_API_KEY_PATTERN = ""
173
 
174
 
 
 
 
 
175
  # HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
176
+ HUGGINGFACE_ACCESS_TOKEN = "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"
177
 
178
 
179
  # GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
180
  # 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
181
  GROBID_URLS = [
182
  "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
183
+ "https://shaocongma-grobid.hf.space","https://FBR123-grobid.hf.space", "https://yeku-grobid.hf.space",
 
184
  ]
185
 
186
 
187
  # 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性,默认关闭
188
  ALLOW_RESET_CONFIG = False
 
 
 
 
 
 
189
  # 临时的上传文件夹位置,请勿修改
190
  PATH_PRIVATE_UPLOAD = "private_upload"
 
 
191
  # 日志文件夹的位置,请勿修改
192
  PATH_LOGGING = "gpt_log"
193
 
 
 
 
 
194
  """
195
  在线大模型配置关联关系示意图
196
 
 
200
  │ ├── API_ORG(不常用)
201
  │ └── API_URL_REDIRECT(不常用)
202
 
203
+ ├── "azure-gpt-3.5" 等azure模型
204
  │ ├── API_KEY
205
  │ ├── AZURE_ENDPOINT
206
  │ ├── AZURE_API_KEY
207
  │ ├── AZURE_ENGINE
208
  │ └── API_URL_REDIRECT
209
 
 
 
 
210
  ├── "spark" 星火认知大模型 spark & sparkv2
211
  │ ├── XFYUN_APPID
212
  │ ├── XFYUN_API_SECRET
 
224
  │ ├── BAIDU_CLOUD_API_KEY
225
  │ └── BAIDU_CLOUD_SECRET_KEY
226
 
227
+ ├── "newbing" Newbing接口不再稳定,不推荐使用
 
 
 
228
  ├── NEWBING_STYLE
229
  └── NEWBING_COOKIES
230
 
231
+
 
 
 
 
 
232
  用户图形界面布局依赖关系示意图
233
 
234
  ├── CHATBOT_HEIGHT 对话窗的高度
 
239
  ├── THEME 色彩主题
240
  ├── AUTO_CLEAR_TXT 是否在提交时自动清空输入框
241
  ├── ADD_WAIFU 加一个live2d装饰
242
+ ├── ALLOW_RESET_CONFIG 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性
243
 
244
 
245
  插件在线服务配置依赖关系示意图
 
251
  │ ├── ALIYUN_ACCESSKEY
252
  │ └── ALIYUN_SECRET
253
 
254
+ ├── PDF文档精准解析
255
+ │ └── GROBID_URLS
 
 
 
256
 
257
  """
core_functional.py CHANGED
@@ -3,143 +3,83 @@
3
  # 'stop' 颜色对应 theme.py 中的 color_er
4
  import importlib
5
  from toolbox import clear_line_break
6
- from toolbox import apply_gpt_academic_string_mask_langbased
7
- from toolbox import build_gpt_academic_masked_string_langbased
8
- from textwrap import dedent
9
 
10
  def get_core_functions():
11
  return {
12
-
13
- "学术语料润色": {
14
- # [1*] 前缀字符串,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等。
15
- # 这里填一个提示词字符串就行了,这里为了区分中英文情景搞复杂了一点
16
- "Prefix": build_gpt_academic_masked_string_langbased(
17
- text_show_english=
18
- r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
19
- r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
20
- r"Firstly, you should provide the polished paragraph. "
21
- r"Secondly, you should list all your modification and explain the reasons to do so in markdown table.",
22
- text_show_chinese=
23
- r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
24
- r"同时分解长句,减少重复,并提供改进建议。请先提供文本的更正版本,然后在markdown表格中列出修改的内容,并给出修改的理由:"
25
- ) + "\n\n",
26
- # [2*] 后缀字符串,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
27
  "Suffix": r"",
28
- # [3] 按钮颜色 (可选参数,默认 secondary)
29
  "Color": r"secondary",
30
- # [4] 按钮是否可见 (可选参数,默认 True,即可见)
31
  "Visible": True,
32
- # [5] 是否在触发时清除历史 (可选参数,默认 False,即不处理之前的对话历史)
33
- "AutoClearHistory": False,
34
- # [6] 文本预处理 (可选参数,默认 None,举例:写个函数移除所有的换行符)
35
- "PreProcess": None,
36
  },
37
-
38
-
39
- "总结绘制脑图": {
40
- # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
41
- "Prefix": r"",
42
- # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
43
- "Suffix":
44
- # dedent() 函数用于去除多行字符串的缩进
45
- dedent("\n"+r'''
46
- ==============================
47
-
48
- 使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如:
49
-
50
- 以下是对以上文本的总结,以mermaid flowchart的形式展示:
51
- ```mermaid
52
- flowchart LR
53
- A["节点名1"] --> B("节点名2")
54
- B --> C{"节点名3"}
55
- C --> D["节点名4"]
56
- C --> |"箭头名1"| E["节点名5"]
57
- C --> |"箭头名2"| F["节点名6"]
58
- ```
59
-
60
- 警告:
61
- (1)使用中文
62
- (2)节点名字使用引号包裹,如["Laptop"]
63
- (3)`|` 和 `"`之间不要存在空格
64
- (4)根据情况选择flowchart LR(从左到右)或者flowchart TD(从上到下)
65
- '''),
66
  },
67
-
68
-
69
  "查找语法错误": {
70
- "Prefix": r"Help me ensure that the grammar and the spelling is correct. "
71
- r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good. "
72
- r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, "
73
- r"put the original text the first column, "
74
- r"put the corrected text in the second column and highlight the key words you fixed. "
75
- r"Finally, please provide the proofreaded text.""\n\n"
76
  r"Example:""\n"
77
  r"Paragraph: How is you? Do you knows what is it?""\n"
78
  r"| Original sentence | Corrected sentence |""\n"
79
  r"| :--- | :--- |""\n"
80
  r"| How **is** you? | How **are** you? |""\n"
81
- r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n\n"
82
  r"Below is a paragraph from an academic paper. "
83
  r"You need to report all grammar and spelling mistakes as the example before."
84
  + "\n\n",
85
  "Suffix": r"",
86
  "PreProcess": clear_line_break, # 预处理:清除换行符
87
  },
88
-
89
-
90
  "中译英": {
91
  "Prefix": r"Please translate following sentence to English:" + "\n\n",
92
  "Suffix": r"",
93
  },
94
-
95
-
96
- "学术英中互译": {
97
- "Prefix": build_gpt_academic_masked_string_langbased(
98
- text_show_chinese=
99
- r"I want you to act as a scientific English-Chinese translator, "
100
- r"I will provide you with some paragraphs in one language "
101
- r"and your task is to accurately and academically translate the paragraphs only into the other language. "
102
- r"Do not repeat the original provided paragraphs after translation. "
103
- r"You should use artificial intelligence tools, "
104
- r"such as natural language processing, and rhetorical knowledge "
105
- r"and experience about effective writing techniques to reply. "
106
- r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:",
107
- text_show_english=
108
- r"你是经验丰富的翻译,请把以下学术文章段落翻译成中文,"
109
- r"并同时充分考虑中文的语法、清晰、简洁和整体可读性,"
110
- r"必要时,你可以修改整个句子的顺序以确保翻译后的段落符合中文的语言习惯。"
111
- r"你需要翻译的文本如下:"
112
- ) + "\n\n",
113
- "Suffix": r"",
114
  },
115
-
116
-
117
  "英译中": {
118
  "Prefix": r"翻译成地道的中文:" + "\n\n",
119
  "Suffix": r"",
120
- "Visible": False,
121
  },
122
-
123
-
124
  "找图片": {
125
- "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
126
  r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
127
  "Suffix": r"",
128
- "Visible": False,
129
  },
130
-
131
-
132
  "解释代码": {
133
  "Prefix": r"请解释以下代码:" + "\n```\n",
134
  "Suffix": "\n```\n",
135
  },
136
-
137
-
138
  "参考文献转Bib": {
139
- "Prefix": r"Here are some bibliography items, please transform them into bibtex style."
140
- r"Note that, reference styles maybe more than one kind, you should transform each item correctly."
141
- r"Items need to be transformed:" + "\n\n",
142
- "Visible": False,
143
  "Suffix": r"",
144
  }
145
  }
@@ -149,25 +89,8 @@ def handle_core_functionality(additional_fn, inputs, history, chatbot):
149
  import core_functional
150
  importlib.reload(core_functional) # 热更新prompt
151
  core_functional = core_functional.get_core_functions()
152
- addition = chatbot._cookies['customize_fn_overwrite']
153
- if additional_fn in addition:
154
- # 自定义功能
155
- inputs = addition[additional_fn]["Prefix"] + inputs + addition[additional_fn]["Suffix"]
156
- return inputs, history
157
- else:
158
- # 预制功能
159
- if "PreProcess" in core_functional[additional_fn]:
160
- if core_functional[additional_fn]["PreProcess"] is not None:
161
- inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
162
- # 为字符串加上上面定义的前缀和后缀。
163
- inputs = apply_gpt_academic_string_mask_langbased(
164
- string = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"],
165
- lang_reference = inputs,
166
- )
167
- if core_functional[additional_fn].get("AutoClearHistory", False):
168
- history = []
169
- return inputs, history
170
-
171
- if __name__ == "__main__":
172
- t = get_core_functions()["总结绘制脑图"]
173
- print(t["Prefix"] + t["Suffix"])
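The removed helpers pair an English and a Chinese variant of each prompt and pick one according to the language of the user's input (`lang_reference`). The idea, as a rough sketch — the real `toolbox` implementation masks strings rather than returning a dict, and its language test is more careful than this heuristic:

```python
def build_masked(text_show_english: str, text_show_chinese: str) -> dict:
    return {"en": text_show_english, "zh": text_show_chinese}

def apply_mask(masked: dict, lang_reference: str) -> str:
    # crude heuristic: any CJK character in the input selects the Chinese variant
    is_chinese = any("\u4e00" <= ch <= "\u9fff" for ch in lang_reference)
    return masked["zh" if is_chinese else "en"]
```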
 
3
  # 'stop' 颜色对应 theme.py 中的 color_er
4
  import importlib
5
  from toolbox import clear_line_break
6
+
 
 
7
 
8
  def get_core_functions():
9
  return {
10
+ "英语学术润色": {
11
+ # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
12
+ "Prefix": r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, " +
13
+ r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. " +
14
+ r"Furthermore, list all modification and explain the reasons to do so in markdown table." + "\n\n",
15
+ # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
 
 
 
 
16
  "Suffix": r"",
17
+ # 按钮颜色 (默认 secondary)
18
  "Color": r"secondary",
19
+ # 按钮是否可见 (默认 True,即可见)
20
  "Visible": True,
21
+ # 是否在触发时清除历史 (默认 False,即不处理之前的对话历史)
22
+ "AutoClearHistory": False
 
 
23
  },
24
+ "中文学术润色": {
25
+ "Prefix": r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性," +
26
+ r"同时分解长句,减少重复,并提供改进建议。请只提供文本的更正版本,避免包括解释。请编辑以下文本" + "\n\n",
27
+ "Suffix": r"",
 
 
 
 
28
  },
 
 
29
  "查找语法错误": {
30
+ "Prefix": r"Can you help me ensure that the grammar and the spelling is correct? " +
31
+ r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good." +
32
+ r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, " +
33
+ r"put the original text the first column, " +
34
+ r"put the corrected text in the second column and highlight the key words you fixed.""\n"
 
35
  r"Example:""\n"
36
  r"Paragraph: How is you? Do you knows what is it?""\n"
37
  r"| Original sentence | Corrected sentence |""\n"
38
  r"| :--- | :--- |""\n"
39
  r"| How **is** you? | How **are** you? |""\n"
40
+ r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n"
41
  r"Below is a paragraph from an academic paper. "
42
  r"You need to report all grammar and spelling mistakes as the example before."
43
  + "\n\n",
44
  "Suffix": r"",
45
  "PreProcess": clear_line_break, # 预处理:清除换行符
46
  },
 
 
47
  "中译英": {
48
  "Prefix": r"Please translate following sentence to English:" + "\n\n",
49
  "Suffix": r"",
50
  },
51
+ "学术中英互译": {
52
+ "Prefix": r"I want you to act as a scientific English-Chinese translator, " +
53
+ r"I will provide you with some paragraphs in one language " +
54
+ r"and your task is to accurately and academically translate the paragraphs only into the other language. " +
55
+ r"Do not repeat the original provided paragraphs after translation. " +
56
+ r"You should use artificial intelligence tools, " +
57
+ r"such as natural language processing, and rhetorical knowledge " +
58
+ r"and experience about effective writing techniques to reply. " +
59
+ r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:" + "\n\n",
60
+ "Suffix": "",
61
+ "Color": "secondary",
 
 
 
 
62
  },
 
 
63
  "英译中": {
64
  "Prefix": r"翻译成地道的中文:" + "\n\n",
65
  "Suffix": r"",
66
+ "Visible": False,
67
  },
 
 
68
  "找图片": {
69
+ "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL," +
70
  r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
71
  "Suffix": r"",
72
+ "Visible": False,
73
  },
 
 
74
  "解释代码": {
75
  "Prefix": r"请解释以下代码:" + "\n```\n",
76
  "Suffix": "\n```\n",
77
  },
 
 
78
  "参考文献转Bib": {
79
+ "Prefix": r"Here are some bibliography items, please transform them into bibtex style." +
80
+ r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
81
+ r"Items need to be transformed:",
82
+ "Visible": False,
83
  "Suffix": r"",
84
  }
85
  }
 
89
  import core_functional
90
  importlib.reload(core_functional) # 热更新prompt
91
  core_functional = core_functional.get_core_functions()
92
+ if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
93
+ inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
94
+ if core_functional[additional_fn].get("AutoClearHistory", False):
95
+ history = []
96
+ return inputs, history
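`handle_core_functionality` therefore reduces to optional preprocessing plus string concatenation. A worked example using the "解释代码" entry defined verbatim above:

```python
entry = {"Prefix": "请解释以下代码:\n```\n", "Suffix": "\n```\n"}  # taken from the dict above
user_input = "print('hello')"
final_prompt = entry["Prefix"] + user_input + entry["Suffix"]
# final_prompt now wraps the user's code in a fenced block under the instruction line
```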
 
 
 
 
crazy_functional.py CHANGED
@@ -1,5 +1,4 @@
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
- from toolbox import trimmed_format_exc
3
 
4
 
5
  def get_crazy_functions():
@@ -7,7 +6,6 @@ def get_crazy_functions():
7
  from crazy_functions.生成函数注释 import 批量生成函数注释
8
  from crazy_functions.解析项目源代码 import 解析项目本身
9
  from crazy_functions.解析项目源代码 import 解析一个Python项目
10
- from crazy_functions.解析项目源代码 import 解析一个Matlab项目
11
  from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
12
  from crazy_functions.解析项目源代码 import 解析一个C项目
13
  from crazy_functions.解析项目源代码 import 解析一个Golang项目
@@ -32,122 +30,108 @@ def get_crazy_functions():
32
  from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
33
  from crazy_functions.Latex全文润色 import Latex中文润色
34
  from crazy_functions.Latex全文润色 import Latex英文纠错
 
 
35
  from crazy_functions.批量Markdown翻译 import Markdown中译英
36
  from crazy_functions.虚空终端 import 虚空终端
37
- from crazy_functions.生成多种Mermaid图表 import 生成多种Mermaid图表
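Every plugin below is registered through `HotReload`, so edits to a plugin's source take effect on the next invocation without restarting the app, as the comment on the import notes. The mechanism amounts to re-importing the defining module before dispatch. A sketch of the idea — the real decorator lives in `toolbox`, and plugins are generators, hence the `yield from`:

```python
import importlib

def HotReload(f):
    def decorated(*args, **kwargs):
        # re-read the plugin's module from disk, then delegate to the fresh function
        module = importlib.reload(importlib.import_module(f.__module__))
        yield from getattr(module, f.__name__)(*args, **kwargs)
    return decorated
```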
38
 
39
  function_plugins = {
40
  "虚空终端": {
41
- "Group": "对话|编程|学术|智能体",
42
  "Color": "stop",
43
  "AsButton": True,
44
- "Function": HotReload(虚空终端),
45
  },
46
  "解析整个Python项目": {
47
  "Group": "编程",
48
  "Color": "stop",
49
  "AsButton": True,
50
  "Info": "解析一个Python项目的所有源文件(.py) | 输入参数为路径",
51
- "Function": HotReload(解析一个Python项目),
52
  },
53
  "载入对话历史存档(先上传存档或输入路径)": {
54
  "Group": "对话",
55
  "Color": "stop",
56
  "AsButton": False,
57
  "Info": "载入对话历史存档 | 输入参数为路径",
58
- "Function": HotReload(载入对话历史存档),
59
  },
60
  "删除所有本地对话历史记录(谨慎操作)": {
61
  "Group": "对话",
62
  "AsButton": False,
63
  "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数",
64
- "Function": HotReload(删除所有本地对话历史记录),
65
  },
66
  "清除所有缓存文件(谨慎操作)": {
67
  "Group": "对话",
68
  "Color": "stop",
69
  "AsButton": False, # 加入下拉菜单中
70
  "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数",
71
- "Function": HotReload(清除缓存),
72
- },
73
- "生成多种Mermaid图表(从当前对话或路径(.pdf/.md/.docx)中生产图表)": {
74
- "Group": "对话",
75
- "Color": "stop",
76
- "AsButton": False,
77
- "Info" : "基于当前对话或文件生成多种Mermaid图表,图表类型由模型判断",
78
- "Function": HotReload(生成多种Mermaid图表),
79
- "AdvancedArgs": True,
80
- "ArgsReminder": "请输入图类型对应的数字,不输入则为模型自行判断:1-流程图,2-序列图,3-类图,4-饼图,5-甘特图,6-状态图,7-实体关系图,8-象限提示图,9-思维导图",
81
  },
82
  "批量总结Word文档": {
83
  "Group": "学术",
84
  "Color": "stop",
85
  "AsButton": True,
86
  "Info": "批量总结word文档 | 输入参数为路径",
87
- "Function": HotReload(总结word文档),
88
- },
89
- "解析整个Matlab项目": {
90
- "Group": "编程",
91
- "Color": "stop",
92
- "AsButton": False,
93
- "Info": "解析一个Matlab项目的所有源文件(.m) | 输入参数为路径",
94
- "Function": HotReload(解析一个Matlab项目),
95
  },
96
  "解析整个C++项目头文件": {
97
  "Group": "编程",
98
  "Color": "stop",
99
  "AsButton": False, # 加入下拉菜单中
100
  "Info": "解析一个C++项目的所有头文件(.h/.hpp) | 输入参数为路径",
101
- "Function": HotReload(解析一个C项目的头文件),
102
  },
103
  "解析整个C++项目(.cpp/.hpp/.c/.h)": {
104
  "Group": "编程",
105
  "Color": "stop",
106
  "AsButton": False, # 加入下拉菜单中
107
  "Info": "解析一个C++项目的所有源文件(.cpp/.hpp/.c/.h)| 输入参数为路径",
108
- "Function": HotReload(解析一个C项目),
109
  },
110
  "解析整个Go项目": {
111
  "Group": "编程",
112
  "Color": "stop",
113
  "AsButton": False, # 加入下拉菜单中
114
  "Info": "解析一个Go项目的所有源文件 | 输入参数为路径",
115
- "Function": HotReload(解析一个Golang项目),
116
  },
117
  "解析整个Rust项目": {
118
  "Group": "编程",
119
  "Color": "stop",
120
  "AsButton": False, # 加入下拉菜单中
121
  "Info": "解析一个Rust项目的所有源文件 | 输入参数为路径",
122
- "Function": HotReload(解析一个Rust项目),
123
  },
124
  "解析整个Java项目": {
125
  "Group": "编程",
126
  "Color": "stop",
127
  "AsButton": False, # 加入下拉菜单中
128
  "Info": "解析一个Java项目的所有源文件 | 输入参数为路径",
129
- "Function": HotReload(解析一个Java项目),
130
  },
131
  "解析整个前端项目(js,ts,css等)": {
132
  "Group": "编程",
133
  "Color": "stop",
134
  "AsButton": False, # 加入下拉菜单中
135
  "Info": "解析一个前端项目的所有源文件(js,ts,css等) | 输入参数为路径",
136
- "Function": HotReload(解析一个前端项目),
137
  },
138
  "解析整个Lua项目": {
139
  "Group": "编程",
140
  "Color": "stop",
141
  "AsButton": False, # 加入下拉菜单中
142
  "Info": "解析一个Lua项目的所有源文件 | 输入参数为路径",
143
- "Function": HotReload(解析一个Lua项目),
144
  },
145
  "解析整个CSharp项目": {
146
  "Group": "编程",
147
  "Color": "stop",
148
  "AsButton": False, # 加入下拉菜单中
149
  "Info": "解析一个CSharp项目的所有源文件 | 输入参数为路径",
150
- "Function": HotReload(解析一个CSharp项目),
151
  },
152
  "解析Jupyter Notebook文件": {
153
  "Group": "编程",
@@ -163,530 +147,384 @@ def get_crazy_functions():
163
  "Color": "stop",
164
  "AsButton": False,
165
  "Info": "读取Tex论文并写摘要 | 输入参数为路径",
166
- "Function": HotReload(读文章写摘要),
167
  },
168
  "翻译README或MD": {
169
  "Group": "编程",
170
  "Color": "stop",
171
  "AsButton": True,
172
  "Info": "将Markdown翻译为中文 | 输入参数为路径或URL",
173
- "Function": HotReload(Markdown英译中),
174
  },
175
  "翻译Markdown或README(支持Github链接)": {
176
  "Group": "编程",
177
  "Color": "stop",
178
  "AsButton": False,
179
  "Info": "将Markdown或README翻译为中文 | 输入参数为路径或URL",
180
- "Function": HotReload(Markdown英译中),
181
  },
182
  "批量生成函数注释": {
183
  "Group": "编程",
184
  "Color": "stop",
185
  "AsButton": False, # 加入下拉菜单中
186
  "Info": "批量生成函数的注释 | 输入参数为路径",
187
- "Function": HotReload(批量生成函数注释),
188
  },
189
  "保存当前的对话": {
190
  "Group": "对话",
191
  "AsButton": True,
192
  "Info": "保存当前的对话 | 不需要输入参数",
193
- "Function": HotReload(对话历史存档),
194
  },
195
  "[多线程Demo]解析此项目本身(源码自译解)": {
196
  "Group": "对话|编程",
197
  "AsButton": False, # 加入下拉菜单中
198
  "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数",
199
- "Function": HotReload(解析项目本身),
200
  },
201
- "历史上的今天": {
202
  "Group": "对话",
203
  "AsButton": True,
204
- "Info": "查看历史上的今天事件 (这是一个面向开发者的插件Demo) | 不需要输入参数",
205
- "Function": HotReload(高阶功能模板函数),
206
  },
207
  "精准翻译PDF论文": {
208
  "Group": "学术",
209
  "Color": "stop",
210
- "AsButton": True,
211
  "Info": "精准翻译PDF论文为中文 | 输入参数为路径",
212
- "Function": HotReload(批量翻译PDF文档),
213
  },
214
  "询问多个GPT模型": {
215
  "Group": "对话",
216
  "Color": "stop",
217
  "AsButton": True,
218
- "Function": HotReload(同时问询),
219
  },
220
  "批量总结PDF文档": {
221
  "Group": "学术",
222
  "Color": "stop",
223
  "AsButton": False, # 加入下拉菜单中
224
  "Info": "批量总结PDF文档的内容 | 输入参数为路径",
225
- "Function": HotReload(批量总结PDF文档),
226
  },
227
  "谷歌学术检索助手(输入谷歌学术搜索页url)": {
228
  "Group": "学术",
229
  "Color": "stop",
230
  "AsButton": False, # 加入下拉菜单中
231
  "Info": "使用谷歌学术检索助手搜索指定URL的结果 | 输入参数为谷歌学术搜索页的URL",
232
- "Function": HotReload(谷歌检索小助手),
233
  },
234
  "理解PDF文档内容 (模仿ChatPDF)": {
235
  "Group": "学术",
236
  "Color": "stop",
237
  "AsButton": False, # 加入下拉菜单中
238
  "Info": "理解PDF文档的内容并进行回答 | 输入参数为路径",
239
- "Function": HotReload(理解PDF文档内容标准文件输入),
240
  },
241
  "英文Latex项目全文润色(输入路径或上传压缩包)": {
242
  "Group": "学术",
243
  "Color": "stop",
244
  "AsButton": False, # 加入下拉菜单中
245
  "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
246
- "Function": HotReload(Latex英文润色),
 
 
 
 
 
 
 
247
  },
248
-
249
  "中文Latex项目全文润色(输入路径或上传压缩包)": {
250
  "Group": "学术",
251
  "Color": "stop",
252
  "AsButton": False, # 加入下拉菜单中
253
  "Info": "对中文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
254
- "Function": HotReload(Latex中文润色),
255
- },
256
- # 已经被新插件取代
257
- # "英文Latex项目全文纠错(输入路径或上传压缩包)": {
258
- # "Group": "学术",
259
- # "Color": "stop",
260
- # "AsButton": False, # 加入下拉菜单中
261
- # "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
262
- # "Function": HotReload(Latex英文纠错),
263
- # },
264
- # 已经被新插件取代
265
- # "Latex项目全文中译英(输入路径或上传压缩包)": {
266
- # "Group": "学术",
267
- # "Color": "stop",
268
- # "AsButton": False, # 加入下拉菜单中
269
- # "Info": "对Latex项目全文进行中译英处理 | 输入参数为路径或上传压缩包",
270
- # "Function": HotReload(Latex中译英)
271
- # },
272
- # 已经被新插件取代
273
- # "Latex项目全文英译中(输入路径或上传压缩包)": {
274
- # "Group": "学术",
275
- # "Color": "stop",
276
- # "AsButton": False, # 加入下拉菜单中
277
- # "Info": "对Latex项目全文进行英译中处理 | 输入参数为路径或上传压缩包",
278
- # "Function": HotReload(Latex英译中)
279
- # },
280
  "批量Markdown中译英(输入路径或上传压缩包)": {
281
  "Group": "编程",
282
  "Color": "stop",
283
  "AsButton": False, # 加入下拉菜单中
284
  "Info": "批量将Markdown文件中文翻译为英文 | 输入参数为路径或上传压缩包",
285
- "Function": HotReload(Markdown中译英),
286
  },
287
  }
288
 
289
  # -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
290
  try:
291
  from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
292
-
293
- function_plugins.update(
294
- {
295
- "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": {
296
- "Group": "学术",
297
- "Color": "stop",
298
- "AsButton": False, # 加入下拉菜单中
299
- # "Info": "下载arxiv论文并翻译摘要 | 输入参数为arxiv编号如1812.10695",
300
- "Function": HotReload(下载arxiv论文并翻译摘要),
301
- }
302
  }
303
- )
304
  except:
305
- print(trimmed_format_exc())
306
- print("Load function plugin failed")
307
 
308
  try:
309
  from crazy_functions.联网的ChatGPT import 连接网络回答问题
310
-
311
- function_plugins.update(
312
- {
313
- "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
314
- "Group": "对话",
315
- "Color": "stop",
316
- "AsButton": False, # 加入下拉菜单中
317
- # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题",
318
- "Function": HotReload(连接网络回答问题),
319
- }
320
  }
321
- )
322
  from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
323
-
324
- function_plugins.update(
325
- {
326
- "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
327
- "Group": "对话",
328
- "Color": "stop",
329
- "AsButton": False, # 加入下拉菜单中
330
- "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题",
331
- "Function": HotReload(连接bing搜索回答问题),
332
- }
333
  }
334
- )
335
  except:
336
- print(trimmed_format_exc())
337
- print("Load function plugin failed")
338
 
339
  try:
340
  from crazy_functions.解析项目源代码 import 解析任意code项目
341
-
342
- function_plugins.update(
343
- {
344
- "解析项目源代码(手动指定和筛选源代码文件类型)": {
345
- "Group": "编程",
346
- "Color": "stop",
347
- "AsButton": False,
348
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
349
- "ArgsReminder": '输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: "*.c, ^*.cpp, config.toml, ^*.toml"', # 高级参数输入区的显示提示
350
- "Function": HotReload(解析任意code项目),
351
- },
352
- }
353
- )
354
  except:
355
- print(trimmed_format_exc())
356
- print("Load function plugin failed")
357
 
358
  try:
359
  from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
360
-
361
- function_plugins.update(
362
- {
363
- "询问多个GPT模型(手动指定询问哪些模型)": {
364
- "Group": "对话",
365
- "Color": "stop",
366
- "AsButton": False,
367
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
368
- "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&gpt-4", # 高级参数输入区的显示提示
369
- "Function": HotReload(同时问询_指定模型),
370
- },
371
- }
372
- )
373
  except:
374
- print(trimmed_format_exc())
375
- print("Load function plugin failed")
376
 
377
  try:
378
- from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
379
-
380
- function_plugins.update(
381
- {
382
- "图片生成_DALLE2 (先切换模型到gpt-*)": {
383
- "Group": "对话",
384
- "Color": "stop",
385
- "AsButton": False,
386
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
387
- "ArgsReminder": "在这里输入分辨率, 如1024x1024(默认),支持 256x256, 512x512, 1024x1024", # 高级参数输入区的显示提示
388
- "Info": "使用DALLE2生成图片 | 输入参数字符串,提供图像的内容",
389
- "Function": HotReload(图片生成_DALLE2),
390
- },
391
- }
392
- )
393
- function_plugins.update(
394
- {
395
- "图片生成_DALLE3 (先切换模型到gpt-*)": {
396
- "Group": "对话",
397
- "Color": "stop",
398
- "AsButton": False,
399
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
400
- "ArgsReminder": "在这里输入自定义参数「分辨率-质量(可选)-风格(可选)」, 参数示例「1024x1024-hd-vivid」 || 分辨率支持 「1024x1024」(默认) /「1792x1024」/「1024x1792」 || 质量支持 「-standard」(默认) /「-hd」 || 风格支持 「-vivid」(默认) /「-natural」", # 高级参数输入区的显示提示
401
- "Info": "使用DALLE3生成图片 | 输入参数字符串,提供图像的内容",
402
- "Function": HotReload(图片生成_DALLE3),
403
- },
404
- }
405
- )
406
- function_plugins.update(
407
- {
408
- "图片修改_DALLE2 (先切换模型到gpt-*)": {
409
- "Group": "对话",
410
- "Color": "stop",
411
- "AsButton": False,
412
- "AdvancedArgs": False, # 调用时,唤起高级参数输入区(默认False)
413
- # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容",
414
- "Function": HotReload(图片修改_DALLE2),
415
- },
416
- }
417
- )
418
  except:
419
- print(trimmed_format_exc())
420
- print("Load function plugin failed")
421
 
422
  try:
423
  from crazy_functions.总结音视频 import 总结音视频
424
-
425
- function_plugins.update(
426
- {
427
- "批量总结音视频(输入路径或上传压缩包)": {
428
- "Group": "对话",
429
- "Color": "stop",
430
- "AsButton": False,
431
- "AdvancedArgs": True,
432
- "ArgsReminder": "调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。",
433
- "Info": "批量总结音频或视频 | 输入参数为路径",
434
- "Function": HotReload(总结音视频),
435
- }
436
  }
437
- )
438
  except:
439
- print(trimmed_format_exc())
440
- print("Load function plugin failed")
441
 
442
  try:
443
  from crazy_functions.数学动画生成manim import 动画生成
444
-
445
- function_plugins.update(
446
- {
447
- "数学动画生成(Manim)": {
448
- "Group": "对话",
449
- "Color": "stop",
450
- "AsButton": False,
451
- "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话",
452
- "Function": HotReload(动画生成),
453
- }
454
  }
455
- )
456
  except:
457
- print(trimmed_format_exc())
458
- print("Load function plugin failed")
459
 
460
  try:
461
  from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
462
-
463
- function_plugins.update(
464
- {
465
- "Markdown翻译(指定翻译成何种语言)": {
466
- "Group": "编程",
467
- "Color": "stop",
468
- "AsButton": False,
469
- "AdvancedArgs": True,
470
- "ArgsReminder": "请输入要翻译成哪种语言,默认为Chinese。",
471
- "Function": HotReload(Markdown翻译指定语言),
472
- }
473
  }
474
- )
475
  except:
476
- print(trimmed_format_exc())
477
- print("Load function plugin failed")
478
 
479
  try:
480
- from crazy_functions.知识库问答 import 知识库文件注入
481
-
482
- function_plugins.update(
483
- {
484
- "构建知识库(先上传文件素材,再运行此插件)": {
485
- "Group": "对话",
486
- "Color": "stop",
487
- "AsButton": False,
488
- "AdvancedArgs": True,
489
- "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。",
490
- "Function": HotReload(知识库文件注入),
491
- }
492
  }
493
- )
494
  except:
495
- print(trimmed_format_exc())
496
- print("Load function plugin failed")
497
 
498
  try:
499
- from crazy_functions.知识库问答 import 读取知识库作答
500
-
501
- function_plugins.update(
502
- {
503
- "知识库文件注入(构建知识库后,再运行此插件)": {
504
- "Group": "对话",
505
- "Color": "stop",
506
- "AsButton": False,
507
- "AdvancedArgs": True,
508
- "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要构建知识库后再运行此插件。",
509
- "Function": HotReload(读取知识库作答),
510
- }
511
  }
512
- )
513
  except:
514
- print(trimmed_format_exc())
515
- print("Load function plugin failed")
516
 
517
  try:
518
  from crazy_functions.交互功能函数模板 import 交互功能模板函数
519
-
520
- function_plugins.update(
521
- {
522
- "交互功能模板Demo函数(查找wallhaven.cc的壁纸)": {
523
- "Group": "对话",
524
- "Color": "stop",
525
- "AsButton": False,
526
- "Function": HotReload(交互功能模板函数),
527
- }
528
  }
529
- )
530
  except:
531
- print(trimmed_format_exc())
532
- print("Load function plugin failed")
533
 
534
  try:
535
- from crazy_functions.Latex输出PDF import Latex英文纠错加PDF对比
536
- from crazy_functions.Latex输出PDF import Latex翻译中文并重新编译PDF
537
- from crazy_functions.Latex输出PDF import PDF翻译中文并重新编译PDF
538
-
539
- function_plugins.update(
540
- {
541
- "Latex英文纠错+高亮修正位置 [需Latex]": {
542
- "Group": "学术",
543
- "Color": "stop",
544
- "AsButton": False,
545
- "AdvancedArgs": True,
546
- "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
547
- "Function": HotReload(Latex英文纠错加PDF对比),
548
- },
549
- "Arxiv论文精细翻译(输入arxivID)[需Latex]": {
550
- "Group": "学术",
551
- "Color": "stop",
552
- "AsButton": False,
553
- "AdvancedArgs": True,
554
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
555
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
556
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
557
- "Info": "Arixv论文精细翻译 | 输入参数arxiv论文的ID,比如1812.10695",
558
- "Function": HotReload(Latex翻译中文并重新编译PDF),
559
- },
560
- "本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
561
- "Group": "学术",
562
- "Color": "stop",
563
- "AsButton": False,
564
- "AdvancedArgs": True,
565
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
566
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
567
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
568
- "Info": "本地Latex论文精细翻译 | 输入参数是路径",
569
- "Function": HotReload(Latex翻译中文并重新编译PDF),
570
- },
571
- "PDF翻译中文并重新编译PDF(上传PDF)[需Latex]": {
572
- "Group": "学术",
573
- "Color": "stop",
574
- "AsButton": False,
575
- "AdvancedArgs": True,
576
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
577
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
578
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
579
- "Info": "PDF翻译中文,并重新编译PDF | 输入参数为路径",
580
- "Function": HotReload(PDF翻译中文并重新编译PDF)
581
- }
582
  }
583
- )
584
  except:
585
- print(trimmed_format_exc())
586
- print("Load function plugin failed")
587
 
588
  try:
589
  from toolbox import get_conf
590
-
591
- ENABLE_AUDIO = get_conf("ENABLE_AUDIO")
592
  if ENABLE_AUDIO:
593
  from crazy_functions.语音助手 import 语音助手
594
-
595
- function_plugins.update(
596
- {
597
- "实时语音对话": {
598
- "Group": "对话",
599
- "Color": "stop",
600
- "AsButton": True,
601
- "Info": "这是一个时刻聆听着的语音对话助手 | 没有输入参数",
602
- "Function": HotReload(语音助手),
603
- }
604
- }
605
- )
606
- except:
607
- print(trimmed_format_exc())
608
- print("Load function plugin failed")
609
-
610
- try:
611
- from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
612
-
613
- function_plugins.update(
614
- {
615
- "精准翻译PDF文档(NOUGAT)": {
616
- "Group": "学术",
617
- "Color": "stop",
618
- "AsButton": False,
619
- "Function": HotReload(批量翻译PDF文档),
620
- }
621
- }
622
- )
623
- except:
624
- print(trimmed_format_exc())
625
- print("Load function plugin failed")
626
-
627
- try:
628
- from crazy_functions.函数动态生成 import 函数动态生成
629
-
630
- function_plugins.update(
631
- {
632
- "动态代码解释器(CodeInterpreter)": {
633
- "Group": "智能体",
634
  "Color": "stop",
635
- "AsButton": False,
636
- "Function": HotReload(函数动态生成),
 
637
  }
638
- }
639
- )
640
  except:
641
- print(trimmed_format_exc())
642
- print("Load function plugin failed")
643
 
644
  try:
645
- from crazy_functions.多智能体 import 多智能体终端
646
-
647
- function_plugins.update(
648
- {
649
- "AutoGen多智能体终端(仅供测试)": {
650
- "Group": "智能体",
651
- "Color": "stop",
652
- "AsButton": False,
653
- "Function": HotReload(多智能体终端),
654
- }
655
  }
656
- )
657
  except:
658
- print(trimmed_format_exc())
659
- print("Load function plugin failed")
660
-
661
- try:
662
- from crazy_functions.互动小游戏 import 随机小游戏
663
 
664
- function_plugins.update(
665
- {
666
- "随机互动小游戏(仅供测试)": {
667
- "Group": "智能体",
668
- "Color": "stop",
669
- "AsButton": False,
670
- "Function": HotReload(随机小游戏),
671
- }
672
- }
673
- )
674
- except:
675
- print(trimmed_format_exc())
676
- print("Load function plugin failed")
677
 
678
  # try:
679
- # from crazy_functions.高级功能函数模板 import 测试图表渲染
680
  # function_plugins.update({
681
- # "绘制逻辑关系(测试图表渲染)": {
682
- # "Group": "智能体",
683
  # "Color": "stop",
684
- # "AsButton": True,
685
- # "Function": HotReload(测试图表渲染)
686
  # }
687
  # })
688
  # except:
689
- # print(trimmed_format_exc())
690
  # print('Load function plugin failed')
691
 
692
  # try:
@@ -703,6 +541,8 @@ def get_crazy_functions():
703
  # except:
704
  # print('Load function plugin failed')
705
 
 
 
706
  """
707
  设置默认值:
708
  - 默认 Group = 对话
@@ -712,12 +552,12 @@ def get_crazy_functions():
712
  """
713
  for name, function_meta in function_plugins.items():
714
  if "Group" not in function_meta:
715
- function_plugins[name]["Group"] = "对话"
716
  if "AsButton" not in function_meta:
717
  function_plugins[name]["AsButton"] = True
718
  if "AdvancedArgs" not in function_meta:
719
  function_plugins[name]["AdvancedArgs"] = False
720
  if "Color" not in function_meta:
721
- function_plugins[name]["Color"] = "secondary"
722
 
723
  return function_plugins
 
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
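The inline comment above is the only explanation of hot reloading in this file, so a minimal sketch may help. The idea: wrap a plugin generator so that every call re-imports the defining module and delegates to a fresh copy of the function. This is an illustrative stand-in under that assumption, not the real `toolbox.HotReload`:

```python
import importlib
import functools

def hot_reload(fn):
    # Illustrative sketch of what a HotReload-style wrapper could do:
    # reload the module that defines `fn` on every call, then delegate to
    # the freshly loaded function, so plugin edits apply without a restart.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        module = importlib.reload(importlib.import_module(fn.__module__))
        fresh_fn = getattr(module, fn.__name__)
        yield from fresh_fn(*args, **kwargs)  # plugins here are generators
    return wrapper
```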
 
2
 
3
 
4
  def get_crazy_functions():
 
6
  from crazy_functions.生成函数注释 import 批量生成函数注释
7
  from crazy_functions.解析项目源代码 import 解析项目本身
8
  from crazy_functions.解析项目源代码 import 解析一个Python项目
 
9
  from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
10
  from crazy_functions.解析项目源代码 import 解析一个C项目
11
  from crazy_functions.解析项目源代码 import 解析一个Golang项目
 
30
  from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
31
  from crazy_functions.Latex全文润色 import Latex中文润色
32
  from crazy_functions.Latex全文润色 import Latex英文纠错
33
+ from crazy_functions.Latex全文翻译 import Latex中译英
34
+ from crazy_functions.Latex全文翻译 import Latex英译中
35
  from crazy_functions.批量Markdown翻译 import Markdown中译英
36
  from crazy_functions.虚空终端 import 虚空终端
37
+
38
 
39
  function_plugins = {
40
  "虚空终端": {
41
+ "Group": "对话|编程|学术",
42
  "Color": "stop",
43
  "AsButton": True,
44
+ "Function": HotReload(虚空终端)
45
  },
46
  "解析整个Python项目": {
47
  "Group": "编程",
48
  "Color": "stop",
49
  "AsButton": True,
50
  "Info": "解析一个Python项目的所有源文件(.py) | 输入参数为路径",
51
+ "Function": HotReload(解析一个Python项目)
52
  },
53
  "载入对话历史存档(先上传存档或输入路径)": {
54
  "Group": "对话",
55
  "Color": "stop",
56
  "AsButton": False,
57
  "Info": "载入对话历史存档 | 输入参数为路径",
58
+ "Function": HotReload(载入对话历史存档)
59
  },
60
  "删除所有本地对话历史记录(谨慎操作)": {
61
  "Group": "对话",
62
  "AsButton": False,
63
  "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数",
64
+ "Function": HotReload(删除所有本地对话历史记录)
65
  },
66
  "清除所有缓存文件(谨慎操作)": {
67
  "Group": "对话",
68
  "Color": "stop",
69
  "AsButton": False, # 加入下拉菜单中
70
  "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数",
71
+ "Function": HotReload(清除缓存)
 
 
 
 
 
 
 
 
 
72
  },
73
  "批量总结Word文档": {
74
  "Group": "学术",
75
  "Color": "stop",
76
  "AsButton": True,
77
  "Info": "批量总结word文档 | 输入参数为路径",
78
+ "Function": HotReload(总结word文档)
 
 
 
 
 
 
 
79
  },
80
  "解析整个C++项目头文件": {
81
  "Group": "编程",
82
  "Color": "stop",
83
  "AsButton": False, # 加入下拉菜单中
84
  "Info": "解析一个C++项目的所有头文件(.h/.hpp) | 输入参数为路径",
85
+ "Function": HotReload(解析一个C项目的头文件)
86
  },
87
  "解析整个C++项目(.cpp/.hpp/.c/.h)": {
88
  "Group": "编程",
89
  "Color": "stop",
90
  "AsButton": False, # 加入下拉菜单中
91
  "Info": "解析一个C++项目的所有源文件(.cpp/.hpp/.c/.h)| 输入参数为路径",
92
+ "Function": HotReload(解析一个C项目)
93
  },
94
  "解析整个Go项目": {
95
  "Group": "编程",
96
  "Color": "stop",
97
  "AsButton": False, # 加入下拉菜单中
98
  "Info": "解析一个Go项目的所有源文件 | 输入参数为路径",
99
+ "Function": HotReload(解析一个Golang项目)
100
  },
101
  "解析整个Rust项目": {
102
  "Group": "编程",
103
  "Color": "stop",
104
  "AsButton": False, # 加入下拉菜单中
105
  "Info": "解析一个Rust项目的所有源文件 | 输入参数为路径",
106
+ "Function": HotReload(解析一个Rust项目)
107
  },
108
  "解析整个Java项目": {
109
  "Group": "编程",
110
  "Color": "stop",
111
  "AsButton": False, # 加入下拉菜单中
112
  "Info": "解析一个Java项目的所有源文件 | 输入参数为路径",
113
+ "Function": HotReload(解析一个Java项目)
114
  },
115
  "解析整个前端项目(js,ts,css等)": {
116
  "Group": "编程",
117
  "Color": "stop",
118
  "AsButton": False, # 加入下拉菜单中
119
  "Info": "解析一个前端项目的所有源文件(js,ts,css等) | 输入参数为路径",
120
+ "Function": HotReload(解析一个前端项目)
121
  },
122
  "解析整个Lua项目": {
123
  "Group": "编程",
124
  "Color": "stop",
125
  "AsButton": False, # 加入下拉菜单中
126
  "Info": "解析一个Lua项目的所有源文件 | 输入参数为路径",
127
+ "Function": HotReload(解析一个Lua项目)
128
  },
129
  "解析整个CSharp项目": {
130
  "Group": "编程",
131
  "Color": "stop",
132
  "AsButton": False, # 加入下拉菜单中
133
  "Info": "解析一个CSharp项目的所有源文件 | 输入参数为路径",
134
+ "Function": HotReload(解析一个CSharp项目)
135
  },
136
  "解析Jupyter Notebook文件": {
137
  "Group": "编程",
 
147
  "Color": "stop",
148
  "AsButton": False,
149
  "Info": "读取Tex论文并写摘要 | 输入参数为路径",
150
+ "Function": HotReload(读文章写摘要)
151
  },
152
  "翻译README或MD": {
153
  "Group": "编程",
154
  "Color": "stop",
155
  "AsButton": True,
156
  "Info": "将Markdown翻译为中文 | 输入参数为路径或URL",
157
+ "Function": HotReload(Markdown英译中)
158
  },
159
  "翻译Markdown或README(支持Github链接)": {
160
  "Group": "编程",
161
  "Color": "stop",
162
  "AsButton": False,
163
  "Info": "将Markdown或README翻译为中文 | 输入参数为路径或URL",
164
+ "Function": HotReload(Markdown英译中)
165
  },
166
  "批量生成函数注释": {
167
  "Group": "编程",
168
  "Color": "stop",
169
  "AsButton": False, # 加入下拉菜单中
170
  "Info": "批量生成函数的注释 | 输入参数为路径",
171
+ "Function": HotReload(批量生成函数注释)
172
  },
173
  "保存当前的对话": {
174
  "Group": "对话",
175
  "AsButton": True,
176
  "Info": "保存当前的对话 | 不需要输入参数",
177
+ "Function": HotReload(对话历史存档)
178
  },
179
  "[多线程Demo]解析此项目本身(源码自译解)": {
180
  "Group": "对话|编程",
181
  "AsButton": False, # 加入下拉菜单中
182
  "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数",
183
+ "Function": HotReload(解析项目本身)
184
  },
185
+ "[插件demo]历史上的今天": {
186
  "Group": "对话",
187
  "AsButton": True,
188
+ "Info": "查看历史上的今天事件 | 不需要输入参数",
189
+ "Function": HotReload(高阶功能模板���数)
190
  },
191
  "精准翻译PDF论文": {
192
  "Group": "学术",
193
  "Color": "stop",
194
+ "AsButton": True,
195
  "Info": "精准翻译PDF论文为中文 | 输入参数为路径",
196
+ "Function": HotReload(批量翻译PDF文档)
197
  },
198
  "询问多个GPT模型": {
199
  "Group": "对话",
200
  "Color": "stop",
201
  "AsButton": True,
202
+ "Function": HotReload(同时问询)
203
  },
204
  "批量总结PDF文档": {
205
  "Group": "学术",
206
  "Color": "stop",
207
  "AsButton": False, # 加入下拉菜单中
208
  "Info": "批量总结PDF文档的内容 | 输入参数为路径",
209
+ "Function": HotReload(批量总结PDF文档)
210
  },
211
  "谷歌学术检索助手(输入谷歌学术搜索页url)": {
212
  "Group": "学术",
213
  "Color": "stop",
214
  "AsButton": False, # 加入下拉菜单中
215
  "Info": "使用谷歌学术检索助手搜索指定URL的结果 | 输入参数为谷歌学术搜索页的URL",
216
+ "Function": HotReload(谷歌检索小助手)
217
  },
218
  "理解PDF文档内容 (模仿ChatPDF)": {
219
  "Group": "学术",
220
  "Color": "stop",
221
  "AsButton": False, # 加入下拉菜单中
222
  "Info": "理解PDF文档的内容并进行回答 | 输入参数为路径",
223
+ "Function": HotReload(理解PDF文档内容标准文件输入)
224
  },
225
  "英文Latex项目全文润色(输入路径或上传压缩包)": {
226
  "Group": "学术",
227
  "Color": "stop",
228
  "AsButton": False, # 加入下拉菜单中
229
  "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
230
+ "Function": HotReload(Latex英文润色)
231
+ },
232
+ "英文Latex项目全文纠错(输入路径或上传压缩包)": {
233
+ "Group": "学术",
234
+ "Color": "stop",
235
+ "AsButton": False, # 加入下拉菜单中
236
+ "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
237
+ "Function": HotReload(Latex英文纠错)
238
  },
 
239
  "中文Latex项目全文润色(输入路径或上传压缩包)": {
240
  "Group": "学术",
241
  "Color": "stop",
242
  "AsButton": False, # 加入下拉菜单中
243
  "Info": "对中文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
244
+ "Function": HotReload(Latex中文润色)
245
+ },
246
+ "Latex项目全文中译英(输入路径或上传压缩包)": {
247
+ "Group": "学术",
248
+ "Color": "stop",
249
+ "AsButton": False, # 加入下拉菜单中
250
+ "Info": "对Latex项目全文进行中译英处理 | 输入参数为路径或上传压缩包",
251
+ "Function": HotReload(Latex中译英)
252
+ },
253
+ "Latex项目全文英译中(输入路径或上传压缩包)": {
254
+ "Group": "学术",
255
+ "Color": "stop",
256
+ "AsButton": False, # 加入下拉菜单中
257
+ "Info": "对Latex项目全文进行英译中处理 | 输入参数为路径或上传压缩包",
258
+ "Function": HotReload(Latex英译中)
259
+ },
260
  "批量Markdown中译英(输入路径或上传压缩包)": {
261
  "Group": "编程",
262
  "Color": "stop",
263
  "AsButton": False, # 加入下拉菜单中
264
  "Info": "批量将Markdown文件中文翻译为英文 | 输入参数为路径或上传压缩包",
265
+ "Function": HotReload(Markdown中译英)
266
  },
267
  }
268
 
269
  # -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
270
  try:
271
  from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
272
+ function_plugins.update({
273
+ "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": {
274
+ "Group": "学术",
275
+ "Color": "stop",
276
+ "AsButton": False, # 加入下拉菜单中
277
+ # "Info": "下载arxiv论文并翻译摘要 | 输入参数为arxiv编号如1812.10695",
278
+ "Function": HotReload(下载arxiv论文并翻译摘要)
 
 
 
279
  }
280
+ })
281
  except:
282
+ print('Load function plugin failed')
 
283
 
284
  try:
285
  from crazy_functions.联网的ChatGPT import 连接网络回答问题
286
+ function_plugins.update({
287
+ "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
288
+ "Group": "对话",
289
+ "Color": "stop",
290
+ "AsButton": False, # 加入下拉菜单中
291
+ # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题",
292
+ "Function": HotReload(连接网络回答问题)
 
 
 
293
  }
294
+ })
295
  from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
296
+ function_plugins.update({
297
+ "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
298
+ "Group": "对话",
299
+ "Color": "stop",
300
+ "AsButton": False, # 加入下拉菜单中
301
+ "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题",
302
+ "Function": HotReload(连接bing搜索回答问题)
 
 
 
303
  }
304
+ })
305
  except:
306
+ print('Load function plugin failed')
 
307
 
308
  try:
309
  from crazy_functions.解析项目源代码 import 解析任意code项目
310
+ function_plugins.update({
311
+ "解析项目源代码(手动指定和筛选源代码文件类型)": {
312
+ "Group": "编程",
313
+ "Color": "stop",
314
+ "AsButton": False,
315
+ "AdvancedArgs": True, # 调用时,唤起���级参数输入区(默认False)
316
+ "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
317
+ "Function": HotReload(解析任意code项目)
318
+ },
319
+ })
 
 
 
320
  except:
321
+ print('Load function plugin failed')
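The `ArgsReminder` in the entry above documents a small filter syntax: comma-separated patterns, `*` as a wildcard, and a leading `^` to exclude. Below is a sketch of how such a filter could be applied with `fnmatch`; `filter_file_manifest` is a hypothetical helper, not the plugin's actual code:

```python
import fnmatch
import os

def filter_file_manifest(paths, pattern_arg):
    # Hypothetical helper mirroring the ArgsReminder syntax:
    # "*.c, ^*.cpp" keeps *.c files and drops *.cpp; empty input keeps all.
    patterns = [p.strip() for p in pattern_arg.split(",") if p.strip()]
    if not patterns:
        return list(paths)
    keep = [p for p in patterns if not p.startswith("^")]
    drop = [p[1:] for p in patterns if p.startswith("^")]
    selected = []
    for path in paths:
        name = os.path.basename(path)
        if keep and not any(fnmatch.fnmatch(name, k) for k in keep):
            continue  # no positive pattern matched
        if any(fnmatch.fnmatch(name, d) for d in drop):
            continue  # an exclusion pattern matched
        selected.append(path)
    return selected
```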
 
322
 
323
  try:
324
  from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
325
+ function_plugins.update({
326
+ "询问多个GPT模型(手动指定询问哪些模型)": {
327
+ "Group": "对话",
328
+ "Color": "stop",
329
+ "AsButton": False,
330
+ "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
331
+ "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
332
+ "Function": HotReload(同时问询_指定模型)
333
+ },
334
+ })
 
 
 
335
  except:
336
+ print('Load function plugin failed')
 
337
 
338
  try:
339
+ from crazy_functions.图片生成 import 图片生成
340
+ function_plugins.update({
341
+ "图片生成(先切换模型到openai或api2d)": {
342
+ "Group": "对话",
343
+ "Color": "stop",
344
+ "AsButton": False,
345
+ "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
346
+ "ArgsReminder": "在这里输入分辨率, 如256x256(默认)", # 高级参数输入区的显示提示
347
+ "Info": "图片生成 | 输入参数字符串,提供图像的内容",
348
+ "Function": HotReload(图片生成)
349
+ },
350
+ })
351
  except:
352
+ print('Load function plugin failed')
 
353
 
354
  try:
355
  from crazy_functions.总结音视频 import 总结音视频
356
+ function_plugins.update({
357
+ "批量总结音视频(输入路径或上传压缩包)": {
358
+ "Group": "对话",
359
+ "Color": "stop",
360
+ "AsButton": False,
361
+ "AdvancedArgs": True,
362
+ "ArgsReminder": "调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。",
363
+ "Info": "批量总结音频或视频 | 输入参数为路径",
364
+ "Function": HotReload(总结音视频)
 
 
 
365
  }
366
+ })
367
  except:
368
+ print('Load function plugin failed')
 
369
 
370
  try:
371
  from crazy_functions.数学动画生成manim import 动画生成
372
+ function_plugins.update({
373
+ "数学动画生成(Manim)": {
374
+ "Group": "对话",
375
+ "Color": "stop",
376
+ "AsButton": False,
377
+ "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话",
378
+ "Function": HotReload(动画生成)
 
 
 
379
  }
380
+ })
381
  except:
382
+ print('Load function plugin failed')
 
383
 
384
  try:
385
  from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
386
+ function_plugins.update({
387
+ "Markdown翻译(手动指定语言)": {
388
+ "Group": "编程",
389
+ "Color": "stop",
390
+ "AsButton": False,
391
+ "AdvancedArgs": True,
392
+ "ArgsReminder": "请输入要翻译成哪种语言,默认为Chinese。",
393
+ "Function": HotReload(Markdown翻译指定语言)
 
 
 
394
  }
395
+ })
396
  except:
397
+ print('Load function plugin failed')
 
398
 
399
  try:
400
+ from crazy_functions.Langchain知识库 import 知识库问答
401
+ function_plugins.update({
402
+ "构建知识库(先上传文件素材,再运行此插件)": {
403
+ "Group": "对话",
404
+ "Color": "stop",
405
+ "AsButton": False,
406
+ "AdvancedArgs": True,
407
+ "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。",
408
+ "Function": HotReload(知识库问答)
 
 
 
409
  }
410
+ })
411
  except:
412
+ print('Load function plugin failed')
 
413
 
414
  try:
415
+ from crazy_functions.Langchain知识库 import 读取知识库作答
416
+ function_plugins.update({
417
+ "知识库问答(构建知识库后,再运行此插件)": {
418
+ "Group": "对话",
419
+ "Color": "stop",
420
+ "AsButton": False,
421
+ "AdvancedArgs": True,
422
+ "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要构建知识库后再运行此插件。",
423
+ "Function": HotReload(读取知识库作答)
 
 
 
424
  }
425
+ })
426
  except:
427
+ print('Load function plugin failed')
 
428
 
429
  try:
430
  from crazy_functions.交互功能函数模板 import 交互功能模板函数
431
+ function_plugins.update({
432
+ "交互功能模板函数": {
433
+ "Group": "对话",
434
+ "Color": "stop",
435
+ "AsButton": False,
436
+ "Function": HotReload(交互功能模板函数)
 
 
 
437
  }
438
+ })
439
  except:
440
+ print('Load function plugin failed')
 
441
 
442
  try:
443
+ from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
444
+ function_plugins.update({
445
+ "Latex英文纠错+高亮修正位置 [需Latex]": {
446
+ "Group": "学术",
447
+ "Color": "stop",
448
+ "AsButton": False,
449
+ "AdvancedArgs": True,
450
+ "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
451
+ "Function": HotReload(Latex英文纠错加PDF对比)
452
+ }
453
+ })
454
+ from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
455
+ function_plugins.update({
456
+ "Arixv论文精细翻译(输入arxivID)[需Latex]": {
457
+ "Group": "学术",
458
+ "Color": "stop",
459
+ "AsButton": False,
460
+ "AdvancedArgs": True,
461
+ "ArgsReminder":
462
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 " +
463
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " +
464
+ 'If the term "agent" is used in this section, it should be translated to "智能体". ',
465
+ "Info": "Arixv论文精细翻译 | 输入参数arxiv论文的ID,比如1812.10695",
466
+ "Function": HotReload(Latex翻译中文并重新编译PDF)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  }
468
+ })
469
+ function_plugins.update({
470
+ "本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
471
+ "Group": "学术",
472
+ "Color": "stop",
473
+ "AsButton": False,
474
+ "AdvancedArgs": True,
475
+ "ArgsReminder":
476
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 " +
477
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " +
478
+ 'If the term "agent" is used in this section, it should be translated to "智能体". ',
479
+ "Info": "本地Latex论文精细翻译 | 输入参数是路径",
480
+ "Function": HotReload(Latex翻译中文并重新编译PDF)
481
+ }
482
+ })
483
  except:
484
+ print('Load function plugin failed')
 
485
 
486
  try:
487
  from toolbox import get_conf
488
+ ENABLE_AUDIO, = get_conf('ENABLE_AUDIO')
 
489
  if ENABLE_AUDIO:
490
  from crazy_functions.语音助手 import 语音助手
491
+ function_plugins.update({
492
+ "实时音频采集": {
493
+ "Group": "对话",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  "Color": "stop",
495
+ "AsButton": True,
496
+ "Info": "开始语言对话 | 没有输入参数",
497
+ "Function": HotReload(语音助手)
498
  }
499
+ })
 
500
  except:
501
+ print('Load function plugin failed')
 
502
 
503
  try:
504
+ from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
505
+ function_plugins.update({
506
+ "精准翻译PDF文档(NOUGAT)": {
507
+ "Group": "学术",
508
+ "Color": "stop",
509
+ "AsButton": False,
510
+ "Function": HotReload(批量翻译PDF文档)
 
 
 
511
  }
512
+ })
513
  except:
514
+ print('Load function plugin failed')
515
516
 
517
  # try:
518
+ # from crazy_functions.CodeInterpreter import 虚空终端CodeInterpreter
519
  # function_plugins.update({
520
+ # "CodeInterpreter(开发中,仅供测试)": {
521
+ # "Group": "编程|对话",
522
  # "Color": "stop",
523
+ # "AsButton": False,
524
+ # "Function": HotReload(虚空终端CodeInterpreter)
525
  # }
526
  # })
527
  # except:
 
528
  # print('Load function plugin failed')
529
 
530
  # try:
 
541
  # except:
542
  # print('Load function plugin failed')
543
 
544
+
545
+
546
  """
547
  设置默认值:
548
  - 默认 Group = 对话
 
552
  """
553
  for name, function_meta in function_plugins.items():
554
  if "Group" not in function_meta:
555
+ function_plugins[name]["Group"] = '对话'
556
  if "AsButton" not in function_meta:
557
  function_plugins[name]["AsButton"] = True
558
  if "AdvancedArgs" not in function_meta:
559
  function_plugins[name]["AdvancedArgs"] = False
560
  if "Color" not in function_meta:
561
+ function_plugins[name]["Color"] = 'secondary'
562
 
563
  return function_plugins
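Both versions of `get_crazy_functions()` shown above follow the same pattern: a dict mapping plugin display names to metadata, optional plugins registered inside `try`/`except` so a missing dependency cannot break startup, and a closing loop that fills in defaults. A condensed, self-contained sketch of that pattern (the entry names and placeholder plugin are illustrative):

```python
from toolbox import HotReload  # same helper this file imports at the top

def build_plugin_registry():
    def demo_plugin(*args, **kwargs):
        yield  # placeholder generator standing in for a real plugin

    registry = {
        "示例插件": {
            "Group": "对话",      # menu group shown in the UI
            "Color": "stop",      # button color hint
            "AsButton": True,     # render as a button, not a dropdown item
            "Function": HotReload(demo_plugin),
        },
    }

    try:  # optional plugin: registered only if its dependency imports cleanly
        import tiktoken  # noqa: F401
        registry["可选插件"] = {"Function": HotReload(demo_plugin)}
    except ImportError:
        print('Load function plugin failed')

    # same defaults as the loop at the end of get_crazy_functions()
    for meta in registry.values():
        meta.setdefault("Group", '对话')
        meta.setdefault("AsButton", True)
        meta.setdefault("AdvancedArgs", False)
        meta.setdefault("Color", 'secondary')
    return registry
```

Note also the `ENABLE_AUDIO, = get_conf('ENABLE_AUDIO')` line restored in this hunk: the older `get_conf` returns a tuple, so the trailing comma is required, while the removed line relied on the newer scalar-returning `get_conf("ENABLE_AUDIO")`.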
crazy_functions/Langchain知识库.py CHANGED
@@ -53,14 +53,14 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
53
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
54
  print('Checking Text2vec ...')
55
  from langchain.embeddings.huggingface import HuggingFaceEmbeddings
56
- with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
57
  HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
58
 
59
  # < -------------------构建知识库--------------- >
60
  chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
61
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
62
  print('Establishing knowledge archive ...')
63
- with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
64
  kai = knowledge_archive_interface()
65
  kai.feed_archive(file_manifest=file_manifest, id=kai_id)
66
  kai_files = kai.get_loaded_file()
 
53
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
54
  print('Checking Text2vec ...')
55
  from langchain.embeddings.huggingface import HuggingFaceEmbeddings
56
+ with ProxyNetworkActivate(): # 临时地激活代理网络
57
  HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
58
 
59
  # < -------------------构建知识库--------------- >
60
  chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
61
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
62
  print('Establishing knowledge archive ...')
63
+ with ProxyNetworkActivate(): # 临时地激活代理网络
64
  kai = knowledge_archive_interface()
65
  kai.feed_archive(file_manifest=file_manifest, id=kai_id)
66
  kai_files = kai.get_loaded_file()
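The only change in this file is dropping the `'Download_LLM'` scope argument from `ProxyNetworkActivate`, matching the older no-argument signature. For readers unfamiliar with the helper, here is a rough sketch of what such a context manager might do, inferred from its name and the surrounding comments rather than from the project's actual implementation:

```python
import os
from contextlib import contextmanager

@contextmanager
def proxy_network_activate(proxy_url="http://127.0.0.1:7890"):
    # Sketch only: temporarily route HTTP(S) traffic through a proxy,
    # then restore whatever the environment held before.
    saved = {key: os.environ.get(key) for key in ("HTTP_PROXY", "HTTPS_PROXY")}
    os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = proxy_url
    try:
        yield
    finally:
        for key, value in saved.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value
```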
crazy_functions/Latex全文润色.py CHANGED
@@ -1,5 +1,5 @@
1
  from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
2
- from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
3
 
4
 
5
  class PaperFileGroup():
@@ -11,7 +11,7 @@ class PaperFileGroup():
11
  self.sp_file_tag = []
12
 
13
  # count_token
14
- from request_llms.bridge_all import model_info
15
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
  self.get_token_num = get_token_num
@@ -26,8 +26,8 @@ class PaperFileGroup():
26
  self.sp_file_index.append(index)
27
  self.sp_file_tag.append(self.file_paths[index])
28
  else:
29
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
30
- segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
31
  for j, segment in enumerate(segments):
32
  self.sp_file_contents.append(segment)
33
  self.sp_file_index.append(index)
@@ -135,18 +135,18 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
135
 
136
 
137
  @CatchException
138
- def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
139
  # 基本信息:功能、贡献者
140
  chatbot.append([
141
  "函数插件功能?",
142
- "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex环境,请使用「Latex英文纠错+高亮修正位置(需Latex)插件」"])
143
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
144
 
145
  # 尝试导入依赖,如果缺少依赖,则给出安装建议
146
  try:
147
  import tiktoken
148
  except:
149
- report_exception(chatbot, history,
150
  a=f"解析项目: {txt}",
151
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
152
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -157,12 +157,12 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
157
  project_folder = txt
158
  else:
159
  if txt == "": txt = '空空如也的输入栏'
160
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
161
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
162
  return
163
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
164
  if len(file_manifest) == 0:
165
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
166
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
167
  return
168
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
@@ -173,7 +173,7 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
173
 
174
 
175
  @CatchException
176
- def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
177
  # 基本信息:功能、贡献者
178
  chatbot.append([
179
  "函数插件功能?",
@@ -184,7 +184,7 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
184
  try:
185
  import tiktoken
186
  except:
187
- report_exception(chatbot, history,
188
  a=f"解析项目: {txt}",
189
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
190
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -195,12 +195,12 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
195
  project_folder = txt
196
  else:
197
  if txt == "": txt = '空空如也的输入栏'
198
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
199
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
200
  return
201
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
202
  if len(file_manifest) == 0:
203
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
204
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
205
  return
206
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
@@ -209,7 +209,7 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
209
 
210
 
211
  @CatchException
212
- def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
213
  # 基本信息:功能、贡献者
214
  chatbot.append([
215
  "函数插件功能?",
@@ -220,7 +220,7 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
220
  try:
221
  import tiktoken
222
  except:
223
- report_exception(chatbot, history,
224
  a=f"解析项目: {txt}",
225
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
226
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -231,12 +231,12 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
231
  project_folder = txt
232
  else:
233
  if txt == "": txt = '空空如也的输入栏'
234
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
235
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
236
  return
237
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
238
  if len(file_manifest) == 0:
239
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
240
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
241
  return
242
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
 
1
  from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
2
+ from toolbox import CatchException, report_execption, write_history_to_file, zip_folder
3
 
4
 
5
  class PaperFileGroup():
 
11
  self.sp_file_tag = []
12
 
13
  # count_token
14
+ from request_llm.bridge_all import model_info
15
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
  self.get_token_num = get_token_num
 
26
  self.sp_file_index.append(index)
27
  self.sp_file_tag.append(self.file_paths[index])
28
  else:
29
+ from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
30
+ segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
31
  for j, segment in enumerate(segments):
32
  self.sp_file_contents.append(segment)
33
  self.sp_file_index.append(index)
 
135
 
136
 
137
  @CatchException
138
+ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
139
  # 基本信息:功能、贡献者
140
  chatbot.append([
141
  "函数插件功能?",
142
+ "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex环境,请使用“Latex英文纠错+高亮”插件)"])
143
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
144
 
145
  # 尝试导入依赖,如果缺少依赖,则给出安装建议
146
  try:
147
  import tiktoken
148
  except:
149
+ report_execption(chatbot, history,
150
  a=f"解析项目: {txt}",
151
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
152
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
157
  project_folder = txt
158
  else:
159
  if txt == "": txt = '空空如也的输入栏'
160
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
161
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
162
  return
163
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
164
  if len(file_manifest) == 0:
165
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
166
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
167
  return
168
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
 
173
 
174
 
175
  @CatchException
176
+ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
177
  # 基本信息:功能、贡献者
178
  chatbot.append([
179
  "函数插件功能?",
 
184
  try:
185
  import tiktoken
186
  except:
187
+ report_execption(chatbot, history,
188
  a=f"解析项目: {txt}",
189
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
190
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
195
  project_folder = txt
196
  else:
197
  if txt == "": txt = '空空如也的输入栏'
198
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
199
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
200
  return
201
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
202
  if len(file_manifest) == 0:
203
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
204
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
205
  return
206
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
 
209
 
210
 
211
  @CatchException
212
+ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
213
  # 基本信息:功能、贡献者
214
  chatbot.append([
215
  "函数插件功能?",
 
220
  try:
221
  import tiktoken
222
  except:
223
+ report_execption(chatbot, history,
224
  a=f"解析项目: {txt}",
225
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
226
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
231
  project_folder = txt
232
  else:
233
  if txt == "": txt = '空空如也的输入栏'
234
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
235
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
236
  return
237
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
238
  if len(file_manifest) == 0:
239
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
240
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
241
  return
242
  yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
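The hunks above swap `breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)` for the older `breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)`, whose extra argument is an explicit token-counting callback. The idea behind both helpers can be sketched as recursive splitting; this is a simplified guess at the behavior, not the project's real algorithm:

```python
def breakdown_to_token_limit(text, get_token_num, limit):
    # Simplified sketch: if a fragment exceeds the token budget, split it
    # near the middle (preferring a newline boundary) and recurse on halves.
    if get_token_num(text) <= limit or len(text) < 2:
        return [text]
    mid = text.rfind("\n", 0, len(text) // 2)
    if mid <= 0:
        mid = len(text) // 2  # no newline found: hard split
    return (breakdown_to_token_limit(text[:mid], get_token_num, limit)
            + breakdown_to_token_limit(text[mid:], get_token_num, limit))
```

With the tokenizer set up as in `PaperFileGroup.__init__`, a call would look like `segments = breakdown_to_token_limit(file_content, self.get_token_num, max_token_limit)`.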
crazy_functions/Latex全文翻译.py CHANGED
@@ -1,5 +1,5 @@
1
  from toolbox import update_ui, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, write_history_to_file
3
  fast_debug = False
4
 
5
  class PaperFileGroup():
@@ -11,7 +11,7 @@ class PaperFileGroup():
11
  self.sp_file_tag = []
12
 
13
  # count_token
14
- from request_llms.bridge_all import model_info
15
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
  self.get_token_num = get_token_num
@@ -26,8 +26,8 @@ class PaperFileGroup():
26
  self.sp_file_index.append(index)
27
  self.sp_file_tag.append(self.file_paths[index])
28
  else:
29
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
30
- segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
31
  for j, segment in enumerate(segments):
32
  self.sp_file_contents.append(segment)
33
  self.sp_file_index.append(index)
@@ -106,7 +106,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
106
 
107
 
108
  @CatchException
109
- def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
110
  # 基本信息:功能、贡献者
111
  chatbot.append([
112
  "函数插件功能?",
@@ -117,7 +117,7 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
117
  try:
118
  import tiktoken
119
  except:
120
- report_exception(chatbot, history,
121
  a=f"解析项目: {txt}",
122
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
123
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -128,12 +128,12 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
128
  project_folder = txt
129
  else:
130
  if txt == "": txt = '空空如也的输入栏'
131
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
132
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
133
  return
134
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
135
  if len(file_manifest) == 0:
136
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
137
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
138
  return
139
  yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
@@ -143,7 +143,7 @@ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
143
 
144
 
145
  @CatchException
146
- def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
147
  # 基本信息:功能、贡献者
148
  chatbot.append([
149
  "函数插件功能?",
@@ -154,7 +154,7 @@ def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
154
  try:
155
  import tiktoken
156
  except:
157
- report_exception(chatbot, history,
158
  a=f"解析项目: {txt}",
159
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
160
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -165,12 +165,12 @@ def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom
165
  project_folder = txt
166
  else:
167
  if txt == "": txt = '空空如也的输入栏'
168
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
169
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
170
  return
171
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
172
  if len(file_manifest) == 0:
173
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
174
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
175
  return
176
  yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
 
1
  from toolbox import update_ui, promote_file_to_downloadzone
2
+ from toolbox import CatchException, report_execption, write_history_to_file
3
  fast_debug = False
4
 
5
  class PaperFileGroup():
 
11
  self.sp_file_tag = []
12
 
13
  # count_token
14
+ from request_llm.bridge_all import model_info
15
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
  self.get_token_num = get_token_num
 
26
  self.sp_file_index.append(index)
27
  self.sp_file_tag.append(self.file_paths[index])
28
  else:
29
+ from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
30
+ segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
31
  for j, segment in enumerate(segments):
32
  self.sp_file_contents.append(segment)
33
  self.sp_file_index.append(index)
 
106
 
107
 
108
  @CatchException
109
+ def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
110
  # 基本信息:功能、贡献者
111
  chatbot.append([
112
  "函数插件功能?",
 
117
  try:
118
  import tiktoken
119
  except:
120
+ report_execption(chatbot, history,
121
  a=f"解析项目: {txt}",
122
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
123
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
128
  project_folder = txt
129
  else:
130
  if txt == "": txt = '空空如也的输入栏'
131
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
132
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
133
  return
134
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
135
  if len(file_manifest) == 0:
136
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
137
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
138
  return
139
  yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
 
143
 
144
 
145
  @CatchException
146
+ def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
147
  # 基本信息:功能、贡献者
148
  chatbot.append([
149
  "函数插件功能?",
 
154
  try:
155
  import tiktoken
156
  except:
157
+ report_execption(chatbot, history,
158
  a=f"解析项目: {txt}",
159
  b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
160
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
165
  project_folder = txt
166
  else:
167
  if txt == "": txt = '空空如也的输入栏'
168
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
169
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
170
  return
171
  file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
172
  if len(file_manifest) == 0:
173
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
174
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
175
  return
176
  yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
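Every plugin in these two files is a generator decorated with `@CatchException`, and failures are surfaced through `report_execption` (the old spelling restored by this diff) rather than raised. A rough sketch of the decorator's job, illustrative rather than the real `toolbox` implementation:

```python
import functools
import traceback

def catch_exception(plugin_fn):
    # Sketch: wrap a generator plugin so an uncaught error is appended to
    # the chat log instead of crashing the worker process.
    @functools.wraps(plugin_fn)
    def wrapper(txt, llm_kwargs, plugin_kwargs, chatbot, history, *rest):
        try:
            yield from plugin_fn(txt, llm_kwargs, plugin_kwargs, chatbot, history, *rest)
        except Exception:
            chatbot.append([f"插件调用出错: {txt}", traceback.format_exc()])
            yield chatbot, history  # let the UI render the error report
    return wrapper
```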
crazy_functions/Latex输出PDF.py DELETED
@@ -1,484 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
3
- from functools import partial
4
- import glob, os, requests, time, json, tarfile
5
-
6
- pj = os.path.join
7
- ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
8
-
9
-
10
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
11
- # 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
12
- def switch_prompt(pfg, mode, more_requirement):
13
- """
14
- Generate prompts and system prompts based on the mode for proofreading or translating.
15
- Args:
16
- - pfg: Proofreader or Translator instance.
17
- - mode: A string specifying the mode, either 'proofread' or 'translate_zh'.
18
-
19
- Returns:
20
- - inputs_array: A list of strings containing prompts for users to respond to.
21
- - sys_prompt_array: A list of strings containing prompts for system prompts.
22
- """
23
- n_split = len(pfg.sp_file_contents)
24
- if mode == 'proofread_en':
25
- inputs_array = [r"Below is a section from an academic paper, proofread this section." +
26
- r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
27
- r"Answer me only with the revised text:" +
28
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
29
- sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
30
- elif mode == 'translate_zh':
31
- inputs_array = [
32
- r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
33
- r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
34
- r"Answer me only with the translated text:" +
35
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
36
- sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
37
- else:
38
- assert False, "未知指令"
39
- return inputs_array, sys_prompt_array
40
-
41
-
42
- def desend_to_extracted_folder_if_exist(project_folder):
43
- """
44
- Descend into the extracted folder if it exists, otherwise return the original folder.
45
-
46
- Args:
47
- - project_folder: A string specifying the folder path.
48
-
49
- Returns:
50
- - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
51
- """
52
- maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
53
- if len(maybe_dir) == 0: return project_folder
54
- if maybe_dir[0].endswith('.extract'): return maybe_dir[0]
55
- return project_folder
56
-
57
-
58
- def move_project(project_folder, arxiv_id=None):
59
- """
60
- Create a new work folder and copy the project folder to it.
61
-
62
- Args:
63
- - project_folder: A string specifying the folder path of the project.
64
-
65
- Returns:
66
- - A string specifying the path to the new work folder.
67
- """
68
- import shutil, time
69
- time.sleep(2) # avoid time string conflict
70
- if arxiv_id is not None:
71
- new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
72
- else:
73
- new_workfolder = f'{get_log_folder()}/{gen_time_str()}'
74
- try:
75
- shutil.rmtree(new_workfolder)
76
- except:
77
- pass
78
-
79
- # align subfolder if there is a folder wrapper
80
- items = glob.glob(pj(project_folder, '*'))
81
- items = [item for item in items if os.path.basename(item) != '__MACOSX']
82
- if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
83
- if os.path.isdir(items[0]): project_folder = items[0]
84
-
85
- shutil.copytree(src=project_folder, dst=new_workfolder)
86
- return new_workfolder
87
-
88
-
89
- def arxiv_download(chatbot, history, txt, allow_cache=True):
90
- def check_cached_translation_pdf(arxiv_id):
91
- translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
92
- if not os.path.exists(translation_dir):
93
- os.makedirs(translation_dir)
94
- target_file = pj(translation_dir, 'translate_zh.pdf')
95
-        if os.path.exists(target_file):
-            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
-            target_file_compare = pj(translation_dir, 'comparison.pdf')
-            if os.path.exists(target_file_compare):
-                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
-            return target_file
-        return False
-
-    def is_float(s):
-        try:
-            float(s)
-            return True
-        except ValueError:
-            return False
-
-    if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
-        txt = 'https://arxiv.org/abs/' + txt.strip()
-    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
-        txt = 'https://arxiv.org/abs/' + txt[:10]
-
-    if not txt.startswith('https://arxiv.org'):
-        return txt, None # 是本地文件,跳过下载
-
-    # <-------------- inspect format ------------->
-    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
-    yield from update_ui(chatbot=chatbot, history=history)
-    time.sleep(1) # 刷新界面
-
-    url_ = txt # https://arxiv.org/abs/1707.06690
-    if not txt.startswith('https://arxiv.org/abs/'):
-        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
-        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面
-        return msg, None
-    # <-------------- set format ------------->
-    arxiv_id = url_.split('/abs/')[-1]
-    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
-    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
-    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
-
-    url_tar = url_.replace('/abs/', '/e-print/')
-    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
-    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
-    os.makedirs(translation_dir, exist_ok=True)
-
-    # <-------------- download arxiv source file ------------->
-    dst = pj(translation_dir, arxiv_id + '.tar')
-    if os.path.exists(dst):
-        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
-    else:
-        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
-        proxies = get_conf('proxies')
-        r = requests.get(url_tar, proxies=proxies)
-        with open(dst, 'wb+') as f:
-            f.write(r.content)
-    # <-------------- extract file ------------->
-    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面
-    from toolbox import extract_archive
-    extract_archive(file_path=dst, dest_dir=extract_dst)
-    return extract_dst, arxiv_id
-
-
-def pdf2tex_project(pdf_file_path):
-    # Mathpix API credentials
-    app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
-    headers = {"app_id": app_id, "app_key": app_key}
-
-    # Step 1: Send PDF file for processing
-    options = {
-        "conversion_formats": {"tex.zip": True},
-        "math_inline_delimiters": ["$", "$"],
-        "rm_spaces": True
-    }
-
-    response = requests.post(url="https://api.mathpix.com/v3/pdf",
-                             headers=headers,
-                             data={"options_json": json.dumps(options)},
-                             files={"file": open(pdf_file_path, "rb")})
-
-    if response.ok:
-        pdf_id = response.json()["pdf_id"]
-        print(f"PDF processing initiated. PDF ID: {pdf_id}")
-
-        # Step 2: Check processing status
-        while True:
-            conversion_response = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
-            conversion_data = conversion_response.json()
-
-            if conversion_data["status"] == "completed":
-                print("PDF processing completed.")
-                break
-            elif conversion_data["status"] == "error":
-                print("Error occurred during processing.")
-            else:
-                print(f"Processing status: {conversion_data['status']}")
-            time.sleep(5) # wait for a few seconds before checking again
-
-        # Step 3: Save results to local files
-        output_dir = os.path.join(os.path.dirname(pdf_file_path), 'mathpix_output')
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-
-        url = f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex"
-        response = requests.get(url, headers=headers)
-        file_name_wo_dot = '_'.join(os.path.basename(pdf_file_path).split('.')[:-1])
-        output_name = f"{file_name_wo_dot}.tex.zip"
-        output_path = os.path.join(output_dir, output_name)
-        with open(output_path, "wb") as output_file:
-            output_file.write(response.content)
-        print(f"tex.zip file saved at: {output_path}")
-
-        import zipfile
-        unzip_dir = os.path.join(output_dir, file_name_wo_dot)
-        with zipfile.ZipFile(output_path, 'r') as zip_ref:
-            zip_ref.extractall(unzip_dir)
-
-        return unzip_dir
-
-    else:
-        print(f"Error sending PDF for processing. Status code: {response.status_code}")
-        return None
-
-
-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-
-
-@CatchException
-def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    # <-------------- information about this plugin ------------->
-    chatbot.append(["函数插件功能?",
-                    "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # <-------------- more requirements ------------->
-    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-    more_req = plugin_kwargs.get("advanced_arg", "")
-    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
-
-    # <-------------- check deps ------------->
-    try:
-        import glob, os, time, subprocess
-        subprocess.Popen(['pdflatex', '-version'])
-        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
-    except Exception as e:
-        chatbot.append([f"解析项目: {txt}",
-                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- clear history and read input ------------->
-    history = []
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
-        if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
-    if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- if is a zip/tar file ------------->
-    project_folder = desend_to_extracted_folder_if_exist(project_folder)
-
-    # <-------------- move latex project away from temp folder ------------->
-    project_folder = move_project(project_folder, arxiv_id=None)
-
-    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
-                                      chatbot, history, system_prompt, mode='proofread_en',
-                                      switch_prompt=_switch_prompt_)
-
-    # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge',
-                                   main_file_modified='merge_proofread_en',
-                                   work_folder_original=project_folder, work_folder_modified=project_folder,
-                                   work_folder=project_folder)
-
-    # <-------------- zip PDF ------------->
-    zip_res = zip_result(project_folder)
-    if success:
-        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-    else:
-        chatbot.append((f"失败了",
-                        '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-
-    # <-------------- we are done ------------->
-    return success
-
-
-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-
-@CatchException
-def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    # <-------------- information about this plugin ------------->
-    chatbot.append([
-        "函数插件功能?",
-        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # <-------------- more requirements ------------->
-    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-    more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
-    if no_cache: more_req.lstrip("--no-cache")
-    allow_cache = not no_cache
-    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
-
-    # <-------------- check deps ------------->
-    try:
-        import glob, os, time, subprocess
-        subprocess.Popen(['pdflatex', '-version'])
-        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
-    except Exception as e:
-        chatbot.append([f"解析项目: {txt}",
-                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- clear history and read input ------------->
-    history = []
-    try:
-        txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
-    except tarfile.ReadError as e:
-        yield from update_ui_lastest_msg(
-            "无法自动下载该论文的Latex源码,请前往arxiv打开此论文下载页面,点other Formats,然后download source手动下载latex源码包。接下来调用本地Latex翻译插件即可。",
-            chatbot=chatbot, history=history)
-        return
-
-    if txt.endswith('.pdf'):
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
-        if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
-    if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- if is a zip/tar file ------------->
-    project_folder = desend_to_extracted_folder_if_exist(project_folder)
-
-    # <-------------- move latex project away from temp folder ------------->
-    project_folder = move_project(project_folder, arxiv_id)
-
-    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
-                                      chatbot, history, system_prompt, mode='translate_zh',
-                                      switch_prompt=_switch_prompt_)
-
-    # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge',
-                                   main_file_modified='merge_translate_zh', mode='translate_zh',
-                                   work_folder_original=project_folder, work_folder_modified=project_folder,
-                                   work_folder=project_folder)
-
-    # <-------------- zip PDF ------------->
-    zip_res = zip_result(project_folder)
-    if success:
-        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-    else:
-        chatbot.append((f"失败了",
-                        '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-
-    # <-------------- we are done ------------->
-    return success
-
-
-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 插件主程序3 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-
-@CatchException
-def PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    # <-------------- information about this plugin ------------->
-    chatbot.append([
-        "函数插件功能?",
-        "将PDF转换为Latex项目,翻译为中文后重新编译为PDF。函数插件贡献者: Marroh。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # <-------------- more requirements ------------->
-    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-    more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
-    if no_cache: more_req.lstrip("--no-cache")
-    allow_cache = not no_cache
-    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
-
-    # <-------------- check deps ------------->
-    try:
-        import glob, os, time, subprocess
-        subprocess.Popen(['pdflatex', '-version'])
-        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
-    except Exception as e:
-        chatbot.append([f"解析项目: {txt}",
-                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- clear history and read input ------------->
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
-        if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
-    if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-    if len(file_manifest) != 1:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"不支持同时处理多个pdf文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-    app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
-    if len(app_id) == 0 or len(app_key) == 0:
-        report_exception(chatbot, history, a="缺失 MATHPIX_APPID 和 MATHPIX_APPKEY。", b=f"请配置 MATHPIX_APPID 和 MATHPIX_APPKEY")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- convert pdf into tex ------------->
-    project_folder = pdf2tex_project(file_manifest[0])
-
-    # Translate English Latex to Chinese Latex, and compile it
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
-    if len(file_manifest) == 0:
-        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        return
-
-    # <-------------- if is a zip/tar file ------------->
-    project_folder = desend_to_extracted_folder_if_exist(project_folder)
-
-    # <-------------- move latex project away from temp folder ------------->
-    project_folder = move_project(project_folder)
-
-    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
-                                      chatbot, history, system_prompt, mode='translate_zh',
-                                      switch_prompt=_switch_prompt_)
-
-    # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge',
-                                   main_file_modified='merge_translate_zh', mode='translate_zh',
-                                   work_folder_original=project_folder, work_folder_modified=project_folder,
-                                   work_folder=project_folder)
-
-    # <-------------- zip PDF ------------->
-    zip_res = zip_result(project_folder)
-    if success:
-        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-    else:
-        chatbot.append((f"失败了",
-                        '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
-        yield from update_ui(chatbot=chatbot, history=history);
-        time.sleep(1) # 刷新界面
-        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
-
-    # <-------------- we are done ------------->
-    return success
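The `pdf2tex_project` helper above polls the Mathpix job until its status flips to `completed`, but the loop as deleted had no upper bound and never exited on `error`. A minimal sketch of the same poll-until-done pattern with a deadline added — the `fetch_status` callable and the `deadline` parameter are assumptions for illustration, not part of the original plugin:

```python
import time

def poll_until_done(fetch_status, interval=5, deadline=600):
    """Poll fetch_status() until it returns 'completed'; raise on 'error' or timeout."""
    start = time.time()
    while True:
        status = fetch_status()  # e.g. a closure wrapping requests.get(...).json()["status"]
        if status == "completed":
            return
        if status == "error":
            raise RuntimeError("conversion failed")
        if time.time() - start > deadline:
            raise TimeoutError("conversion did not finish in time")
        time.sleep(interval)  # avoid hammering the API between checks
```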
crazy_functions/Latex输出PDF结果.py CHANGED
@@ -1,11 +1,11 @@
 from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
-from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
+from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
 from functools import partial
 import glob, os, requests, time
 pj = os.path.join
 ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")

-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+# =================================== 工具函数 ===============================================
 # 专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
 def switch_prompt(pfg, mode, more_requirement):
     """
@@ -73,14 +73,13 @@ def move_project(project_folder, arxiv_id=None):

     # align subfolder if there is a folder wrapper
     items = glob.glob(pj(project_folder,'*'))
-    items = [item for item in items if os.path.basename(item)!='__MACOSX']
     if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
         if os.path.isdir(items[0]): project_folder = items[0]

     shutil.copytree(src=project_folder, dst=new_workfolder)
     return new_workfolder

-def arxiv_download(chatbot, history, txt, allow_cache=True):
+def arxiv_download(chatbot, history, txt):
     def check_cached_translation_pdf(arxiv_id):
         translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
         if not os.path.exists(translation_dir):
@@ -88,9 +87,6 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         target_file = pj(translation_dir, 'translate_zh.pdf')
         if os.path.exists(target_file):
             promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
-            target_file_compare = pj(translation_dir, 'comparison.pdf')
-            if os.path.exists(target_file_compare):
-                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
             return target_file
         return False
     def is_float(s):
@@ -120,7 +116,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
     arxiv_id = url_.split('/abs/')[-1]
     if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
     cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
-    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
+    if cached_translation_pdf: return cached_translation_pdf, arxiv_id

     url_tar = url_.replace('/abs/', '/e-print/')
     translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
@@ -133,7 +129,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
     else:
         yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
-        proxies = get_conf('proxies')
+        proxies, = get_conf('proxies')
         r = requests.get(url_tar, proxies=proxies)
         with open(dst, 'wb+') as f:
             f.write(r.content)
@@ -142,7 +138,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
     from toolbox import extract_archive
     extract_archive(file_path=dst, dest_dir=extract_dst)
     return extract_dst, arxiv_id
-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+# ========================================= 插件主程序1 =====================================================


 @CatchException
@@ -175,12 +171,12 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return

@@ -218,7 +214,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     # <-------------- we are done ------------->
     return success

-# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+
+# ========================================= 插件主程序2 =====================================================

 @CatchException
 def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
@@ -231,9 +228,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- more requirements ------------->
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
     more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
-    if no_cache: more_req.lstrip("--no-cache")
-    allow_cache = not no_cache
     _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

     # <-------------- check deps ------------->
@@ -250,9 +244,9 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,

     # <-------------- clear history and read input ------------->
     history = []
-    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
+    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
     if txt.endswith('.pdf'):
-        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return

@@ -261,13 +255,13 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         project_folder = txt
     else:
         if txt == "": txt = '空空如也的输入栏'
-        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return

     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
-        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return

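One recurring change in this file is the flip between `proxies = get_conf('proxies')` and `proxies, = get_conf('proxies')`: the older `toolbox.get_conf` returned a tuple even for a single key, so callers had to unpack with a trailing comma, while the newer API returns the value directly. A toy stand-in (the two function names here are illustrative, not toolbox's actual code):

```python
def get_conf_old(*keys, _cfg={'proxies': None}):
    return tuple(_cfg[k] for k in keys)        # always a tuple, even for one key

def get_conf_new(*keys, _cfg={'proxies': None}):
    res = tuple(_cfg[k] for k in keys)
    return res[0] if len(res) == 1 else res    # single value for a single key

proxies, = get_conf_old('proxies')   # 1-tuple must be unpacked with a trailing comma
proxies = get_conf_new('proxies')    # value is returned directly
```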
crazy_functions/agent_fns/auto_agent.py DELETED
@@ -1,23 +0,0 @@
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
-from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-from crazy_functions.agent_fns.general import AutoGenGeneral
-
-
-
-class AutoGenMath(AutoGenGeneral):
-
-    def define_agents(self):
-        from autogen import AssistantAgent, UserProxyAgent
-        return [
-            {
-                "name": "assistant",            # name of the agent.
-                "cls": AssistantAgent,          # class of the agent.
-            },
-            {
-                "name": "user_proxy",           # name of the agent.
-                "cls": UserProxyAgent,          # class of the agent.
-                "human_input_mode": "ALWAYS",   # always ask for human input.
-                "llm_config": False,            # disables llm-based auto reply.
-            },
-        ]
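The `define_agents` convention above carries the agent class itself in the config dict under `"cls"`, alongside its constructor kwargs. A minimal sketch of how such a list is consumed (the `Greeter` class and `build_agents` helper are hypothetical stand-ins, not project code):

```python
class Greeter:
    def __init__(self, name, human_input_mode="NEVER", **kw):
        self.name, self.human_input_mode = name, human_input_mode

def build_agents(agent_configs):
    agents = {}
    for cfg in agent_configs:
        cfg = dict(cfg)        # copy so pop() does not mutate the caller's config
        cls = cfg.pop("cls")   # the class travels inside the config dict
        agents[cfg["name"]] = cls(**cfg)
    return agents

agents = build_agents([{"name": "assistant", "cls": Greeter},
                       {"name": "user_proxy", "cls": Greeter, "human_input_mode": "ALWAYS"}])
```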
crazy_functions/agent_fns/echo_agent.py DELETED
@@ -1,19 +0,0 @@
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-
-class EchoDemo(PluginMultiprocessManager):
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ 子进程
-        self.child_conn = child_conn
-        while True:
-            msg = self.child_conn.recv() # PipeCom
-            if msg.cmd == "user_input":
-                # wait further user input
-                self.child_conn.send(PipeCom("show", msg.content))
-                wait_success = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
-                if not wait_success:
-                    # wait timeout, terminate this subprocess_worker
-                    break
-            elif msg.cmd == "terminate":
-                self.child_conn.send(PipeCom("done", ""))
-                break
-        print('[debug] subprocess_worker terminated')
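EchoDemo is the simplest concrete worker for the parent/child pipe protocol. A self-contained sketch of the same round-trip without the `PluginMultiprocessManager` plumbing (messages simplified from `PipeCom` objects to plain tuples):

```python
from multiprocessing import Process, Pipe

def worker(conn):
    while True:
        cmd, content = conn.recv()
        if cmd == "user_input":
            conn.send(("show", content))   # echo the payload back to the parent
        elif cmd == "terminate":
            conn.send(("done", ""))
            break

if __name__ == "__main__":
    parent, child = Pipe()
    p = Process(target=worker, args=(child,), daemon=True)
    p.start()
    parent.send(("user_input", "hello"))
    print(parent.recv())                   # ('show', 'hello')
    parent.send(("terminate", ""))
    print(parent.recv())                   # ('done', '')
    p.join()
```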
crazy_functions/agent_fns/general.py DELETED
@@ -1,138 +0,0 @@
-from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
-from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
-from request_llms.bridge_all import predict_no_ui_long_connection
-import time
-
-def gpt_academic_generate_oai_reply(
-    self,
-    messages,
-    sender,
-    config,
-):
-    llm_config = self.llm_config if config is None else config
-    if llm_config is False:
-        return False, None
-    if messages is None:
-        messages = self._oai_messages[sender]
-
-    inputs = messages[-1]['content']
-    history = []
-    for message in messages[:-1]:
-        history.append(message['content'])
-    context = messages[-1].pop("context", None)
-    assert context is None, "预留参数 context 未实现"
-
-    reply = predict_no_ui_long_connection(
-        inputs=inputs,
-        llm_kwargs=llm_config,
-        history=history,
-        sys_prompt=self._oai_system_message[0]['content'],
-        console_slience=True
-    )
-    assumed_done = reply.endswith('\nTERMINATE')
-    return True, reply
-
-class AutoGenGeneral(PluginMultiprocessManager):
-    def gpt_academic_print_override(self, user_proxy, message, sender):
-        # ⭐⭐ run in subprocess
-        try:
-            print_msg = sender.name + "\n\n---\n\n" + message["content"]
-        except:
-            print_msg = sender.name + "\n\n---\n\n" + message
-        self.child_conn.send(PipeCom("show", print_msg))
-
-    def gpt_academic_get_human_input(self, user_proxy, message):
-        # ⭐⭐ run in subprocess
-        patience = 300
-        begin_waiting_time = time.time()
-        self.child_conn.send(PipeCom("interact", message))
-        while True:
-            time.sleep(0.5)
-            if self.child_conn.poll():
-                wait_success = True
-                break
-            if time.time() - begin_waiting_time > patience:
-                self.child_conn.send(PipeCom("done", ""))
-                wait_success = False
-                break
-        if wait_success:
-            return self.child_conn.recv().content
-        else:
-            raise TimeoutError("等待用户输入超时")
-
-    def define_agents(self):
-        raise NotImplementedError
-
-    def exe_autogen(self, input):
-        # ⭐⭐ run in subprocess
-        input = input.content
-        code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
-        agents = self.define_agents()
-        user_proxy = None
-        assistant = None
-        for agent_kwargs in agents:
-            agent_cls = agent_kwargs.pop('cls')
-            kwargs = {
-                'llm_config': self.llm_kwargs,
-                'code_execution_config': code_execution_config
-            }
-            kwargs.update(agent_kwargs)
-            agent_handle = agent_cls(**kwargs)
-            agent_handle._print_received_message = lambda a,b: self.gpt_academic_print_override(agent_kwargs, a, b)
-            for d in agent_handle._reply_func_list:
-                if hasattr(d['reply_func'],'__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
-                    d['reply_func'] = gpt_academic_generate_oai_reply
-            if agent_kwargs['name'] == 'user_proxy':
-                agent_handle.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
-                user_proxy = agent_handle
-            if agent_kwargs['name'] == 'assistant': assistant = agent_handle
-        try:
-            if user_proxy is None or assistant is None: raise Exception("用户代理或助理代理未定义")
-            with ProxyNetworkActivate("AutoGen"):
-                user_proxy.initiate_chat(assistant, message=input)
-        except Exception as e:
-            tb_str = '```\n' + trimmed_format_exc() + '```'
-            self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))
-
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ run in subprocess
-        self.child_conn = child_conn
-        while True:
-            msg = self.child_conn.recv() # PipeCom
-            self.exe_autogen(msg)
-
-
-class AutoGenGroupChat(AutoGenGeneral):
-    def exe_autogen(self, input):
-        # ⭐⭐ run in subprocess
-        import autogen
-
-        input = input.content
-        with ProxyNetworkActivate("AutoGen"):
-            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
-            agents = self.define_agents()
-            agents_instances = []
-            for agent_kwargs in agents:
-                agent_cls = agent_kwargs.pop("cls")
-                kwargs = {"code_execution_config": code_execution_config}
-                kwargs.update(agent_kwargs)
-                agent_handle = agent_cls(**kwargs)
-                agent_handle._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
-                agents_instances.append(agent_handle)
-                if agent_kwargs["name"] == "user_proxy":
-                    user_proxy = agent_handle
-                    user_proxy.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
-            try:
-                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
-                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
-                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
-                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
-                if user_proxy is None:
-                    raise Exception("user_proxy is not defined")
-                user_proxy.initiate_chat(manager, message=input)
-            except Exception:
-                tb_str = "```\n" + trimmed_format_exc() + "```"
-                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))
-
-    def define_group_chat_manager_config(self):
-        raise NotImplementedError
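Worth noting in the deleted code above: the loops assign `lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)` inside a `for agent_kwargs in agents:` loop. Python closures capture the *variable*, not its value, so after the loop every such lambda sees the final `agent_kwargs`. Whether that mattered here depends on how autogen invokes the override, but the pitfall itself is easy to demonstrate, along with the usual default-argument fix:

```python
configs = [{"name": "a"}, {"name": "b"}]

callbacks_buggy = [lambda: cfg["name"] for cfg in configs]
print([f() for f in callbacks_buggy])       # ['b', 'b'] — all closures share one cfg

callbacks_fixed = [lambda cfg=cfg: cfg["name"] for cfg in configs]
print([f() for f in callbacks_fixed])       # ['a', 'b'] — default arg binds per iteration
```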
crazy_functions/agent_fns/persistent.py DELETED
@@ -1,16 +0,0 @@
-from toolbox import Singleton
-@Singleton
-class GradioMultiuserManagerForPersistentClasses():
-    def __init__(self):
-        self.mapping = {}
-
-    def already_alive(self, key):
-        return (key in self.mapping) and (self.mapping[key].is_alive())
-
-    def set(self, key, x):
-        self.mapping[key] = x
-        return self.mapping[key]
-
-    def get(self, key):
-        return self.mapping[key]
-
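`Singleton` here is imported from `toolbox` and its definition is not part of this diff. For orientation only, a minimal sketch of what such a class decorator typically looks like — this is an assumption about the pattern, not toolbox's actual implementation:

```python
def Singleton(cls):
    _instances = {}
    def get_instance(*args, **kwargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kwargs)   # construct once, then reuse
        return _instances[cls]
    return get_instance

@Singleton
class Registry:
    def __init__(self):
        self.mapping = {}

assert Registry() is Registry()   # every call yields the same instance
```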
crazy_functions/agent_fns/pipe.py DELETED
@@ -1,194 +0,0 @@
-from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
-from crazy_functions.agent_fns.watchdog import WatchDog
-import time, os
-
-class PipeCom:
-    def __init__(self, cmd, content) -> None:
-        self.cmd = cmd
-        self.content = content
-
-
-class PluginMultiprocessManager:
-    def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-        # ⭐ run in main process
-        self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
-        self.previous_work_dir_files = {}
-        self.llm_kwargs = llm_kwargs
-        self.plugin_kwargs = plugin_kwargs
-        self.chatbot = chatbot
-        self.history = history
-        self.system_prompt = system_prompt
-        # self.user_request = user_request
-        self.alive = True
-        self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
-        self.last_user_input = ""
-        # create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
-        timeout_seconds = 5 * 60
-        self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
-        self.heartbeat_watchdog.begin_watch()
-
-    def feed_heartbeat_watchdog(self):
-        # feed this `dog`, so the dog will not `bark` (bark_fn will terminate the instance)
-        self.heartbeat_watchdog.feed()
-
-    def is_alive(self):
-        return self.alive
-
-    def launch_subprocess_with_pipe(self):
-        # ⭐ run in main process
-        from multiprocessing import Process, Pipe
-
-        parent_conn, child_conn = Pipe()
-        self.p = Process(target=self.subprocess_worker, args=(child_conn,))
-        self.p.daemon = True
-        self.p.start()
-        return parent_conn
-
-    def terminate(self):
-        self.p.terminate()
-        self.alive = False
-        print("[debug] instance terminated")
-
-    def subprocess_worker(self, child_conn):
-        # ⭐⭐ run in subprocess
-        raise NotImplementedError
-
-    def send_command(self, cmd):
-        # ⭐ run in main process
-        repeated = False
-        if cmd == self.last_user_input:
-            repeated = True
-            cmd = ""
-        else:
-            self.last_user_input = cmd
-        self.parent_conn.send(PipeCom("user_input", cmd))
-        return repeated, cmd
-
-    def immediate_showoff_when_possible(self, fp):
-        # ⭐ 主进程
-        # 获取fp的拓展名
-        file_type = fp.split('.')[-1]
-        # 如果是文本文件, 则直接显示文本内容
-        if file_type.lower() in ['png', 'jpg']:
-            image_path = os.path.abspath(fp)
-            self.chatbot.append([
-                '检测到新生图像:',
-                f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-            ])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-
-    def overwatch_workdir_file_change(self):
-        # ⭐ 主进程 Docker 外挂文件夹监控
-        path_to_overwatch = self.autogen_work_dir
-        change_list = []
-        # 扫描路径下的所有文件, 并与self.previous_work_dir_files中所记录的文件进行对比,
-        # 如果有新文件出现,或者文件的修改时间发生变化,则更新self.previous_work_dir_files中
-        # 把新文件和发生变化的文件的路径记录到 change_list 中
-        for root, dirs, files in os.walk(path_to_overwatch):
-            for file in files:
-                file_path = os.path.join(root, file)
-                if file_path not in self.previous_work_dir_files.keys():
-                    last_modified_time = os.stat(file_path).st_mtime
-                    self.previous_work_dir_files.update({file_path: last_modified_time})
-                    change_list.append(file_path)
-                else:
-                    last_modified_time = os.stat(file_path).st_mtime
-                    if last_modified_time != self.previous_work_dir_files[file_path]:
-                        self.previous_work_dir_files[file_path] = last_modified_time
-                        change_list.append(file_path)
-        if len(change_list) > 0:
-            file_links = ""
-            for f in change_list:
-                res = promote_file_to_downloadzone(f)
-                file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
-                yield from self.immediate_showoff_when_possible(f)
-
-            self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-        return change_list
-
-
-    def main_process_ui_control(self, txt, create_or_resume) -> str:
-        # ⭐ 主进程
-        if create_or_resume == 'create':
-            self.cnt = 1
-            self.parent_conn = self.launch_subprocess_with_pipe() # ⭐⭐⭐
-        repeated, cmd_to_autogen = self.send_command(txt)
-        if txt == 'exit':
-            self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
-            yield from update_ui(chatbot=self.chatbot, history=self.history)
-            self.terminate()
-            return "terminate"
-
-        # patience = 10
-
-        while True:
-            time.sleep(0.5)
-            if not self.alive:
-                # the heartbeat watchdog might have it killed
-                self.terminate()
-                return "terminate"
-            if self.parent_conn.poll():
-                self.feed_heartbeat_watchdog()
-                if "[GPT-Academic] 等待中" in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                if "等待您的进一步指令" in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                if '[GPT-Academic] 等待中' in self.chatbot[-1][-1]:
-                    self.chatbot.pop(-1)  # remove the last line
-                msg = self.parent_conn.recv()  # PipeCom
-                if msg.cmd == "done":
-                    self.chatbot.append([f"结束", msg.content])
-                    self.cnt += 1
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                    self.terminate()
-                    break
-                if msg.cmd == "show":
-                    yield from self.overwatch_workdir_file_change()
-                    notice = ""
-                    if repeated: notice = "(自动忽略重复的输入)"
-                    self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
-                    self.cnt += 1
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                if msg.cmd == "interact":
-                    yield from self.overwatch_workdir_file_change()
-                    self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
-                                         "\n\n等待您的进一步指令." +
-                                         "\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
-                                         "\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
-                                         "\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
-                                         ])
-                    yield from update_ui(chatbot=self.chatbot, history=self.history)
-                    # do not terminate here, leave the subprocess_worker instance alive
-                    return "wait_feedback"
-            else:
-                self.feed_heartbeat_watchdog()
-                if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
-                    # begin_waiting_time = time.time()
-                    self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
-                self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
-                yield from update_ui(chatbot=self.chatbot, history=self.history)
-                # if time.time() - begin_waiting_time > patience:
-                #     self.chatbot.append([f"结束", "等待超时, 终止AutoGen程序。"])
-                #     yield from update_ui(chatbot=self.chatbot, history=self.history)
-                #     self.terminate()
-                #     return "terminate"
-
-        self.terminate()
-        return "terminate"
-
-    def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
-        # ⭐⭐ run in subprocess
-        patience = 5 * 60
-        begin_waiting_time = time.time()
-        self.child_conn.send(PipeCom("interact", wait_msg))
-        while True:
-            time.sleep(0.5)
-            if self.child_conn.poll():
-                wait_success = True
-                break
-            if time.time() - begin_waiting_time > patience:
-                self.child_conn.send(PipeCom("done", ""))
-                wait_success = False
-                break
-        return wait_success
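`overwatch_workdir_file_change` above implements a simple mtime-based change detector: remember each file's modification time and report paths that are new or touched since the last scan. The same idea as a standalone, reusable sketch (function name chosen here for illustration):

```python
import os

def detect_changes(path, seen_mtimes):
    """Return files under `path` that are new or modified; updates seen_mtimes in place."""
    changed = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            fp = os.path.join(root, name)
            mtime = os.stat(fp).st_mtime
            if seen_mtimes.get(fp) != mtime:   # new file, or modified since last scan
                seen_mtimes[fp] = mtime
                changed.append(fp)
    return changed
```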
crazy_functions/agent_fns/watchdog.py DELETED
@@ -1,28 +0,0 @@
-import threading, time
-
-class WatchDog():
-    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
-        self.last_feed = None
-        self.timeout = timeout
-        self.bark_fn = bark_fn
-        self.interval = interval
-        self.msg = msg
-        self.kill_dog = False
-
-    def watch(self):
-        while True:
-            if self.kill_dog: break
-            if time.time() - self.last_feed > self.timeout:
-                if len(self.msg) > 0: print(self.msg)
-                self.bark_fn()
-                break
-            time.sleep(self.interval)
-
-    def begin_watch(self):
-        self.last_feed = time.time()
-        th = threading.Thread(target=self.watch)
-        th.daemon = True
-        th.start()
-
-    def feed(self):
-        self.last_feed = time.time()
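How the class is meant to be driven, mirroring its use in pipe.py where `bark_fn` terminates the managed subprocess once heartbeats stop (assumes the `WatchDog` class above is in scope):

```python
import time

dog = WatchDog(timeout=2, bark_fn=lambda: print("no heartbeat, terminating"), interval=1)
dog.begin_watch()
for _ in range(3):
    time.sleep(1)
    dog.feed()        # regular heartbeats keep bark_fn from firing
time.sleep(3)         # stop feeding: after `timeout` seconds the dog barks once
```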
crazy_functions/chatglm微调工具.py CHANGED
@@ -32,7 +32,7 @@ def string_to_options(arguments):
     return args

 @CatchException
-def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -40,7 +40,7 @@ def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
+    web_port        当前软件运行的端口号
     """
     history = []    # 清空历史,以免输入溢出
     chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
@@ -80,7 +80,7 @@ def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst


 @CatchException
-def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
     txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
     llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
@@ -88,7 +88,7 @@ def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
     chatbot         聊天显示框的句柄,用于显示给用户
     history         聊天历史,前情提要
     system_prompt   给gpt的静默提醒
-    user_request    当前用户的请求信息(IP地址等)
+    web_port        当前软件运行的端口号
     """
     import subprocess
     history = []    # 清空历史,以免输入溢出
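This hunk only renames the trailing plugin parameter (`user_request` ↔ `web_port`). If plugins are dispatched positionally, as a `plugin(*args)` call, such a rename is call-compatible; only keyword callers would break. A minimal illustration (both plugin functions are hypothetical):

```python
def plugin_a(txt, web_port): return txt
def plugin_b(txt, user_request): return txt

args = ("hello", 8080)
assert plugin_a(*args) == plugin_b(*args)   # positional dispatch: both signatures work
# plugin_b(txt="hello", web_port=8080)      # keyword dispatch would raise TypeError
```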
crazy_functions/crazy_utils.py CHANGED
@@ -1,18 +1,18 @@
-from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
+from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
 import threading
 import os
 import logging

 def input_clipping(inputs, history, max_token_limit):
     import numpy as np
-    from request_llms.bridge_all import model_info
+    from request_llm.bridge_all import model_info
     enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

     mode = 'input-and-history'
     # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
     input_token_num = get_token_num(inputs)
-    if input_token_num < max_token_limit//2:
+    if input_token_num < max_token_limit//2:
         mode = 'only-history'
         max_token_limit = max_token_limit - input_token_num

@@ -21,7 +21,7 @@ def input_clipping(inputs, history, max_token_limit):
     n_token = get_token_num('\n'.join(everything))
     everything_token = [get_token_num(e) for e in everything]
     delta = max(everything_token) // 16 # 截断时的颗粒度
-
+
     while n_token > max_token_limit:
         where = np.argmax(everything_token)
         encoded = enc.encode(everything[where], disallowed_special=())
@@ -38,9 +38,9 @@ def input_clipping(inputs, history, max_token_limit):
     return inputs, history

 def request_gpt_model_in_new_thread_with_ui_alive(
-        inputs, inputs_show_user, llm_kwargs,
+        inputs, inputs_show_user, llm_kwargs,
         chatbot, history, sys_prompt, refresh_interval=0.2,
-        handle_token_exceed=True,
+        handle_token_exceed=True,
         retry_times_at_unknown_error=2,
         ):
     """
@@ -63,21 +63,18 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     """
     import time
     from concurrent.futures import ThreadPoolExecutor
-    from request_llms.bridge_all import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     # 用户反馈
     chatbot.append([inputs_show_user, ""])
     yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
     executor = ThreadPoolExecutor(max_workers=16)
     mutable = ["", time.time(), ""]
-    # 看门狗耐心
-    watch_dog_patience = 5
-    # 请求任务
     def _req_gpt(inputs, history, sys_prompt):
         retry_op = retry_times_at_unknown_error
         exceeded_cnt = 0
         while True:
             # watchdog error
-            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
+            if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
                 raise RuntimeError("检测到程序终止。")
             try:
                 # 【第一种情况】:顺利完成
@@ -92,7 +89,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
                 # 【选择处理】 尝试计算比例,尽可能多地保留文本
                 from toolbox import get_reduce_token_percent
                 p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
-                MAX_TOKEN = get_max_token(llm_kwargs)
+                MAX_TOKEN = 4096
                 EXCEED_ALLO = 512 + 512 * exceeded_cnt
                 inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                 mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
@@ -139,13 +136,11 @@ def can_multi_process(llm):
     if llm.startswith('gpt-'): return True
     if llm.startswith('api2d-'): return True
     if llm.startswith('azure-'): return True
-    if llm.startswith('spark'): return True
-    if llm.startswith('zhipuai') or llm.startswith('glm-'): return True
     return False

 def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
-        inputs_array, inputs_show_user_array, llm_kwargs,
-        chatbot, history_array, sys_prompt_array,
+        inputs_array, inputs_show_user_array, llm_kwargs,
+        chatbot, history_array, sys_prompt_array,
         refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
         handle_token_exceed=True, show_user_at_complete=False,
         retry_times_at_unknown_error=2,
@@ -179,17 +174,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     """
     import time, random
     from concurrent.futures import ThreadPoolExecutor
-    from request_llms.bridge_all import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
     if max_workers == -1: # 读取配置文件
-        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
+        try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
         except: max_workers = 8
     if max_workers <= 0: max_workers = 3
     # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
     if not can_multi_process(llm_kwargs['llm_model']):
         max_workers = 1
-
+
     executor = ThreadPoolExecutor(max_workers=max_workers)
     n_frag = len(inputs_array)
     # 用户反馈
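A condensed sketch of the worker-count policy in the hunk above: read the configured limit, fall back on failure, clamp non-positive values, and force single-threading for backends that `can_multi_process` rejects (prefix list mirrors the diff; the helper name is chosen for illustration):

```python
def resolve_max_workers(configured, llm_model, fallback=8, floor=3):
    workers = configured if isinstance(configured, int) else fallback
    if workers <= 0:
        workers = floor
    if not any(llm_model.startswith(p) for p in ('gpt-', 'api2d-', 'azure-')):
        workers = 1   # e.g. local chatglm: multi-threading can stall badly
    return workers

assert resolve_max_workers(-1, 'gpt-3.5-turbo') == 3
assert resolve_max_workers(16, 'chatglm') == 1
```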
@@ -198,35 +193,33 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     # 跨线程传递
     mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

-    # 看门狗耐心
-    watch_dog_patience = 5
-
     # 子线程任务
     def _req_gpt(index, inputs, history, sys_prompt):
         gpt_say = ""
         retry_op = retry_times_at_unknown_error
         exceeded_cnt = 0
         mutable[index][2] = "执行中"
-        detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
         while True:
             # watchdog error
-            if detect_timeout(): raise RuntimeError("检测到程序终止。")
             try:
                 # 【第一种情况】:顺利完成
                 gpt_say = predict_no_ui_long_connection(
-                    inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                     sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                 )
                 mutable[index][2] = "已成功"
                 return gpt_say
             except ConnectionAbortedError as token_exceeded_error:
-                # 【第二种情况】:Token溢出
                 if handle_token_exceed:
                     exceeded_cnt += 1
                     # 【选择处理】 尝试计算比例,尽可能多地保留文本
                     from toolbox import get_reduce_token_percent
                     p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
-                    MAX_TOKEN = get_max_token(llm_kwargs)
                     EXCEED_ALLO = 512 + 512 * exceeded_cnt
                     inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                     gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
@@ -241,12 +234,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     return gpt_say # 放弃
             except:
                 # 【第三种情况】:其他错误
-                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                 tb_str = '```\n' + trimmed_format_exc() + '```'
                 print(tb_str)
                 gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                 if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
-                if retry_op > 0:
                     retry_op -= 1
                     wait = random.randint(5, 20)
                     if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
@@ -258,7 +250,6 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     for i in range(wait):
                         mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                     # 开始重试
-                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                     mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                     continue # 返回重试
                 else:
@@ -284,11 +275,12 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         # 在前端打印些好玩的东西
         for thread_index, _ in enumerate(worker_done):
             print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
-                replace('\n', '').replace('`', '.').replace(' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
             observe_win.append(print_something_really_funny)
         # 在前端打印些好玩的东西
-        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
-                            if not done else f'`{mutable[thread_index][2]}`\n\n'
                             for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
         # 在前端打印些好玩的东西
         chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
@@ -302,17 +294,106 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     for inputs_show_user, f in zip(inputs_show_user_array, futures):
         gpt_res = f.result()
         gpt_response_collection.extend([inputs_show_user, gpt_res])
-
     # 是否在结束时,在界面上显示结果
     if show_user_at_complete:
         for inputs_show_user, f in zip(inputs_show_user_array, futures):
             gpt_res = f.result()
             chatbot.append([inputs_show_user, gpt_res])
             yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
-            time.sleep(0.5)
     return gpt_response_collection


 def read_and_clean_pdf_text(fp):
     """
@@ -352,7 +433,7 @@ def read_and_clean_pdf_text(fp):
         if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
         fsize_statiscs[wtf['size']] += len(wtf['text'])
         return max(fsize_statiscs, key=fsize_statiscs.get)
-
     def ffsize_same(a,b):
         """
         提取字体大小是否近似相等
@@ -388,7 +469,7 @@ def read_and_clean_pdf_text(fp):
             if index == 0:
                 page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                     '- ', '') for t in text_areas['blocks'] if 'lines' in t]
-
     ############################## <第 2 步,获取正文主字体> ##################################
     try:
         fsize_statiscs = {}
@@ -404,7 +485,7 @@ def read_and_clean_pdf_text(fp):
     mega_sec = []
     sec = []
     for index, line in enumerate(meta_line):
-        if index == 0:
             sec.append(line[fc])
             continue
         if REMOVE_FOOT_NOTE:
@@ -465,9 +546,6 @@ def read_and_clean_pdf_text(fp):
             return True
         else:
             return False
-    # 对于某些PDF会有第一个段落就以小写字母开头,为了避免索引错误将其更改为大写
-    if starts_with_lowercase_word(meta_txt[0]):
-        meta_txt[0] = meta_txt[0].capitalize()
     for _ in range(100):
         for index, block_txt in enumerate(meta_txt):
             if starts_with_lowercase_word(block_txt):
@@ -501,12 +579,12 @@ def get_files_from_everything(txt, type): # type='.md'
     """
     这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
     下面是对每个参数和返回值的说明:
-    参数
-    - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
     - type: 字符串,表示要搜索的文件类型。默认是.md。
-    返回值
-    - success: 布尔值,表示函数是否成功执行。
-    - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
     - project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
     该函数详细注释已添加,请确认是否满足您的需要。
     """
@@ -518,7 +596,7 @@ def get_files_from_everything(txt, type): # type='.md'
         import requests
         from toolbox import get_conf
         from toolbox import get_log_folder, gen_time_str
-        proxies = get_conf('proxies')
         try:
             r = requests.get(txt, proxies=proxies)
         except:
@@ -546,6 +624,90 @@ def get_files_from_everything(txt, type): # type='.md'



 @Singleton
 class nougat_interface():
     def __init__(self):
@@ -553,10 +715,8 @@ class nougat_interface():

     def nougat_with_timeout(self, command, cwd, timeout=3600):
         import subprocess
-        from toolbox import ProxyNetworkActivate
         logging.info(f'正在执行命令 {command}')
-        with ProxyNetworkActivate("Nougat_Download"):
-            process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
         try:
             stdout, stderr = process.communicate(timeout=timeout)
         except subprocess.TimeoutExpired:
@@ -570,7 +730,7 @@ class nougat_interface():
     def NOUGAT_parse_pdf(self, fp, chatbot, history):
         from toolbox import update_ui_lastest_msg

-        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                          chatbot=chatbot, history=history, delay=0)
         self.threadLock.acquire()
         import glob, threading, os
@@ -578,7 +738,7 @@ class nougat_interface():
         dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
         os.makedirs(dst)

-        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                          chatbot=chatbot, history=history, delay=0)
         self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
         res = glob.glob(os.path.join(dst,'*.mmd'))
@@ -601,8 +761,49 @@ def try_install_deps(deps, reload_m=[]):
         importlib.reload(__import__(m))


-def get_plugin_arg(plugin_kwargs, key, default):
-    # 如果参数是空的
-    if (key in plugin_kwargs) and (plugin_kwargs[key] == ""): plugin_kwargs.pop(key)
-    # 正常情况
-    return plugin_kwargs.get(key, default)
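The `_req_gpt` worker earlier in this file retries unknown errors a bounded number of times, waiting a random 5-20 seconds and backing off harder when the error text looks like rate limiting. The same policy in condensed, standalone form (`request_fn` is a placeholder for the actual model call):

```python
import random, time

def call_with_retries(request_fn, retries=2):
    while True:
        try:
            return request_fn()
        except Exception as e:
            if retries <= 0:
                raise                     # give up after the final attempt
            retries -= 1
            wait = random.randint(5, 20)
            if "Rate limit reached" in str(e) or "Too Many Requests" in str(e):
                wait *= 3                 # back off harder when throttled
            time.sleep(wait)
```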
1
+ from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
2
  import threading
3
  import os
4
  import logging
5
 
6
  def input_clipping(inputs, history, max_token_limit):
7
  import numpy as np
8
+ from request_llm.bridge_all import model_info
9
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
10
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
11
 
12
  mode = 'input-and-history'
13
  # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
14
  input_token_num = get_token_num(inputs)
15
+ if input_token_num < max_token_limit//2:
16
  mode = 'only-history'
17
  max_token_limit = max_token_limit - input_token_num
18
 
 
21
  n_token = get_token_num('\n'.join(everything))
22
  everything_token = [get_token_num(e) for e in everything]
23
  delta = max(everything_token) // 16 # 截断时的颗粒度
24
+
25
  while n_token > max_token_limit:
26
  where = np.argmax(everything_token)
27
  encoded = enc.encode(everything[where], disallowed_special=())
 
38
  return inputs, history
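
The clipping loop above always shortens whichever segment currently holds the most tokens, in steps of one sixteenth of the largest segment. A minimal self-contained sketch of that strategy, with plain character count standing in for the tiktoken-based `get_token_num` (an illustrative assumption, not the real tokenizer):

```python
import numpy as np

def clip_to_budget(inputs, history, max_token_limit, count=len):
    # Same loop shape as input_clipping: trim the largest segment
    # first, in coarse steps, until the total fits the budget.
    everything = [inputs] + list(history)
    tokens = [count(e) for e in everything]
    delta = max(max(tokens) // 16, 1)      # truncation granularity
    while sum(tokens) > max_token_limit:
        where = int(np.argmax(tokens))
        everything[where] = everything[where][:-delta]
        tokens[where] = count(everything[where])
    return everything[0], everything[1:]

inp, hist = clip_to_budget("q" * 100, ["a" * 400, "b" * 80], max_token_limit=300)
print(len(inp), [len(h) for h in hist])    # -> 100 [100, 80]
```
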
39
 
40
  def request_gpt_model_in_new_thread_with_ui_alive(
41
+ inputs, inputs_show_user, llm_kwargs,
42
  chatbot, history, sys_prompt, refresh_interval=0.2,
43
+ handle_token_exceed=True,
44
  retry_times_at_unknown_error=2,
45
  ):
46
  """
 
63
  """
64
  import time
65
  from concurrent.futures import ThreadPoolExecutor
66
+ from request_llm.bridge_all import predict_no_ui_long_connection
67
  # 用户反馈
68
  chatbot.append([inputs_show_user, ""])
69
  yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
70
  executor = ThreadPoolExecutor(max_workers=16)
71
  mutable = ["", time.time(), ""]
 
72
  def _req_gpt(inputs, history, sys_prompt):
73
  retry_op = retry_times_at_unknown_error
74
  exceeded_cnt = 0
75
  while True:
76
  # watchdog error
77
+ if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
78
  raise RuntimeError("检测到程序终止。")
79
  try:
80
  # 【第一种情况】:顺利完成
 
89
  # 【选择处理】 尝试计算比例,尽可能多地保留文本
90
  from toolbox import get_reduce_token_percent
91
  p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
92
+ MAX_TOKEN = 4096
93
  EXCEED_ALLO = 512 + 512 * exceeded_cnt
94
  inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
95
  mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
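
Each pass through this except-branch enlarges the allowance subtracted from the hard-coded 4096-token ceiling, so every retry clips the input and history more aggressively. Spelled out with the constants above:

```python
MAX_TOKEN = 4096                  # ceiling hard-coded in the handler above
for exceeded_cnt in range(1, 5):  # after the 1st, 2nd, ... overflow
    EXCEED_ALLO = 512 + 512 * exceeded_cnt
    print(f"overflow #{exceeded_cnt}: clip to {MAX_TOKEN - EXCEED_ALLO} tokens")
# overflow #1: clip to 3072 tokens
# overflow #2: clip to 2560 tokens
# overflow #3: clip to 2048 tokens
# overflow #4: clip to 1536 tokens
```
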
 
136
  if llm.startswith('gpt-'): return True
137
  if llm.startswith('api2d-'): return True
138
  if llm.startswith('azure-'): return True
 
 
139
  return False
140
 
141
  def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
142
+ inputs_array, inputs_show_user_array, llm_kwargs,
143
+ chatbot, history_array, sys_prompt_array,
144
  refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
145
  handle_token_exceed=True, show_user_at_complete=False,
146
  retry_times_at_unknown_error=2,
 
174
  """
175
  import time, random
176
  from concurrent.futures import ThreadPoolExecutor
177
+ from request_llm.bridge_all import predict_no_ui_long_connection
178
  assert len(inputs_array) == len(history_array)
179
  assert len(inputs_array) == len(sys_prompt_array)
180
  if max_workers == -1: # 读取配置文件
181
+ try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
182
  except: max_workers = 8
183
  if max_workers <= 0: max_workers = 3
184
  # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
185
  if not can_multi_process(llm_kwargs['llm_model']):
186
  max_workers = 1
187
+
188
  executor = ThreadPoolExecutor(max_workers=max_workers)
189
  n_frag = len(inputs_array)
190
  # 用户反馈
 
193
  # 跨线程传递
194
  mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
195
 
196
  # 子线程任务
197
  def _req_gpt(index, inputs, history, sys_prompt):
198
  gpt_say = ""
199
  retry_op = retry_times_at_unknown_error
200
  exceeded_cnt = 0
201
  mutable[index][2] = "执行中"
 
202
  while True:
203
  # watchdog error
204
+ if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
205
+ raise RuntimeError("检测到程序终止。")
206
  try:
207
  # 【第一种情况】:顺利完成
208
+ # time.sleep(10); raise RuntimeError("测试")
209
  gpt_say = predict_no_ui_long_connection(
210
+ inputs=inputs, llm_kwargs=llm_kwargs, history=history,
211
  sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
212
  )
213
  mutable[index][2] = "已成功"
214
  return gpt_say
215
  except ConnectionAbortedError as token_exceeded_error:
216
+ # 【第二种情况】:Token溢出,
217
  if handle_token_exceed:
218
  exceeded_cnt += 1
219
  # 【选择处理】 尝试计算比例,尽可能多地保留文本
220
  from toolbox import get_reduce_token_percent
221
  p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
222
+ MAX_TOKEN = 4096
223
  EXCEED_ALLO = 512 + 512 * exceeded_cnt
224
  inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
225
  gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
 
234
  return gpt_say # 放弃
235
  except:
236
  # 【第三种情况】:其他错误
 
237
  tb_str = '```\n' + trimmed_format_exc() + '```'
238
  print(tb_str)
239
  gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
240
  if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
241
+ if retry_op > 0:
242
  retry_op -= 1
243
  wait = random.randint(5, 20)
244
  if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
 
250
  for i in range(wait):
251
  mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
252
  # 开始重试
 
253
  mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
254
  continue # 返回重试
255
  else:
 
275
  # 在前端打印些好玩的东西
276
  for thread_index, _ in enumerate(worker_done):
277
  print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
278
+ replace('\n', '').replace('```', '...').replace(
279
+ ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
280
  observe_win.append(print_something_really_funny)
281
  # 在前端打印些好玩的东西
282
+ stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
283
+ if not done else f'`{mutable[thread_index][2]}`\n\n'
284
  for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
285
  # 在前端打印些好玩的东西
286
  chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
 
294
  for inputs_show_user, f in zip(inputs_show_user_array, futures):
295
  gpt_res = f.result()
296
  gpt_response_collection.extend([inputs_show_user, gpt_res])
297
+
298
  # 是否在结束时,在界面上显示结果
299
  if show_user_at_complete:
300
  for inputs_show_user, f in zip(inputs_show_user_array, futures):
301
  gpt_res = f.result()
302
  chatbot.append([inputs_show_user, gpt_res])
303
  yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
304
+ time.sleep(0.3)
305
  return gpt_response_collection
306
 
307
 
308
+ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
309
+ def cut(txt_tocut, must_break_at_empty_line): # 递归
310
+ if get_token_fn(txt_tocut) <= limit:
311
+ return [txt_tocut]
312
+ else:
313
+ lines = txt_tocut.split('\n')
314
+ estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
315
+ estimated_line_cut = int(estimated_line_cut)
316
+ for cnt in reversed(range(estimated_line_cut)):
317
+ if must_break_at_empty_line:
318
+ if lines[cnt] != "":
319
+ continue
320
+ print(cnt)
321
+ prev = "\n".join(lines[:cnt])
322
+ post = "\n".join(lines[cnt:])
323
+ if get_token_fn(prev) < limit:
324
+ break
325
+ if cnt == 0:
326
+ raise RuntimeError("存在一行极长的文本!")
327
+ # print(len(post))
328
+ # 列表递归接龙
329
+ result = [prev]
330
+ result.extend(cut(post, must_break_at_empty_line))
331
+ return result
332
+ try:
333
+ return cut(txt, must_break_at_empty_line=True)
334
+ except RuntimeError:
335
+ return cut(txt, must_break_at_empty_line=False)
336
+
337
+
338
+ def force_breakdown(txt, limit, get_token_fn):
339
+ """
340
+ 当无法用标点、空行分割时,我们用最暴力的方法切割
341
+ """
342
+ for i in reversed(range(len(txt))):
343
+ if get_token_fn(txt[:i]) < limit:
344
+ return txt[:i], txt[i:]
345
+ return "Tiktoken未知错误", "Tiktoken未知错误"
346
+
347
+ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
348
+ # 递归
349
+ def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
350
+ if get_token_fn(txt_tocut) <= limit:
351
+ return [txt_tocut]
352
+ else:
353
+ lines = txt_tocut.split('\n')
354
+ estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
355
+ estimated_line_cut = int(estimated_line_cut)
356
+ cnt = 0
357
+ for cnt in reversed(range(estimated_line_cut)):
358
+ if must_break_at_empty_line:
359
+ if lines[cnt] != "":
360
+ continue
361
+ prev = "\n".join(lines[:cnt])
362
+ post = "\n".join(lines[cnt:])
363
+ if get_token_fn(prev) < limit:
364
+ break
365
+ if cnt == 0:
366
+ if break_anyway:
367
+ prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
368
+ else:
369
+ raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
370
+ # print(len(post))
371
+ # 列表递归接龙
372
+ result = [prev]
373
+ result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
374
+ return result
375
+ try:
376
+ # 第1次尝试,将双空行(\n\n)作为切分点
377
+ return cut(txt, must_break_at_empty_line=True)
378
+ except RuntimeError:
379
+ try:
380
+ # 第2次尝试,将单空行(\n)作为切分点
381
+ return cut(txt, must_break_at_empty_line=False)
382
+ except RuntimeError:
383
+ try:
384
+ # 第3次尝试,将英文句号(.)作为切分点
385
+ res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
386
+ return [r.replace('。\n', '.') for r in res]
387
+ except RuntimeError as e:
388
+ try:
389
+ # 第4次尝试,将中文句号(。)作为切分点
390
+ res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False)
391
+ return [r.replace('。。\n', '。') for r in res]
392
+ except RuntimeError as e:
393
+ # 第5次尝试,没办法了,随便切一下敷衍吧
394
+ return cut(txt, must_break_at_empty_line=False, break_anyway=True)
395
+
396
+
397
 
398
  def read_and_clean_pdf_text(fp):
399
  """
 
433
  if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
434
  fsize_statiscs[wtf['size']] += len(wtf['text'])
435
  return max(fsize_statiscs, key=fsize_statiscs.get)
436
+
437
  def ffsize_same(a,b):
438
  """
439
  提取字体大小是否近似相等
 
469
  if index == 0:
470
  page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
471
  '- ', '') for t in text_areas['blocks'] if 'lines' in t]
472
+
473
  ############################## <第 2 步,获取正文主字体> ##################################
474
  try:
475
  fsize_statiscs = {}
 
485
  mega_sec = []
486
  sec = []
487
  for index, line in enumerate(meta_line):
488
+ if index == 0:
489
  sec.append(line[fc])
490
  continue
491
  if REMOVE_FOOT_NOTE:
 
546
  return True
547
  else:
548
  return False
 
549
  for _ in range(100):
550
  for index, block_txt in enumerate(meta_txt):
551
  if starts_with_lowercase_word(block_txt):
 
579
  """
580
  这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
581
  下面是对每个参数和返回值的说明:
582
+ 参数
583
+ - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
584
  - type: 字符串,表示要搜索的文件类型。默认是.md。
585
+ 返回值
586
+ - success: 布尔值,表示函数是否成功执行。
587
+ - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
588
  - project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
589
  该函数详细注释已添加,请确认是否满足您的需要。
590
  """
 
596
  import requests
597
  from toolbox import get_conf
598
  from toolbox import get_log_folder, gen_time_str
599
+ proxies, = get_conf('proxies')
600
  try:
601
  r = requests.get(txt, proxies=proxies)
602
  except:
 
624
 
625
 
626
 
627
+
628
+ def Singleton(cls):
629
+ _instance = {}
630
+
631
+ def _singleton(*args, **kargs):
632
+ if cls not in _instance:
633
+ _instance[cls] = cls(*args, **kargs)
634
+ return _instance[cls]
635
+
636
+ return _singleton
637
+
638
+
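
The decorator above caches one instance per class in a module-level dict, so repeated constructor calls return the same object. A quick demonstration with a hypothetical class:

```python
@Singleton
class Config:
    def __init__(self):
        self.values = {}  # per-instance state, created exactly once

a = Config()
b = Config()
assert a is b             # both names point at the single cached instance
```
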
639
+ @Singleton
640
+ class knowledge_archive_interface():
641
+ def __init__(self) -> None:
642
+ self.threadLock = threading.Lock()
643
+ self.current_id = ""
644
+ self.kai_path = None
645
+ self.qa_handle = None
646
+ self.text2vec_large_chinese = None
647
+
648
+ def get_chinese_text2vec(self):
649
+ if self.text2vec_large_chinese is None:
650
+ # < -------------------预热文本向量化模组--------------- >
651
+ from toolbox import ProxyNetworkActivate
652
+ print('Checking Text2vec ...')
653
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
654
+ with ProxyNetworkActivate(): # 临时地激活代理网络
655
+ self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
656
+
657
+ return self.text2vec_large_chinese
658
+
659
+
660
+ def feed_archive(self, file_manifest, id="default"):
661
+ self.threadLock.acquire()
662
+ # import uuid
663
+ self.current_id = id
664
+ from zh_langchain import construct_vector_store
665
+ self.qa_handle, self.kai_path = construct_vector_store(
666
+ vs_id=self.current_id,
667
+ files=file_manifest,
668
+ sentence_size=100,
669
+ history=[],
670
+ one_conent="",
671
+ one_content_segmentation="",
672
+ text2vec = self.get_chinese_text2vec(),
673
+ )
674
+ self.threadLock.release()
675
+
676
+ def get_current_archive_id(self):
677
+ return self.current_id
678
+
679
+ def get_loaded_file(self):
680
+ return self.qa_handle.get_loaded_file()
681
+
682
+ def answer_with_archive_by_id(self, txt, id):
683
+ self.threadLock.acquire()
684
+ if not self.current_id == id:
685
+ self.current_id = id
686
+ from zh_langchain import construct_vector_store
687
+ self.qa_handle, self.kai_path = construct_vector_store(
688
+ vs_id=self.current_id,
689
+ files=[],
690
+ sentence_size=100,
691
+ history=[],
692
+ one_conent="",
693
+ one_content_segmentation="",
694
+ text2vec = self.get_chinese_text2vec(),
695
+ )
696
+ VECTOR_SEARCH_SCORE_THRESHOLD = 0
697
+ VECTOR_SEARCH_TOP_K = 4
698
+ CHUNK_SIZE = 512
699
+ resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
700
+ query = txt,
701
+ vs_path = self.kai_path,
702
+ score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
703
+ vector_search_top_k=VECTOR_SEARCH_TOP_K,
704
+ chunk_conent=True,
705
+ chunk_size=CHUNK_SIZE,
706
+ text2vec = self.get_chinese_text2vec(),
707
+ )
708
+ self.threadLock.release()
709
+ return resp, prompt
710
+
711
  @Singleton
712
  class nougat_interface():
713
  def __init__(self):
 
715
 
716
  def nougat_with_timeout(self, command, cwd, timeout=3600):
717
  import subprocess
 
718
  logging.info(f'正在执行命令 {command}')
719
+ process = subprocess.Popen(command, shell=True, cwd=cwd)
 
720
  try:
721
  stdout, stderr = process.communicate(timeout=timeout)
722
  except subprocess.TimeoutExpired:
 
730
  def NOUGAT_parse_pdf(self, fp, chatbot, history):
731
  from toolbox import update_ui_lastest_msg
732
 
733
+ yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
734
  chatbot=chatbot, history=history, delay=0)
735
  self.threadLock.acquire()
736
  import glob, threading, os
 
738
  dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
739
  os.makedirs(dst)
740
 
741
+ yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
742
  chatbot=chatbot, history=history, delay=0)
743
  self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
744
  res = glob.glob(os.path.join(dst,'*.mmd'))
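
`nougat_with_timeout` leans on `Popen.communicate(timeout=...)`: when the deadline passes, `subprocess.TimeoutExpired` is raised and the child has to be killed and reaped. The generic pattern, reduced to standard-library calls:

```python
import subprocess

def run_with_timeout(command, cwd=None, timeout=10):
    process = subprocess.Popen(command, shell=True, cwd=cwd)
    try:
        process.communicate(timeout=timeout)  # wait, but not forever
    except subprocess.TimeoutExpired:
        process.kill()                        # stop the runaway child
        process.communicate()                 # reap it; avoids a zombie
        raise RuntimeError(f"command timed out after {timeout}s: {command}")
    return process.returncode

print(run_with_timeout("echo hello", timeout=5))  # -> 0
```
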
 
761
  importlib.reload(__import__(m))
762
 
763
 
764
+ HTML_CSS = """
765
+ .row {
766
+ display: flex;
767
+ flex-wrap: wrap;
768
+ }
769
+ .column {
770
+ flex: 1;
771
+ padding: 10px;
772
+ }
773
+ .table-header {
774
+ font-weight: bold;
775
+ border-bottom: 1px solid black;
776
+ }
777
+ .table-row {
778
+ border-bottom: 1px solid lightgray;
779
+ }
780
+ .table-cell {
781
+ padding: 5px;
782
+ }
783
+ """
784
+
785
+ TABLE_CSS = """
786
+ <div class="row table-row">
787
+ <div class="column table-cell">REPLACE_A</div>
788
+ <div class="column table-cell">REPLACE_B</div>
789
+ </div>
790
+ """
791
+
792
+ class construct_html():
793
+ def __init__(self) -> None:
794
+ self.css = HTML_CSS
795
+ self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
796
+
797
+
798
+ def add_row(self, a, b):
799
+ tmp = TABLE_CSS
800
+ from toolbox import markdown_convertion
801
+ tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
802
+ tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
803
+ self.html_string += tmp
804
+
805
+
806
+ def save_file(self, file_name):
807
+ with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
808
+ f.write(self.html_string.encode('utf-8', 'ignore').decode())
809
+ return os.path.join(get_log_folder(), file_name)
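
A usage sketch for the report builder above; `markdown_convertion` is imported from `toolbox` inside `add_row`, and `save_file` writes into the log folder. The file name is a hypothetical placeholder:

```python
ch = construct_html()
ch.add_row("Original paragraph in English.", "翻译后的中文段落。")  # left | right
ch.add_row("Second paragraph.", "第二段。")
path = ch.save_file("translate_result.html")
print("report written to", path)
```
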
crazy_functions/diagram_fns/file_tree.py DELETED
@@ -1,122 +0,0 @@
1
- import os
2
- from textwrap import indent
3
-
4
- class FileNode:
5
- def __init__(self, name):
6
- self.name = name
7
- self.children = []
8
- self.is_leaf = False
9
- self.level = 0
10
- self.parenting_ship = []
11
- self.comment = ""
12
- self.comment_maxlen_show = 50
13
-
14
- @staticmethod
15
- def add_linebreaks_at_spaces(string, interval=10):
16
- return '\n'.join(string[i:i+interval] for i in range(0, len(string), interval))
17
-
18
- def sanitize_comment(self, comment):
19
- if len(comment) > self.comment_maxlen_show: suf = '...'
20
- else: suf = ''
21
- comment = comment[:self.comment_maxlen_show]
22
- comment = comment.replace('\"', '').replace('`', '').replace('\n', '').replace('`', '').replace('$', '')
23
- comment = self.add_linebreaks_at_spaces(comment, 10)
24
- return '`' + comment + suf + '`'
25
-
26
- def add_file(self, file_path, file_comment):
27
- directory_names, file_name = os.path.split(file_path)
28
- current_node = self
29
- level = 1
30
- if directory_names == "":
31
- new_node = FileNode(file_name)
32
- current_node.children.append(new_node)
33
- new_node.is_leaf = True
34
- new_node.comment = self.sanitize_comment(file_comment)
35
- new_node.level = level
36
- current_node = new_node
37
- else:
38
- dnamesplit = directory_names.split(os.sep)
39
- for i, directory_name in enumerate(dnamesplit):
40
- found_child = False
41
- level += 1
42
- for child in current_node.children:
43
- if child.name == directory_name:
44
- current_node = child
45
- found_child = True
46
- break
47
- if not found_child:
48
- new_node = FileNode(directory_name)
49
- current_node.children.append(new_node)
50
- new_node.level = level - 1
51
- current_node = new_node
52
- term = FileNode(file_name)
53
- term.level = level
54
- term.comment = self.sanitize_comment(file_comment)
55
- term.is_leaf = True
56
- current_node.children.append(term)
57
-
58
- def print_files_recursively(self, level=0, code="R0"):
59
- print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
60
- for j, child in enumerate(self.children):
61
- child.print_files_recursively(level=level+1, code=code+str(j))
62
- self.parenting_ship.extend(child.parenting_ship)
63
- p1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
64
- p2 = """ --> """
65
- p3 = f"""{code+str(j)}[\"🗎{child.name}\"]""" if child.is_leaf else f"""{code+str(j)}[[\"📁{child.name}\"]]"""
66
- edge_code = p1 + p2 + p3
67
- if edge_code in self.parenting_ship:
68
- continue
69
- self.parenting_ship.append(edge_code)
70
- if self.comment != "":
71
- pc1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
72
- pc2 = f""" -.-x """
73
- pc3 = f"""C{code}[\"{self.comment}\"]:::Comment"""
74
- edge_code = pc1 + pc2 + pc3
75
- self.parenting_ship.append(edge_code)
76
-
77
-
78
- MERMAID_TEMPLATE = r"""
79
- ```mermaid
80
- flowchart LR
81
- %% <gpt_academic_hide_mermaid_code> 一个特殊标记,用于在生成mermaid图表时隐藏代码块
82
- classDef Comment stroke-dasharray: 5 5
83
- subgraph {graph_name}
84
- {relationship}
85
- end
86
- ```
87
- """
88
-
89
- def build_file_tree_mermaid_diagram(file_manifest, file_comments, graph_name):
90
- # Create the root node
91
- file_tree_struct = FileNode("root")
92
- # Build the tree structure
93
- for file_path, file_comment in zip(file_manifest, file_comments):
94
- file_tree_struct.add_file(file_path, file_comment)
95
- file_tree_struct.print_files_recursively()
96
- cc = "\n".join(file_tree_struct.parenting_ship)
97
- ccc = indent(cc, prefix=" "*8)
98
- return MERMAID_TEMPLATE.format(graph_name=graph_name, relationship=ccc)
99
-
100
- if __name__ == "__main__":
101
- # File manifest
102
- file_manifest = [
103
- "cradle_void_terminal.ipynb",
104
- "tests/test_utils.py",
105
- "tests/test_plugins.py",
106
- "tests/test_llms.py",
107
- "config.py",
108
- "build/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/model_weights_0.bin",
109
- "crazy_functions/latex_fns/latex_actions.py",
110
- "crazy_functions/latex_fns/latex_toolbox.py"
111
- ]
112
- file_comments = [
113
- "根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件",
114
- "包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器",
115
- "用于构建HTML报告的类和方法用于构建HTML报告的类和方法��于构建HTML报告的类和方法",
116
- "包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码",
117
- "用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数",
118
- "是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块",
119
- "用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
120
- "包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
121
- ]
122
- print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
 
crazy_functions/game_fns/game_ascii_art.py DELETED
@@ -1,42 +0,0 @@
1
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
2
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
3
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
4
- from request_llms.bridge_all import predict_no_ui_long_connection
5
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
6
- import random
7
-
8
-
9
- class MiniGame_ASCII_Art(GptAcademicGameBaseState):
10
- def step(self, prompt, chatbot, history):
11
- if self.step_cnt == 0:
12
- chatbot.append(["我画你猜(动物)", "请稍等..."])
13
- else:
14
- if prompt.strip() == 'exit':
15
- self.delete_game = True
16
- yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.)
17
- return
18
- chatbot.append([prompt, ""])
19
- yield from update_ui(chatbot=chatbot, history=history)
20
-
21
- if self.step_cnt == 0:
22
- self.lock_plugin(chatbot)
23
- self.cur_task = 'draw'
24
-
25
- if self.cur_task == 'draw':
26
- avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"]
27
- self.obj = random.choice(avail_obj)
28
- inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
29
- f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. "
30
- raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
31
- self.cur_task = 'identify user guess'
32
- res = get_code_block(raw_res)
33
- history += ['', f'the answer is {self.obj}', inputs, res]
34
- yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.)
35
-
36
- elif self.cur_task == 'identify user guess':
37
- if is_same_thing(self.obj, prompt, self.llm_kwargs):
38
- self.delete_game = True
39
- yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
40
- else:
41
- self.cur_task = 'identify user guess'
42
- yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.)
 
crazy_functions/game_fns/game_interactive_story.py DELETED
@@ -1,212 +0,0 @@
1
- prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。
2
-
3
- - 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
4
- - 出现人物时,给出人物的名字。
5
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
6
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
7
- - 字数要求:第一幕的字数少于300字,且少于2个段落。
8
- """
9
-
10
- prompts_interact = """ 小说的前文回顾:
11
-
12
- {previously_on_story}
13
-
14
-
15
- 你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都精明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。
16
-
17
- 输出格式例如:
18
- 1. 后续剧情发展1
19
- 2. 后续剧情发展2
20
- 3. 后续剧情发展3
21
- 4. 后续剧情发展4
22
- """
23
-
24
-
25
- prompts_resume = """小说的前文回顾:
26
-
27
- {previously_on_story}
28
-
29
-
30
- 你是一个作家,我们正在互相讨论,确定后续剧情的发展。
31
- 在以下的剧情发展中,
32
-
33
- {choice}
34
-
35
- 我认为更合理的是:{user_choice}。
36
- 请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。
37
-
38
- - 禁止杜撰不符合我选择的剧情。
39
- - 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
40
- - 不要重复前文。
41
- - 出现人物时,给出人物的名字。
42
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
43
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
44
- - 小说的下一幕字数少于300字,且少于2个段落。
45
- """
46
-
47
-
48
- prompts_terminate = """小说的前文回顾:
49
-
50
- {previously_on_story}
51
-
52
-
53
- 你是一个作家,我们正在互相讨论,确定后续剧情的发展。
54
- 现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。
55
-
56
- 请在前文的基础上(不要重复前文),编写小说的最后一幕。
57
-
58
- - 不要重复前文。
59
- - 出现人物时,给出人物的名字。
60
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
61
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
62
- - 字数要求:最后一幕的字数少于1000字。
63
- """
64
-
65
-
66
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
67
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
68
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
69
- from request_llms.bridge_all import predict_no_ui_long_connection
70
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
71
- import random
72
-
73
-
74
- class MiniGame_ResumeStory(GptAcademicGameBaseState):
75
- story_headstart = [
76
- '先行者知道,他现在是全宇宙中唯一的一个人了。',
77
- '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。',
78
- '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。',
79
- '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。',
80
- '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。',
81
- '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。'
82
- ]
83
-
84
-
85
- def begin_game_step_0(self, prompt, chatbot, history):
86
- # init game at step 0
87
- self.headstart = random.choice(self.story_headstart)
88
- self.story = []
89
- chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"])
90
- self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。'
91
-
92
-
93
- def generate_story_image(self, story_paragraph):
94
- try:
95
- from crazy_functions.图片生成 import gen_image
96
- prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。')
97
- image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
98
- return f'<br/><div align="center"><img src="file={image_path}"></div>'
99
- except:
100
- return ''
101
-
102
- def step(self, prompt, chatbot, history):
103
-
104
- """
105
- 首先,处理游戏初始化等特殊情况
106
- """
107
- if self.step_cnt == 0:
108
- self.begin_game_step_0(prompt, chatbot, history)
109
- self.lock_plugin(chatbot)
110
- self.cur_task = 'head_start'
111
- else:
112
- if prompt.strip() == 'exit' or prompt.strip() == '结束剧情':
113
- # should we terminate game here?
114
- self.delete_game = True
115
- yield from update_ui_lastest_msg(lastmsg=f"游戏结束。", chatbot=chatbot, history=history, delay=0.)
116
- return
117
- if '剧情收尾' in prompt:
118
- self.cur_task = 'story_terminate'
119
- # # well, game resumes
120
- # chatbot.append([prompt, ""])
121
- # update ui, don't keep the user waiting
122
- yield from update_ui(chatbot=chatbot, history=history)
123
-
124
-
125
- """
126
- 处理游戏的主体逻辑
127
- """
128
- if self.cur_task == 'head_start':
129
- """
130
- 这是游戏的第一步
131
- """
132
- inputs_ = prompts_hs.format(headstart=self.headstart)
133
- history_ = []
134
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
135
- inputs_, '故事开头', self.llm_kwargs,
136
- chatbot, history_, self.sys_prompt_
137
- )
138
- self.story.append(story_paragraph)
139
- # # 配图
140
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
141
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
142
-
143
- # # 构建后续剧情引导
144
- previously_on_story = ""
145
- for s in self.story:
146
- previously_on_story += s + '\n'
147
- inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
148
- history_ = []
149
- self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
150
- inputs_, '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', self.llm_kwargs,
151
- chatbot,
152
- history_,
153
- self.sys_prompt_
154
- )
155
- self.cur_task = 'user_choice'
156
-
157
-
158
- elif self.cur_task == 'user_choice':
159
- """
160
- 根据用户的提示,确定故事的下一步
161
- """
162
- if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1)
163
- previously_on_story = ""
164
- for s in self.story:
165
- previously_on_story += s + '\n'
166
- inputs_ = prompts_resume.format(previously_on_story=previously_on_story, choice=self.next_choices, user_choice=prompt)
167
- history_ = []
168
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
169
- inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs,
170
- chatbot, history_, self.sys_prompt_
171
- )
172
- self.story.append(story_paragraph)
173
- # # 配图
174
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
175
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
176
-
177
- # # 构建后续剧情引导
178
- previously_on_story = ""
179
- for s in self.story:
180
- previously_on_story += s + '\n'
181
- inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
182
- history_ = []
183
- self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
184
- inputs_,
185
- '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', self.llm_kwargs,
186
- chatbot,
187
- history_,
188
- self.sys_prompt_
189
- )
190
- self.cur_task = 'user_choice'
191
-
192
-
193
- elif self.cur_task == 'story_terminate':
194
- """
195
- 根据用户的提示,确定故事的结局
196
- """
197
- previously_on_story = ""
198
- for s in self.story:
199
- previously_on_story += s + '\n'
200
- inputs_ = prompts_terminate.format(previously_on_story=previously_on_story, user_choice=prompt)
201
- history_ = []
202
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
203
- inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs,
204
- chatbot, history_, self.sys_prompt_
205
- )
206
- # # 配图
207
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
208
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
209
-
210
- # terminate game
211
- self.delete_game = True
212
- return
 
crazy_functions/game_fns/game_utils.py DELETED
@@ -1,35 +0,0 @@
1
-
2
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
3
- from request_llms.bridge_all import predict_no_ui_long_connection
4
- def get_code_block(reply):
5
- import re
6
- pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
7
- matches = re.findall(pattern, reply) # find all code blocks in text
8
- if len(matches) == 1:
9
- return "```" + matches[0] + "```" # code block
10
- raise RuntimeError("GPT is not generating proper code.")
11
-
12
- def is_same_thing(a, b, llm_kwargs):
13
- from pydantic import BaseModel, Field
14
- class IsSameThing(BaseModel):
15
- is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False)
16
-
17
- def run_gpt_fn(inputs, sys_prompt, history=[]):
18
- return predict_no_ui_long_connection(
19
- inputs=inputs, llm_kwargs=llm_kwargs,
20
- history=history, sys_prompt=sys_prompt, observe_window=[]
21
- )
22
-
23
- gpt_json_io = GptJsonIO(IsSameThing)
24
- inputs_01 = "Identity whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b)
25
- inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. cat and 猫 are the same thing."
26
- analyze_res_cot_01 = run_gpt_fn(inputs_01, "", [])
27
-
28
- inputs_02 = inputs_01 + gpt_json_io.format_instructions
29
- analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01])
30
-
31
- try:
32
- res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
33
- return res.is_same_thing
34
- except JsonStringError as e:
35
- return False
 
crazy_functions/gen_fns/gen_fns_shared.py DELETED
@@ -1,70 +0,0 @@
1
- import time
2
- import importlib
3
- from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
4
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
5
- from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
6
- import multiprocessing
7
-
8
- def get_class_name(class_string):
9
- import re
10
- # Use regex to extract the class name
11
- class_name = re.search(r'class (\w+)\(', class_string).group(1)
12
- return class_name
13
-
14
- def try_make_module(code, chatbot):
15
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
16
- fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
17
- with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
18
- promote_file_to_downloadzone(fn_path, chatbot=chatbot)
19
- class_name = get_class_name(code)
20
- manager = multiprocessing.Manager()
21
- return_dict = manager.dict()
22
- p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
23
- # only has 10 seconds to run
24
- p.start(); p.join(timeout=10)
25
- if p.is_alive(): p.terminate(); p.join()
26
- p.close()
27
- return return_dict["success"], return_dict['traceback']
28
-
29
- # check is_function_successfully_generated
30
- def is_function_successfully_generated(fn_path, class_name, return_dict):
31
- return_dict['success'] = False
32
- return_dict['traceback'] = ""
33
- try:
34
- # Create a spec for the module
35
- module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
36
- # Load the module
37
- example_module = importlib.util.module_from_spec(module_spec)
38
- module_spec.loader.exec_module(example_module)
39
- # Now you can use the module
40
- some_class = getattr(example_module, class_name)
41
- # Now you can create an instance of the class
42
- instance = some_class()
43
- return_dict['success'] = True
44
- return
45
- except:
46
- return_dict['traceback'] = trimmed_format_exc()
47
- return
48
-
49
- def subprocess_worker(code, file_path, return_dict):
50
- return_dict['result'] = None
51
- return_dict['success'] = False
52
- return_dict['traceback'] = ""
53
- try:
54
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
55
- fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
56
- with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
57
- class_name = get_class_name(code)
58
- # Create a spec for the module
59
- module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
60
- # Load the module
61
- example_module = importlib.util.module_from_spec(module_spec)
62
- module_spec.loader.exec_module(example_module)
63
- # Now you can use the module
64
- some_class = getattr(example_module, class_name)
65
- # Now you can create an instance of the class
66
- instance = some_class()
67
- return_dict['result'] = instance.run(file_path)
68
- return_dict['success'] = True
69
- except:
70
- return_dict['traceback'] = trimmed_format_exc()
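
Both deleted helpers above revolve around one standard-library pattern: load a freshly written .py file as a module, then pull a class out of it. Stripped of the plugin plumbing, the core looks like this (a sketch, with a placeholder module name):

```python
import importlib.util

def load_class_from_file(fn_path, class_name):
    # Build a module spec from an arbitrary .py path, execute it,
    # then fetch the requested class from the resulting module.
    spec = importlib.util.spec_from_file_location("generated_module", fn_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return getattr(module, class_name)
```
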
 
crazy_functions/ipc_fns/mp.py DELETED
@@ -1,37 +0,0 @@
1
- import platform
2
- import pickle
3
- import multiprocessing
4
-
5
- def run_in_subprocess_wrapper_func(v_args):
6
- func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
7
- import sys
8
- try:
9
- result = func(*args, **kwargs)
10
- return_dict['result'] = result
11
- except Exception as e:
12
- exc_info = sys.exc_info()
13
- exception_dict['exception'] = exc_info
14
-
15
- def run_in_subprocess_with_timeout(func, timeout=60):
16
- if platform.system() == 'Linux':
17
- def wrapper(*args, **kwargs):
18
- return_dict = multiprocessing.Manager().dict()
19
- exception_dict = multiprocessing.Manager().dict()
20
- v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
21
- process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
22
- process.start()
23
- process.join(timeout)
24
- if process.is_alive():
25
- process.terminate()
26
- raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
27
- process.close()
28
- if 'exception' in exception_dict:
29
- # ooops, the subprocess ran into an exception
30
- exc_info = exception_dict['exception']
31
- raise exc_info[1].with_traceback(exc_info[2])
32
- if 'result' in return_dict.keys():
33
- # If the subprocess ran successfully, return the result
34
- return return_dict['result']
35
- return wrapper
36
- else:
37
- return func
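
The removed wrapper pickles the target callable and fences it behind a subprocess with a hard deadline on Linux, degrading to a plain direct call on other platforms. A usage sketch with a hypothetical slow function:

```python
import time

def slow_square(x):
    time.sleep(1)  # stand-in for a long-running job such as LaTeX compilation
    return x * x

guarded = run_in_subprocess_with_timeout(slow_square, timeout=5)
print(guarded(7))  # 49 on Linux; raises TimeoutError instead if over budget
```
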
 
crazy_functions/latex_fns/latex_actions.py CHANGED
@@ -1,10 +1,9 @@
1
  from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
2
- from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
3
  from .latex_toolbox import PRESERVE, TRANSFORM
4
  from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
5
  from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
6
  from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
7
- from .latex_toolbox import find_title_and_abs
8
 
9
  import os, shutil
10
  import re
@@ -91,18 +90,7 @@ class LatexPaperSplit():
91
  "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
92
  # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
93
  self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
94
- self.title = "unknown"
95
- self.abstract = "unknown"
96
-
97
- def read_title_and_abstract(self, txt):
98
- try:
99
- title, abstract = find_title_and_abs(txt)
100
- if title is not None:
101
- self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
102
- if abstract is not None:
103
- self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
104
- except:
105
- pass
106
 
107
  def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
108
  """
@@ -175,8 +163,9 @@ class LatexPaperFileGroup():
175
  self.sp_file_contents = []
176
  self.sp_file_index = []
177
  self.sp_file_tag = []
 
178
  # count_token
179
- from request_llms.bridge_all import model_info
180
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
181
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
182
  self.get_token_num = get_token_num
@@ -191,12 +180,13 @@ class LatexPaperFileGroup():
191
  self.sp_file_index.append(index)
192
  self.sp_file_tag.append(self.file_paths[index])
193
  else:
194
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
195
- segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
196
  for j, segment in enumerate(segments):
197
  self.sp_file_contents.append(segment)
198
  self.sp_file_index.append(index)
199
  self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
 
200
 
201
  def merge_result(self):
202
  self.file_result = ["" for _ in range(len(self.file_paths))]
@@ -244,8 +234,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
244
  chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
245
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
246
  lps = LatexPaperSplit()
247
- lps.read_title_and_abstract(merged_content)
248
  res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
 
249
  # <-------- 拆分过长的latex片段 ---------->
250
  pfg = LatexPaperFileGroup()
251
  for index, r in enumerate(res):
@@ -266,19 +256,12 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
266
 
267
  else:
268
  # <-------- gpt 多线程请求 ---------->
269
- history_array = [[""] for _ in range(n_split)]
270
- # LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
271
- # if LATEX_EXPERIMENTAL:
272
- # paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
273
- # paper_meta_max_len = 888
274
- # history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]
275
-
276
  gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
277
  inputs_array=inputs_array,
278
  inputs_show_user_array=inputs_show_user_array,
279
  llm_kwargs=llm_kwargs,
280
  chatbot=chatbot,
281
- history_array=history_array,
282
  sys_prompt_array=sys_prompt_array,
283
  # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
284
  scroller_max_len = 40
@@ -402,7 +385,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
402
  result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
403
  promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
404
  if modified_pdf_success:
405
- yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history) # 刷新Gradio前端界面
406
  result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
407
  origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
408
  if os.path.exists(pj(work_folder, '..', 'translation')):
@@ -414,11 +397,8 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
414
  from .latex_toolbox import merge_pdfs
415
  concat_pdf = pj(work_folder_modified, f'comparison.pdf')
416
  merge_pdfs(origin_pdf, result_pdf, concat_pdf)
417
- if os.path.exists(pj(work_folder, '..', 'translation')):
418
- shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
419
  promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
420
  except Exception as e:
421
- print(e)
422
  pass
423
  return True # 成功啦
424
  else:
@@ -443,7 +423,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
443
  # write html
444
  try:
445
  import shutil
446
- from crazy_functions.pdf_fns.report_gen_html import construct_html
447
  from toolbox import gen_time_str
448
  ch = construct_html()
449
  orig = ""
 
1
  from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
2
+ from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
3
  from .latex_toolbox import PRESERVE, TRANSFORM
4
  from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
5
  from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
6
  from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
 
7
 
8
  import os, shutil
9
  import re
 
90
  "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
91
  # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加README中的QQ联系开发者)
92
  self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
93
+
94
 
95
  def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
96
  """
 
163
  self.sp_file_contents = []
164
  self.sp_file_index = []
165
  self.sp_file_tag = []
166
+
167
  # count_token
168
+ from request_llm.bridge_all import model_info
169
  enc = model_info["gpt-3.5-turbo"]['tokenizer']
170
  def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
171
  self.get_token_num = get_token_num
 
180
  self.sp_file_index.append(index)
181
  self.sp_file_tag.append(self.file_paths[index])
182
  else:
183
+ from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
184
+ segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
185
  for j, segment in enumerate(segments):
186
  self.sp_file_contents.append(segment)
187
  self.sp_file_index.append(index)
188
  self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
189
+ print('Segmentation: done')
190
 
191
  def merge_result(self):
192
  self.file_result = ["" for _ in range(len(self.file_paths))]
 
234
  chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
235
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
236
  lps = LatexPaperSplit()
 
237
  res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
238
+
239
  # <-------- 拆分过长的latex片段 ---------->
240
  pfg = LatexPaperFileGroup()
241
  for index, r in enumerate(res):
 
256
 
257
  else:
258
  # <-------- gpt 多线程请求 ---------->
 
259
  gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
260
  inputs_array=inputs_array,
261
  inputs_show_user_array=inputs_show_user_array,
262
  llm_kwargs=llm_kwargs,
263
  chatbot=chatbot,
264
+ history_array=[[""] for _ in range(n_split)],
265
  sys_prompt_array=sys_prompt_array,
266
  # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
267
  scroller_max_len = 40
 
385
  result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
386
  promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
387
  if modified_pdf_success:
388
+ yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
389
  result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
390
  origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
391
  if os.path.exists(pj(work_folder, '..', 'translation')):
 
397
  from .latex_toolbox import merge_pdfs
398
  concat_pdf = pj(work_folder_modified, f'comparison.pdf')
399
  merge_pdfs(origin_pdf, result_pdf, concat_pdf)
 
 
400
  promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
401
  except Exception as e:
 
402
  pass
403
  return True # 成功啦
404
  else:
 
423
  # write html
424
  try:
425
  import shutil
426
+ from ..crazy_utils import construct_html
427
  from toolbox import gen_time_str
428
  ch = construct_html()
429
  orig = ""
crazy_functions/latex_fns/latex_toolbox.py CHANGED
@@ -1,18 +1,15 @@
1
  import os, shutil
2
  import re
3
  import numpy as np
4
-
5
  PRESERVE = 0
6
  TRANSFORM = 1
7
 
8
  pj = os.path.join
9
 
10
-
11
- class LinkedListNode:
12
  """
13
  Linked List Node
14
  """
15
-
16
  def __init__(self, string, preserve=True) -> None:
17
  self.string = string
18
  self.preserve = preserve
@@ -21,47 +18,41 @@ class LinkedListNode:
21
  # self.begin_line = 0
22
  # self.begin_char = 0
23
 
24
-
25
  def convert_to_linklist(text, mask):
26
  root = LinkedListNode("", preserve=True)
27
  current_node = root
28
  for c, m, i in zip(text, mask, range(len(text))):
29
- if (m == PRESERVE and current_node.preserve) or (
30
- m == TRANSFORM and not current_node.preserve
31
- ):
32
  # add
33
  current_node.string += c
34
  else:
35
- current_node.next = LinkedListNode(c, preserve=(m == PRESERVE))
36
  current_node = current_node.next
37
  return root
38
 
39
-
40
  def post_process(root):
41
  # 修复括号
42
  node = root
43
  while True:
44
  string = node.string
45
- if node.preserve:
46
  node = node.next
47
- if node is None:
48
- break
49
  continue
50
-
51
  def break_check(string):
52
- str_stack = [""] # (lv, index)
53
  for i, c in enumerate(string):
54
- if c == "{":
55
- str_stack.append("{")
56
- elif c == "}":
57
  if len(str_stack) == 1:
58
- print("stack fix")
59
  return i
60
  str_stack.pop(-1)
61
  else:
62
  str_stack[-1] += c
63
  return -1
64
-
65
  bp = break_check(string)
66
 
67
  if bp == -1:
@@ -78,66 +69,51 @@ def post_process(root):
78
  node.next = q
79
 
80
  node = node.next
81
- if node is None:
82
- break
83
 
84
  # 屏蔽空行和太短的句子
85
  node = root
86
  while True:
87
- if len(node.string.strip("\n").strip("")) == 0:
88
- node.preserve = True
89
- if len(node.string.strip("\n").strip("")) < 42:
90
- node.preserve = True
91
  node = node.next
92
- if node is None:
93
- break
94
  node = root
95
  while True:
96
  if node.next and node.preserve and node.next.preserve:
97
  node.string += node.next.string
98
  node.next = node.next.next
99
  node = node.next
100
- if node is None:
101
- break
102
 
103
  # 将前后断行符脱离
104
  node = root
105
  prev_node = None
106
  while True:
107
  if not node.preserve:
108
- lstriped_ = node.string.lstrip().lstrip("\n")
109
- if (
110
- (prev_node is not None)
111
- and (prev_node.preserve)
112
- and (len(lstriped_) != len(node.string))
113
- ):
114
- prev_node.string += node.string[: -len(lstriped_)]
115
  node.string = lstriped_
116
- rstriped_ = node.string.rstrip().rstrip("\n")
117
- if (
118
- (node.next is not None)
119
- and (node.next.preserve)
120
- and (len(rstriped_) != len(node.string))
121
- ):
122
- node.next.string = node.string[len(rstriped_) :] + node.next.string
123
  node.string = rstriped_
124
- # =-=-=
125
  prev_node = node
126
  node = node.next
127
- if node is None:
128
- break
129
 
130
  # 标注节点的行数范围
131
  node = root
132
  n_line = 0
133
  expansion = 2
134
  while True:
135
- n_l = node.string.count("\n")
136
- node.range = [n_line - expansion, n_line + n_l + expansion] # 失败时,扭转的范围
137
- n_line = n_line + n_l
138
  node = node.next
139
- if node is None:
140
- break
141
  return root
142
 
143
 
@@ -152,125 +128,97 @@ def set_forbidden_text(text, mask, pattern, flags=0):
152
  """
153
  Add a preserve text area in this paper
154
  e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
155
- you can mask out (mask = PRESERVE so that text become untouchable for GPT)
156
  everything between "\begin{equation}" and "\end{equation}"
157
  """
158
- if isinstance(pattern, list):
159
- pattern = "|".join(pattern)
160
  pattern_compile = re.compile(pattern, flags)
161
  for res in pattern_compile.finditer(text):
162
- mask[res.span()[0] : res.span()[1]] = PRESERVE
163
  return text, mask
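
Every masking helper in this file shares one mechanism: a per-character array marks each position PRESERVE or TRANSFORM, and regex matches flip whole spans. A minimal self-contained sketch of that mechanism:

```python
import re
import numpy as np

PRESERVE, TRANSFORM = 0, 1
text = r"Intro text \begin{equation} E = mc^2 \end{equation} outro."
mask = np.full(len(text), TRANSFORM, dtype=np.uint8)  # editable by default

# Freeze everything between \begin{equation} and \end{equation}.
for res in re.finditer(r"\\begin\{equation\}.*?\\end\{equation\}", text, re.DOTALL):
    mask[res.span()[0]:res.span()[1]] = PRESERVE

editable = "".join(c for c, m in zip(text, mask) if m == TRANSFORM)
print(editable)  # -> "Intro text  outro."
```
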
164
 
165
-
166
  def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
167
  """
168
  Move area out of preserve area (make text editable for GPT)
169
- count the number of the braces so as to catch compelete text area.
170
  e.g.
171
- \begin{abstract} blablablablablabla. \end{abstract}
172
  """
173
- if isinstance(pattern, list):
174
- pattern = "|".join(pattern)
175
  pattern_compile = re.compile(pattern, flags)
176
  for res in pattern_compile.finditer(text):
177
  if not forbid_wrapper:
178
- mask[res.span()[0] : res.span()[1]] = TRANSFORM
179
  else:
180
- mask[res.regs[0][0] : res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
181
- mask[res.regs[1][0] : res.regs[1][1]] = TRANSFORM # abstract
182
- mask[res.regs[1][1] : res.regs[0][1]] = PRESERVE # abstract
183
  return text, mask
184
 
185
-
186
  def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
187
  """
188
  Add a preserve text area in this paper (text become untouchable for GPT).
189
- count the number of the braces so as to catch compelete text area.
190
  e.g.
191
- \caption{blablablablabla\texbf{blablabla}blablabla.}
192
  """
193
  pattern_compile = re.compile(pattern, flags)
194
  for res in pattern_compile.finditer(text):
195
  brace_level = -1
196
  p = begin = end = res.regs[0][0]
197
- for _ in range(1024 * 16):
198
- if text[p] == "}" and brace_level == 0:
199
- break
200
- elif text[p] == "}":
201
- brace_level -= 1
202
- elif text[p] == "{":
203
- brace_level += 1
204
  p += 1
205
- end = p + 1
206
  mask[begin:end] = PRESERVE
207
  return text, mask
208
 
209
-
210
- def reverse_forbidden_text_careful_brace(
211
- text, mask, pattern, flags=0, forbid_wrapper=True
212
- ):
213
  """
214
  Move area out of preserve area (make text editable for GPT)
215
- count the number of the braces so as to catch compelete text area.
216
  e.g.
217
- \caption{blablablablabla\texbf{blablabla}blablabla.}
218
  """
219
  pattern_compile = re.compile(pattern, flags)
220
  for res in pattern_compile.finditer(text):
221
  brace_level = 0
222
  p = begin = end = res.regs[1][0]
223
- for _ in range(1024 * 16):
224
- if text[p] == "}" and brace_level == 0:
225
- break
226
- elif text[p] == "}":
227
- brace_level -= 1
228
- elif text[p] == "{":
229
- brace_level += 1
230
  p += 1
231
  end = p
232
  mask[begin:end] = TRANSFORM
233
  if forbid_wrapper:
234
- mask[res.regs[0][0] : begin] = PRESERVE
235
- mask[end : res.regs[0][1]] = PRESERVE
236
  return text, mask
237
 
238
-
239
  def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
240
  """
241
  Find all \begin{} ... \end{} text blocks with fewer than limit_n_lines lines.
242
  Add it to preserve area
243
  """
244
  pattern_compile = re.compile(pattern, flags)
245
-
246
  def search_with_line_limit(text, mask):
247
  for res in pattern_compile.finditer(text):
248
  cmd = res.group(1) # begin{what}
249
- this = res.group(2) # content between begin and end
250
- this_mask = mask[res.regs[2][0] : res.regs[2][1]]
251
- white_list = [
252
- "document",
253
- "abstract",
254
- "lemma",
255
- "definition",
256
- "sproof",
257
- "em",
258
- "emph",
259
- "textit",
260
- "textbf",
261
- "itemize",
262
- "enumerate",
263
- ]
264
- if (cmd in white_list) or this.count(
265
- "\n"
266
- ) >= limit_n_lines: # use a magical number 42
267
  this, this_mask = search_with_line_limit(this, this_mask)
268
- mask[res.regs[2][0] : res.regs[2][1]] = this_mask
269
  else:
270
- mask[res.regs[0][0] : res.regs[0][1]] = PRESERVE
271
  return text, mask
 
272
 
273
- return search_with_line_limit(text, mask)
274
 
275
 
276
  """
@@ -279,7 +227,6 @@ Latex Merge File
279
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
280
  """
281
 
282
-
283
  def find_main_tex_file(file_manifest, mode):
284
  """
285
  在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
@@ -287,36 +234,27 @@ def find_main_tex_file(file_manifest, mode):
287
  """
288
  canidates = []
289
  for texf in file_manifest:
290
- if os.path.basename(texf).startswith("merge"):
291
  continue
292
- with open(texf, "r", encoding="utf8", errors="ignore") as f:
293
  file_content = f.read()
294
- if r"\documentclass" in file_content:
295
  canidates.append(texf)
296
  else:
297
  continue
298
 
299
  if len(canidates) == 0:
300
- raise RuntimeError("无法找到一个主Tex文件(包含documentclass关键字)")
301
  elif len(canidates) == 1:
302
  return canidates[0]
303
- else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回
304
  canidates_score = []
305
  # 给出一些判定模板文档的词作为扣分项
306
- unexpected_words = [
307
- "\\LaTeX",
308
- "manuscript",
309
- "Guidelines",
310
- "font",
311
- "citations",
312
- "rejected",
313
- "blind review",
314
- "reviewers",
315
- ]
316
- expected_words = ["\\input", "\\ref", "\\cite"]
317
  for texf in canidates:
318
  canidates_score.append(0)
319
- with open(texf, "r", encoding="utf8", errors="ignore") as f:
320
  file_content = f.read()
321
  file_content = rm_comments(file_content)
322
  for uw in unexpected_words:
@@ -325,10 +263,9 @@ def find_main_tex_file(file_manifest, mode):
325
  for uw in expected_words:
326
  if uw in file_content:
327
  canidates_score[-1] += 1
328
- select = np.argmax(canidates_score) # 取评分最高者返回
329
  return canidates[select]
330
-
331
-
332
  def rm_comments(main_file):
333
  new_file_remove_comment_lines = []
334
  for l in main_file.splitlines():
@@ -337,39 +274,30 @@ def rm_comments(main_file):
337
  pass
338
  else:
339
  new_file_remove_comment_lines.append(l)
340
- main_file = "\n".join(new_file_remove_comment_lines)
341
  # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # convert \include commands into \input commands
342
- main_file = re.sub(r"(?<!\\)%.*", "", main_file) # use a regex to strip trailing (half-line) % comments
343
  return main_file
344
 
345
-
346
  def find_tex_file_ignore_case(fp):
347
  dir_name = os.path.dirname(fp)
348
  base_name = os.path.basename(fp)
349
  # if the given file path is already correct
350
- if os.path.isfile(pj(dir_name, base_name)):
351
- return pj(dir_name, base_name)
352
  # otherwise, try appending a .tex suffix
353
- if not base_name.endswith(".tex"):
354
- base_name += ".tex"
355
- if os.path.isfile(pj(dir_name, base_name)):
356
- return pj(dir_name, base_name)
357
  # still not found: drop the case restriction and try again
358
  import glob
359
-
360
- for f in glob.glob(dir_name + "/*.tex"):
361
  base_name_s = os.path.basename(fp)
362
  base_name_f = os.path.basename(f)
363
- if base_name_s.lower() == base_name_f.lower():
364
- return f
365
  # also try with a .tex suffix appended
366
- if not base_name_s.endswith(".tex"):
367
- base_name_s += ".tex"
368
- if base_name_s.lower() == base_name_f.lower():
369
- return f
370
  return None
371
 
372
-
373
  def merge_tex_files_(project_foler, main_file, mode):
374
  """
375
  Merge Tex project recursively
@@ -380,51 +308,13 @@ def merge_tex_files_(project_foler, main_file, mode):
380
  fp = os.path.join(project_foler, f)
381
  fp_ = find_tex_file_ignore_case(fp)
382
  if fp_:
383
- try:
384
- with open(fp_, "r", encoding="utf-8", errors="replace") as fx:
385
- c = fx.read()
386
- except:
387
- c = f"\n\nWarning from GPT-Academic: LaTex source file is missing!\n\n"
388
  else:
389
- raise RuntimeError(f"找不到{fp},Tex源文件缺失!")
390
  c = merge_tex_files_(project_foler, c, mode)
391
- main_file = main_file[: s.span()[0]] + c + main_file[s.span()[1] :]
392
  return main_file
393
 
394
-
395
- def find_title_and_abs(main_file):
396
- def extract_abstract_1(text):
397
- pattern = r"\\abstract\{(.*?)\}"
398
- match = re.search(pattern, text, re.DOTALL)
399
- if match:
400
- return match.group(1)
401
- else:
402
- return None
403
-
404
- def extract_abstract_2(text):
405
- pattern = r"\\begin\{abstract\}(.*?)\\end\{abstract\}"
406
- match = re.search(pattern, text, re.DOTALL)
407
- if match:
408
- return match.group(1)
409
- else:
410
- return None
411
-
412
- def extract_title(string):
413
- pattern = r"\\title\{(.*?)\}"
414
- match = re.search(pattern, string, re.DOTALL)
415
-
416
- if match:
417
- return match.group(1)
418
- else:
419
- return None
420
-
421
- abstract = extract_abstract_1(main_file)
422
- if abstract is None:
423
- abstract = extract_abstract_2(main_file)
424
- title = extract_title(main_file)
425
- return title, abstract
426
-
427
-
428
  def merge_tex_files(project_foler, main_file, mode):
429
  """
430
  Merge Tex project recursively
@@ -434,105 +324,46 @@ def merge_tex_files(project_foler, main_file, mode):
434
  main_file = merge_tex_files_(project_foler, main_file, mode)
435
  main_file = rm_comments(main_file)
436
 
437
- if mode == "translate_zh":
438
  # find paper documentclass
439
- pattern = re.compile(r"\\documentclass.*\n")
440
  match = pattern.search(main_file)
441
  assert match is not None, "Cannot find documentclass statement!"
442
  position = match.end()
443
- add_ctex = "\\usepackage{ctex}\n"
444
- add_url = "\\usepackage{url}\n" if "{url}" not in main_file else ""
445
  main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
446
  # fontset=windows
447
  import platform
448
-
449
- main_file = re.sub(
450
- r"\\documentclass\[(.*?)\]{(.*?)}",
451
- r"\\documentclass[\1,fontset=windows,UTF8]{\2}",
452
- main_file,
453
- )
454
- main_file = re.sub(
455
- r"\\documentclass{(.*?)}",
456
- r"\\documentclass[fontset=windows,UTF8]{\1}",
457
- main_file,
458
- )
459
  # find paper abstract
460
- pattern_opt1 = re.compile(r"\\begin\{abstract\}.*\n")
461
  pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
462
  match_opt1 = pattern_opt1.search(main_file)
463
  match_opt2 = pattern_opt2.search(main_file)
464
- if (match_opt1 is None) and (match_opt2 is None):
465
- # "Cannot find paper abstract section!"
466
- main_file = insert_abstract(main_file)
467
- match_opt1 = pattern_opt1.search(main_file)
468
- match_opt2 = pattern_opt2.search(main_file)
469
- assert (match_opt1 is not None) or (
470
- match_opt2 is not None
471
- ), "Cannot find paper abstract section!"
472
  return main_file
473
 
474
 
475
- insert_missing_abs_str = r"""
476
- \begin{abstract}
477
- The GPT-Academic program cannot find abstract section in this paper.
478
- \end{abstract}
479
- """
480
-
481
-
482
- def insert_abstract(tex_content):
483
- if "\\maketitle" in tex_content:
484
- # find the position of "\maketitle"
485
- find_index = tex_content.index("\\maketitle")
486
- # find the nearest ending line
487
- end_line_index = tex_content.find("\n", find_index)
488
- # insert "abs_str" on the next line
489
- modified_tex = (
490
- tex_content[: end_line_index + 1]
491
- + "\n\n"
492
- + insert_missing_abs_str
493
- + "\n\n"
494
- + tex_content[end_line_index + 1 :]
495
- )
496
- return modified_tex
497
- elif r"\begin{document}" in tex_content:
498
- # find the position of "\maketitle"
499
- find_index = tex_content.index(r"\begin{document}")
500
- # find the nearest ending line
501
- end_line_index = tex_content.find("\n", find_index)
502
- # insert "abs_str" on the next line
503
- modified_tex = (
504
- tex_content[: end_line_index + 1]
505
- + "\n\n"
506
- + insert_missing_abs_str
507
- + "\n\n"
508
- + tex_content[end_line_index + 1 :]
509
- )
510
- return modified_tex
511
- else:
512
- return tex_content
513
-
514
-
515
  """
516
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
517
  Post process
518
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
519
  """
520
-
521
-
522
  def mod_inbraket(match):
523
  """
524
- Why does ChatGPT replace the commas inside \cite{} with full-width Chinese commas?
525
  """
526
  # get the matched string
527
  cmd = match.group(1)
528
  str_to_modify = match.group(2)
529
  # modify the matched string
530
- str_to_modify = str_to_modify.replace("：", ":") # full-width Chinese colon -> ASCII colon
531
- str_to_modify = str_to_modify.replace("，", ",") # full-width Chinese comma -> ASCII comma
532
  # str_to_modify = 'BOOM'
533
  return "\\" + cmd + "{" + str_to_modify + "}"
534
 
535
-
536
  def fix_content(final_tex, node_string):
537
  """
538
  Fix common GPT errors to increase success rate
@@ -543,10 +374,10 @@ def fix_content(final_tex, node_string):
543
  final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
544
 
545
  if "Traceback" in final_tex and "[Local Message]" in final_tex:
546
- final_tex = node_string # something went wrong; fall back to the original text
547
- if node_string.count("\\begin") != final_tex.count("\\begin"):
548
- final_tex = node_string # something went wrong; fall back to the original text
549
- if node_string.count("\_") > 0 and node_string.count("\_") > final_tex.count("\_"):
550
  # walk and replace any _ without \
551
  final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
552
 
@@ -554,32 +385,24 @@ def fix_content(final_tex, node_string):
554
  # this function count the number of { and }
555
  brace_level = 0
556
  for c in string:
557
- if c == "{":
558
- brace_level += 1
559
- elif c == "}":
560
- brace_level -= 1
561
  return brace_level
562
-
563
  def join_most(tex_t, tex_o):
564
  # this function join translated string and original string when something goes wrong
565
  p_t = 0
566
  p_o = 0
567
-
568
  def find_next(string, chars, begin):
569
  p = begin
570
  while p < len(string):
571
- if string[p] in chars:
572
- return p, string[p]
573
  p += 1
574
  return None, None
575
-
576
  while True:
577
- res1, char = find_next(tex_o, ["{", "}"], p_o)
578
- if res1 is None:
579
- break
580
  res2, char = find_next(tex_t, [char], p_t)
581
- if res2 is None:
582
- break
583
  p_o = res1 + 1
584
  p_t = res2 + 1
585
  return tex_t[:p_t] + tex_o[p_o:]
@@ -588,14 +411,10 @@ def fix_content(final_tex, node_string):
588
  # something went wrong; splice back part of the original to keep braces balanced
589
  final_tex = join_most(final_tex, node_string)
590
  return final_tex
591
-
592
-
593
  def compile_latex_with_timeout(command, cwd, timeout=60):
594
  import subprocess
595
-
596
- process = subprocess.Popen(
597
- command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd
598
- )
599
  try:
600
  stdout, stderr = process.communicate(timeout=timeout)
601
  except subprocess.TimeoutExpired:
@@ -606,51 +425,15 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
606
  return True
607
 
608
 
609
- def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
610
- import sys
611
-
612
- try:
613
- result = func(*args, **kwargs)
614
- return_dict["result"] = result
615
- except Exception as e:
616
- exc_info = sys.exc_info()
617
- exception_dict["exception"] = exc_info
618
-
619
-
620
- def run_in_subprocess(func):
621
- import multiprocessing
622
-
623
- def wrapper(*args, **kwargs):
624
- return_dict = multiprocessing.Manager().dict()
625
- exception_dict = multiprocessing.Manager().dict()
626
- process = multiprocessing.Process(
627
- target=run_in_subprocess_wrapper_func,
628
- args=(func, args, kwargs, return_dict, exception_dict),
629
- )
630
- process.start()
631
- process.join()
632
- process.close()
633
- if "exception" in exception_dict:
634
- # ooops, the subprocess ran into an exception
635
- exc_info = exception_dict["exception"]
636
- raise exc_info[1].with_traceback(exc_info[2])
637
- if "result" in return_dict.keys():
638
- # If the subprocess ran successfully, return the result
639
- return return_dict["result"]
640
-
641
- return wrapper
642
-
643
-
644
- def _merge_pdfs(pdf1_path, pdf2_path, output_path):
645
- import PyPDF2 # PyPDF2 has a serious memory leak; run it in a subprocess so its memory can be reclaimed
646
 
 
 
647
  Percent = 0.95
648
- # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
649
  # Open the first PDF file
650
- with open(pdf1_path, "rb") as pdf1_file:
651
  pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
652
  # Open the second PDF file
653
- with open(pdf2_path, "rb") as pdf2_file:
654
  pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
655
  # Create a new PDF file to store the merged pages
656
  output_writer = PyPDF2.PdfFileWriter()
@@ -670,25 +453,12 @@ def _merge_pdfs(pdf1_path, pdf2_path, output_path):
670
  page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
671
  # Create a new empty page with double width
672
  new_page = PyPDF2.PageObject.createBlankPage(
673
- width=int(
674
- int(page1.mediaBox.getWidth())
675
- + int(page2.mediaBox.getWidth()) * Percent
676
- ),
677
- height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight()),
678
  )
679
  new_page.mergeTranslatedPage(page1, 0, 0)
680
- new_page.mergeTranslatedPage(
681
- page2,
682
- int(
683
- int(page1.mediaBox.getWidth())
684
- - int(page2.mediaBox.getWidth()) * (1 - Percent)
685
- ),
686
- 0,
687
- )
688
  output_writer.addPage(new_page)
689
  # Save the merged PDF file
690
- with open(output_path, "wb") as output_file:
691
  output_writer.write(output_file)
692
-
693
-
694
- merge_pdfs = run_in_subprocess(_merge_pdfs) # PyPDF2 has a serious memory leak; run it in a subprocess so its memory can be reclaimed
 
1
  import os, shutil
2
  import re
3
  import numpy as np
 
4
  PRESERVE = 0
5
  TRANSFORM = 1
6
 
7
  pj = os.path.join
8
 
9
+ class LinkedListNode():
 
10
  """
11
  Linked List Node
12
  """
 
13
  def __init__(self, string, preserve=True) -> None:
14
  self.string = string
15
  self.preserve = preserve
 
18
  # self.begin_line = 0
19
  # self.begin_char = 0
20
 
 
21
  def convert_to_linklist(text, mask):
22
  root = LinkedListNode("", preserve=True)
23
  current_node = root
24
  for c, m, i in zip(text, mask, range(len(text))):
25
+ if (m==PRESERVE and current_node.preserve) \
26
+ or (m==TRANSFORM and not current_node.preserve):
 
27
  # add
28
  current_node.string += c
29
  else:
30
+ current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
31
  current_node = current_node.next
32
  return root
33
 
 
34
  def post_process(root):
35
  # fix unbalanced braces
36
  node = root
37
  while True:
38
  string = node.string
39
+ if node.preserve:
40
  node = node.next
41
+ if node is None: break
 
42
  continue
 
43
  def break_check(string):
44
+ str_stack = [""] # (lv, index)
45
  for i, c in enumerate(string):
46
+ if c == '{':
47
+ str_stack.append('{')
48
+ elif c == '}':
49
  if len(str_stack) == 1:
50
+ print('stack fix')
51
  return i
52
  str_stack.pop(-1)
53
  else:
54
  str_stack[-1] += c
55
  return -1
 
56
  bp = break_check(string)
57
 
58
  if bp == -1:
 
69
  node.next = q
70
 
71
  node = node.next
72
+ if node is None: break
 
73
 
74
  # mask out empty lines and overly short sentences (mark them preserve)
75
  node = root
76
  while True:
77
+ if len(node.string.strip('\n').strip(' '))==0: node.preserve = True
78
+ if len(node.string.strip('\n').strip(' '))<42: node.preserve = True
 
 
79
  node = node.next
80
+ if node is None: break
 
81
  node = root
82
  while True:
83
  if node.next and node.preserve and node.next.preserve:
84
  node.string += node.next.string
85
  node.next = node.next.next
86
  node = node.next
87
+ if node is None: break
 
88
 
89
  # detach leading/trailing line breaks from editable nodes
90
  node = root
91
  prev_node = None
92
  while True:
93
  if not node.preserve:
94
+ lstriped_ = node.string.lstrip().lstrip('\n')
95
+ if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
96
+ prev_node.string += node.string[:-len(lstriped_)]
 
 
 
 
97
  node.string = lstriped_
98
+ rstriped_ = node.string.rstrip().rstrip('\n')
99
+ if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
100
+ node.next.string = node.string[len(rstriped_):] + node.next.string
 
 
 
 
101
  node.string = rstriped_
102
+ # =====
103
  prev_node = node
104
  node = node.next
105
+ if node is None: break
 
106
 
107
  # annotate each node with its line-number range
108
  node = root
109
  n_line = 0
110
  expansion = 2
111
  while True:
112
+ n_l = node.string.count('\n')
113
+ node.range = [n_line-expansion, n_line+n_l+expansion] # range to roll back on failure
114
+ n_line = n_line+n_l
115
  node = node.next
116
+ if node is None: break
 
117
  return root
118
 
119
 
 
128
  """
129
  Add a preserve text area in this paper
130
  e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
131
+ you can mask out (set mask = PRESERVE, so the text becomes untouchable for GPT)
132
  everything between "\begin{equation}" and "\end{equation}"
133
  """
134
+ if isinstance(pattern, list): pattern = '|'.join(pattern)
 
135
  pattern_compile = re.compile(pattern, flags)
136
  for res in pattern_compile.finditer(text):
137
+ mask[res.span()[0]:res.span()[1]] = PRESERVE
138
  return text, mask
139
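A minimal usage sketch of the mask machinery above (the sample text and equation pattern are illustrative, not from this file):

    import re
    import numpy as np
    text = r"Intro text. \begin{equation} E = mc^2 \end{equation} Outro text."
    mask = np.full(len(text), TRANSFORM, dtype=np.uint8)   # everything editable by default
    text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\}.*?\\end\{equation\}", flags=re.DOTALL)
    root = convert_to_linklist(text, mask)                 # alternating preserve/transform segments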
 
 
140
  def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
141
  """
142
  Move area out of preserve area (make text editable for GPT)
143
+ count braces so as to capture the complete text area.
144
  e.g.
145
+ \begin{abstract} blablablablablabla. \end{abstract}
146
  """
147
+ if isinstance(pattern, list): pattern = '|'.join(pattern)
 
148
  pattern_compile = re.compile(pattern, flags)
149
  for res in pattern_compile.finditer(text):
150
  if not forbid_wrapper:
151
+ mask[res.span()[0]:res.span()[1]] = TRANSFORM
152
  else:
153
+ mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
154
+ mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
155
+ mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # '\\end{abstract}'
156
  return text, mask
157
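Conversely, to carve an editable hole out of a fully preserved document (a sketch mirroring the docstring above; only group(1) of the pattern becomes editable when forbid_wrapper=True):

    mask[:] = PRESERVE
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}",
                                        flags=re.DOTALL, forbid_wrapper=True)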
 
 
158
  def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
159
  """
160
  Add a preserve text area in this paper (text become untouchable for GPT).
161
+ count braces so as to capture the complete text area.
162
  e.g.
163
+ \caption{blablablablabla\textbf{blablabla}blablabla.}
164
  """
165
  pattern_compile = re.compile(pattern, flags)
166
  for res in pattern_compile.finditer(text):
167
  brace_level = -1
168
  p = begin = end = res.regs[0][0]
169
+ for _ in range(1024*16):
170
+ if text[p] == '}' and brace_level == 0: break
171
+ elif text[p] == '}': brace_level -= 1
172
+ elif text[p] == '{': brace_level += 1
 
 
 
173
  p += 1
174
+ end = p+1
175
  mask[begin:end] = PRESERVE
176
  return text, mask
177
 
178
+ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
 
 
 
179
  """
180
  Move area out of preserve area (make text editable for GPT)
181
+ count braces so as to capture the complete text area.
182
  e.g.
183
+ \caption{blablablablabla\textbf{blablabla}blablabla.}
184
  """
185
  pattern_compile = re.compile(pattern, flags)
186
  for res in pattern_compile.finditer(text):
187
  brace_level = 0
188
  p = begin = end = res.regs[1][0]
189
+ for _ in range(1024*16):
190
+ if text[p] == '}' and brace_level == 0: break
191
+ elif text[p] == '}': brace_level -= 1
192
+ elif text[p] == '{': brace_level += 1
 
 
 
193
  p += 1
194
  end = p
195
  mask[begin:end] = TRANSFORM
196
  if forbid_wrapper:
197
+ mask[res.regs[0][0]:begin] = PRESERVE
198
+ mask[end:res.regs[0][1]] = PRESERVE
199
  return text, mask
200
 
 
201
  def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
202
  """
203
  Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
204
  Add it to preserve area
205
  """
206
  pattern_compile = re.compile(pattern, flags)
 
207
  def search_with_line_limit(text, mask):
208
  for res in pattern_compile.finditer(text):
209
  cmd = res.group(1) # begin{what}
210
+ this = res.group(2) # content between begin and end
211
+ this_mask = mask[res.regs[2][0]:res.regs[2][1]]
212
+ white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
213
+ 'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
214
+ if (cmd in white_list) or this.count('\n') >= limit_n_lines: # limit_n_lines defaults to the magic number 42
215
  this, this_mask = search_with_line_limit(this, this_mask)
216
+ mask[res.regs[2][0]:res.regs[2][1]] = this_mask
217
  else:
218
+ mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
219
  return text, mask
220
+ return search_with_line_limit(text, mask)
221
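A hedged example of a typical call (the environment pattern with the \1 backreference is an assumption; the code above expects group(1) to be the environment name and group(2) its body):

    text, mask = set_forbidden_text_begin_end(
        text, mask,
        r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}",
        flags=re.DOTALL, limit_n_lines=42)   # short environments are preserved verbatim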
 
 
222
 
223
 
224
  """
 
227
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
228
  """
229
 
 
230
  def find_main_tex_file(file_manifest, mode):
231
  """
232
  Among multiple Tex files, find the main file (it must contain \documentclass); return the first one found.
 
234
  """
235
  canidates = []
236
  for texf in file_manifest:
237
+ if os.path.basename(texf).startswith('merge'):
238
  continue
239
+ with open(texf, 'r', encoding='utf8', errors='ignore') as f:
240
  file_content = f.read()
241
+ if r'\documentclass' in file_content:
242
  canidates.append(texf)
243
  else:
244
  continue
245
 
246
  if len(canidates) == 0:
247
+ raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
248
  elif len(canidates) == 1:
249
  return canidates[0]
250
+ else: # if len(canidates) >= 2: score the candidates using words common in LaTeX templates (but rare in a manuscript body) and return the best one
251
  canidates_score = []
252
  # words that mark a template document; each hit deducts a point
253
+ unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
254
+ expected_words = [r'\input', r'\ref', r'\cite']  # raw strings: a plain '\r' would be a carriage-return escape
255
  for texf in canidates:
256
  canidates_score.append(0)
257
+ with open(texf, 'r', encoding='utf8', errors='ignore') as f:
258
  file_content = f.read()
259
  file_content = rm_comments(file_content)
260
  for uw in unexpected_words:
 
263
  for uw in expected_words:
264
  if uw in file_content:
265
  canidates_score[-1] += 1
266
+ select = np.argmax(canidates_score) # return the highest-scoring candidate
267
  return canidates[select]
268
+
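Illustrative call (the file list is hypothetical; each candidate is opened and scored as above):

    main_tex = find_main_tex_file(
        ["proj/main.tex", "proj/template.tex", "proj/sections/intro.tex"],
        mode="translate_zh")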
 
269
  def rm_comments(main_file):
270
  new_file_remove_comment_lines = []
271
  for l in main_file.splitlines():
 
274
  pass
275
  else:
276
  new_file_remove_comment_lines.append(l)
277
+ main_file = '\n'.join(new_file_remove_comment_lines)
278
  # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # convert \include commands into \input commands
279
+ main_file = re.sub(r'(?<!\\)%.*', '', main_file) # use a regex to strip trailing (half-line) % comments
280
  return main_file
281
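Assuming the elided line filter drops lines whose lstrip() starts with '%', the behaviour is:

    s = rm_comments("50\\% done % trailing comment\n% whole-line comment\nkeep this")
    assert s == "50\\% done \nkeep this"   # escaped \% survives; both kinds of comment are gone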
 
 
282
  def find_tex_file_ignore_case(fp):
283
  dir_name = os.path.dirname(fp)
284
  base_name = os.path.basename(fp)
285
  # if the given file path is already correct
286
+ if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
 
287
  # otherwise, try appending a .tex suffix
288
+ if not base_name.endswith('.tex'): base_name+='.tex'
289
+ if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
 
 
290
  # still not found: drop the case restriction and try again
291
  import glob
292
+ for f in glob.glob(dir_name+'/*.tex'):
 
293
  base_name_s = os.path.basename(fp)
294
  base_name_f = os.path.basename(f)
295
+ if base_name_s.lower() == base_name_f.lower(): return f
 
296
  # also try with a .tex suffix appended
297
+ if not base_name_s.endswith('.tex'): base_name_s+='.tex'
298
+ if base_name_s.lower() == base_name_f.lower(): return f
 
 
299
  return None
300
 
 
301
  def merge_tex_files_(project_foler, main_file, mode):
302
  """
303
  Merge Tex project recursively
 
308
  fp = os.path.join(project_foler, f)
309
  fp_ = find_tex_file_ignore_case(fp)
310
  if fp_:
311
+ with open(fp_, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
 
 
 
 
312
  else:
313
+ raise RuntimeError(f'找不到{fp},Tex源文件缺失!')
314
  c = merge_tex_files_(project_foler, c, mode)
315
+ main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
316
  return main_file
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  def merge_tex_files(project_foler, main_file, mode):
319
  """
320
  Merge Tex project recursively
 
324
  main_file = merge_tex_files_(project_foler, main_file, mode)
325
  main_file = rm_comments(main_file)
326
 
327
+ if mode == 'translate_zh':
328
  # find paper documentclass
329
+ pattern = re.compile(r'\\documentclass.*\n')
330
  match = pattern.search(main_file)
331
  assert match is not None, "Cannot find documentclass statement!"
332
  position = match.end()
333
+ add_ctex = '\\usepackage{ctex}\n'
334
+ add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
335
  main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
336
  # fontset=windows
337
  import platform
338
+ main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
339
+ main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
340
  # find paper abstract
341
+ pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
342
  pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
343
  match_opt1 = pattern_opt1.search(main_file)
344
  match_opt2 = pattern_opt2.search(main_file)
345
+ assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
346
  return main_file
347
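The two documentclass rewrites above can be sanity-checked in isolation (ctex then requires a CJK-capable engine such as XeLaTeX):

    s = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}",
               r"\\documentclass[\1,fontset=windows,UTF8]{\2}",
               r"\documentclass[11pt]{article}")
    assert s == r"\documentclass[11pt,fontset=windows,UTF8]{article}"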
 
348

349
  """
350
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
351
  Post process
352
  =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
353
  """
 
 
354
  def mod_inbraket(match):
355
  """
356
+ Why does ChatGPT replace the commas inside \cite{} with full-width Chinese commas?
357
  """
358
  # get the matched string
359
  cmd = match.group(1)
360
  str_to_modify = match.group(2)
361
  # modify the matched string
362
+ str_to_modify = str_to_modify.replace('：', ':') # full-width Chinese colon -> ASCII colon
363
+ str_to_modify = str_to_modify.replace('，', ',') # full-width Chinese comma -> ASCII comma
364
  # str_to_modify = 'BOOM'
365
  return "\\" + cmd + "{" + str_to_modify + "}"
366
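fix_content (below) applies mod_inbraket through re.sub; for example, a full-width comma inside \cite is restored to ASCII:

    s = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, "\\cite{ref1，ref2}")
    assert s == "\\cite{ref1,ref2}"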
 
 
367
  def fix_content(final_tex, node_string):
368
  """
369
  Fix common GPT errors to increase success rate
 
374
  final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
375
 
376
  if "Traceback" in final_tex and "[Local Message]" in final_tex:
377
+ final_tex = node_string # something went wrong; fall back to the original text
378
+ if node_string.count('\\begin') != final_tex.count('\\begin'):
379
+ final_tex = node_string # something went wrong; fall back to the original text
380
+ if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
381
  # walk and replace any _ without \
382
  final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
383
 
 
385
  # this function count the number of { and }
386
  brace_level = 0
387
  for c in string:
388
+ if c == "{": brace_level += 1
389
+ elif c == "}": brace_level -= 1
 
 
390
  return brace_level
 
391
  def join_most(tex_t, tex_o):
392
  # this function join translated string and original string when something goes wrong
393
  p_t = 0
394
  p_o = 0
 
395
  def find_next(string, chars, begin):
396
  p = begin
397
  while p < len(string):
398
+ if string[p] in chars: return p, string[p]
 
399
  p += 1
400
  return None, None
 
401
  while True:
402
+ res1, char = find_next(tex_o, ['{','}'], p_o)
403
+ if res1 is None: break
 
404
  res2, char = find_next(tex_t, [char], p_t)
405
+ if res2 is None: break
 
406
  p_o = res1 + 1
407
  p_t = res2 + 1
408
  return tex_t[:p_t] + tex_o[p_o:]
 
411
  # something went wrong; splice back part of the original to keep braces balanced
412
  final_tex = join_most(final_tex, node_string)
413
  return final_tex
414
+
 
415
  def compile_latex_with_timeout(command, cwd, timeout=60):
416
  import subprocess
417
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
 
 
 
418
  try:
419
  stdout, stderr = process.communicate(timeout=timeout)
420
  except subprocess.TimeoutExpired:
 
425
  return True
426
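Typical invocation (the command string and working directory are illustrative):

    ok = compile_latex_with_timeout("pdflatex -interaction=batchmode merge.tex",
                                    cwd="build/workfolder", timeout=60)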
 
427

428
 
429
+ def merge_pdfs(pdf1_path, pdf2_path, output_path):
430
+ import PyPDF2
431
  Percent = 0.95
 
432
  # Open the first PDF file
433
+ with open(pdf1_path, 'rb') as pdf1_file:
434
  pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
435
  # Open the second PDF file
436
+ with open(pdf2_path, 'rb') as pdf2_file:
437
  pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
438
  # Create a new PDF file to store the merged pages
439
  output_writer = PyPDF2.PdfFileWriter()
 
453
  page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
454
  # Create a new empty page with double width
455
  new_page = PyPDF2.PageObject.createBlankPage(
456
+ width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
457
+ height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
 
 
 
458
  )
459
  new_page.mergeTranslatedPage(page1, 0, 0)
460
+ new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
461
  output_writer.addPage(new_page)
462
  # Save the merged PDF file
463
+ with open(output_path, 'wb') as output_file:
464
  output_writer.write(output_file)
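Note that merge_pdfs targets the legacy PyPDF2 1.x API (PdfFileReader, PdfFileWriter, mediaBox, createBlankPage), which modern PyPDF2/pypdf removed; a hypothetical call:

    merge_pdfs("original.pdf", "translated.pdf", "comparison.pdf")   # side-by-side pages; paths illustrative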
 
 
 
crazy_functions/live_audio/aliyunASR.py CHANGED
@@ -1,106 +1,4 @@
1
- import time, logging, json, sys, struct
2
- import numpy as np
3
- from scipy.io.wavfile import WAVE_FORMAT
4
-
5
- def write_numpy_to_wave(filename, rate, data, add_header=False):
6
- """
7
- Write a NumPy array as a WAV file.
8
- """
9
- def _array_tofile(fid, data):
10
- # ravel gives a c-contiguous buffer
11
- fid.write(data.ravel().view('b').data)
12
-
13
- if hasattr(filename, 'write'):
14
- fid = filename
15
- else:
16
- fid = open(filename, 'wb')
17
-
18
- fs = rate
19
-
20
- try:
21
- dkind = data.dtype.kind
22
- if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
23
- data.dtype.itemsize == 1)):
24
- raise ValueError("Unsupported data type '%s'" % data.dtype)
25
-
26
- header_data = b''
27
-
28
- header_data += b'RIFF'
29
- header_data += b'\x00\x00\x00\x00'
30
- header_data += b'WAVE'
31
-
32
- # fmt chunk
33
- header_data += b'fmt '
34
- if dkind == 'f':
35
- format_tag = WAVE_FORMAT.IEEE_FLOAT
36
- else:
37
- format_tag = WAVE_FORMAT.PCM
38
- if data.ndim == 1:
39
- channels = 1
40
- else:
41
- channels = data.shape[1]
42
- bit_depth = data.dtype.itemsize * 8
43
- bytes_per_second = fs*(bit_depth // 8)*channels
44
- block_align = channels * (bit_depth // 8)
45
-
46
- fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
47
- bytes_per_second, block_align, bit_depth)
48
- if not (dkind == 'i' or dkind == 'u'):
49
- # add cbSize field for non-PCM files
50
- fmt_chunk_data += b'\x00\x00'
51
-
52
- header_data += struct.pack('<I', len(fmt_chunk_data))
53
- header_data += fmt_chunk_data
54
-
55
- # fact chunk (non-PCM files)
56
- if not (dkind == 'i' or dkind == 'u'):
57
- header_data += b'fact'
58
- header_data += struct.pack('<II', 4, data.shape[0])
59
-
60
- # check data size (needs to be immediately before the data chunk)
61
- if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
62
- raise ValueError("Data exceeds wave file size limit")
63
- if add_header:
64
- fid.write(header_data)
65
- # data chunk
66
- fid.write(b'data')
67
- fid.write(struct.pack('<I', data.nbytes))
68
- if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
69
- sys.byteorder == 'big'):
70
- data = data.byteswap()
71
- _array_tofile(fid, data)
72
-
73
- if add_header:
74
- # Determine file size and place it in correct
75
- # position at start of the file.
76
- size = fid.tell()
77
- fid.seek(4)
78
- fid.write(struct.pack('<I', size-8))
79
-
80
- finally:
81
- if not hasattr(filename, 'write'):
82
- fid.close()
83
- else:
84
- fid.seek(0)
85
-
86
- def is_speaker_speaking(vad, data, sample_rate):
87
- # Function to detect if the speaker is speaking
88
- # The WebRTC VAD only accepts 16-bit mono PCM audio,
89
- # sampled at 8000, 16000, 32000 or 48000 Hz.
90
- # A frame must be either 10, 20, or 30 ms in duration:
91
- frame_duration = 30
92
- n_bit_each = int(sample_rate * frame_duration / 1000)*2 # x2 because audio is 16 bit (2 bytes)
93
- res_list = []
94
- for t in range(len(data)):
95
- if t!=0 and t % n_bit_each == 0:
96
- res_list.append(vad.is_speech(data[t-n_bit_each:t], sample_rate))
97
-
98
- info = ''.join(['^' if r else '.' for r in res_list])
99
- info = info[:10]
100
- if any(res_list):
101
- return True, info
102
- else:
103
- return False, info
104
 
105
 
106
  class AliyunASR():
@@ -168,22 +66,12 @@ class AliyunASR():
168
  on_close=self.test_on_close,
169
  callback_args=[uuid.hex]
170
  )
171
- timeout_limit_second = 20
172
  r = sr.start(aformat="pcm",
173
- timeout=timeout_limit_second,
174
  enable_intermediate_result=True,
175
  enable_punctuation_prediction=True,
176
  enable_inverse_text_normalization=True)
177
 
178
- import webrtcvad
179
- vad = webrtcvad.Vad()
180
- vad.set_mode(1)
181
-
182
- is_previous_frame_transmitted = False # whether anyone was speaking in the previous frame
183
- previous_frame_data = None
184
- echo_cnt = 0 # after silence, keep sending audio to the server for n more frames
185
- echo_cnt_max = 4 # after silence, keep sending audio to the server for n more frames
186
- keep_alive_last_send_time = time.time()
187
  while not self.stop:
188
  # time.sleep(self.capture_interval)
189
  audio = rad.read(uuid.hex)
@@ -191,32 +79,12 @@ class AliyunASR():
191
  # convert to pcm file
192
  temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
193
  dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
194
- write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
195
  # read pcm binary
196
  with open(temp_file, "rb") as f: data = f.read()
197
- is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)
198
-
199
- if is_speaking or echo_cnt > 0:
200
- # the microphone is active / we are in the echo tail-off phase
201
- echo_cnt -= 1
202
- if not is_previous_frame_transmitted: # no voice in the previous frame, but transmit that frame as well
203
- if previous_frame_data is not None: data = previous_frame_data + data
204
- if is_speaking:
205
- echo_cnt = echo_cnt_max
206
- slices = zip(*(iter(data),) * 640) # groups of 640 bytes each
207
- for i in slices: sr.send_audio(bytes(i))
208
- keep_alive_last_send_time = time.time()
209
- is_previous_frame_transmitted = True
210
- else:
211
- is_previous_frame_transmitted = False
212
- echo_cnt = 0
213
- # keep the connection alive: even in silence, send some audio chunks to the server at intervals
214
- if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
215
- slices = zip(*(iter(data),) * 640) # groups of 640 bytes each
216
- for i in slices: sr.send_audio(bytes(i))
217
- keep_alive_last_send_time = time.time()
218
- is_previous_frame_transmitted = True
219
- self.audio_shape = info
220
  else:
221
  time.sleep(0.1)
222
 
 
1
+ import time, logging, json
+ from scipy import io  # assumption: added so that io.wavfile.write below resolves; the shown imports alone leave io undefined

2
 
3
 
4
  class AliyunASR():
 
66
  on_close=self.test_on_close,
67
  callback_args=[uuid.hex]
68
  )
69
+
70
  r = sr.start(aformat="pcm",
 
71
  enable_intermediate_result=True,
72
  enable_punctuation_prediction=True,
73
  enable_inverse_text_normalization=True)
74

75
  while not self.stop:
76
  # time.sleep(self.capture_interval)
77
  audio = rad.read(uuid.hex)
 
79
  # convert to pcm file
80
  temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
81
  dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
82
+ io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata)
83
  # read pcm binary
84
  with open(temp_file, "rb") as f: data = f.read()
85
+ # print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640)
86
+ slices = zip(*(iter(data),) * 640) # groups of 640 bytes each
87
+ for i in slices: sr.send_audio(bytes(i))

88
  else:
89
  time.sleep(0.1)
90
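The zip trick above slices the PCM byte stream into fixed 640-byte frames, i.e. 20 ms of 16 kHz 16-bit mono audio; a standalone equivalent:

    frames = [bytes(chunk) for chunk in zip(*(iter(data),) * 640)]   # drops any trailing partial frame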
 
crazy_functions/live_audio/audio_io.py CHANGED
@@ -35,7 +35,7 @@ class RealtimeAudioDistribution():
35
  def read(self, uuid):
36
  if uuid in self.data:
37
  res = self.data.pop(uuid)
38
- # print('\r read-', len(res), '-', max(res), end='', flush=True)
39
  else:
40
  res = None
41
  return res
 
35
  def read(self, uuid):
36
  if uuid in self.data:
37
  res = self.data.pop(uuid)
38
+ print('\r read-', len(res), '-', max(res), end='', flush=True)
39
  else:
40
  res = None
41
  return res
crazy_functions/multi_stage/multi_stage_utils.py DELETED
@@ -1,93 +0,0 @@
1
- from pydantic import BaseModel, Field
2
- from typing import List
3
- from toolbox import update_ui_lastest_msg, disable_auto_promotion
4
- from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
5
- from request_llms.bridge_all import predict_no_ui_long_connection
6
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
7
- import time
8
- import pickle
9
-
10
- def have_any_recent_upload_files(chatbot):
11
- _5min = 5 * 60
12
- if not chatbot: return False # chatbot is None
13
- most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
14
- if not most_recent_uploaded: return False # most_recent_uploaded is None
15
- if time.time() - most_recent_uploaded["time"] < _5min: return True # most_recent_uploaded is new
16
- else: return False # most_recent_uploaded is too old
17
-
18
- class GptAcademicState():
19
- def __init__(self):
20
- self.reset()
21
-
22
- def reset(self):
23
- pass
24
-
25
- def dump_state(self, chatbot):
26
- chatbot._cookies['plugin_state'] = pickle.dumps(self)
27
-
28
- def set_state(self, chatbot, key, value):
29
- setattr(self, key, value)
30
- chatbot._cookies['plugin_state'] = pickle.dumps(self)
31
-
32
- def get_state(chatbot, cls=None):
33
- state = chatbot._cookies.get('plugin_state', None)
34
- if state is not None: state = pickle.loads(state)
35
- elif cls is not None: state = cls()
36
- else: state = GptAcademicState()
37
- state.chatbot = chatbot
38
- return state
39
-
40
-
41
- class GptAcademicGameBaseState():
42
- """
43
- 1. first init: __init__ ->
44
- """
45
- def init_game(self, chatbot, lock_plugin):
46
- self.plugin_name = None
47
- self.callback_fn = None
48
- self.delete_game = False
49
- self.step_cnt = 0
50
-
51
- def lock_plugin(self, chatbot):
52
- if self.callback_fn is None:
53
- raise ValueError("callback_fn is None")
54
- chatbot._cookies['lock_plugin'] = self.callback_fn
55
- self.dump_state(chatbot)
56
-
57
- def get_plugin_name(self):
58
- if self.plugin_name is None:
59
- raise ValueError("plugin_name is None")
60
- return self.plugin_name
61
-
62
- def dump_state(self, chatbot):
63
- chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
64
-
65
- def set_state(self, chatbot, key, value):
66
- setattr(self, key, value)
67
- chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
68
-
69
- @staticmethod
70
- def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
71
- state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
72
- if state is not None:
73
- state = pickle.loads(state)
74
- else:
75
- state = cls()
76
- state.init_game(chatbot, lock_plugin)
77
- state.plugin_name = plugin_name
78
- state.llm_kwargs = llm_kwargs
79
- state.chatbot = chatbot
80
- state.callback_fn = callback_fn
81
- return state
82
-
83
- def continue_game(self, prompt, chatbot, history):
84
- # main body of the game step
85
- yield from self.step(prompt, chatbot, history)
86
- self.step_cnt += 1
87
- # save state and wrap up
88
- self.dump_state(chatbot)
89
- # if the game is over, clean up
90
- if self.delete_game:
91
- chatbot._cookies['lock_plugin'] = None
92
- chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
93
- yield from update_ui(chatbot=chatbot, history=history)
crazy_functions/pdf_fns/breakdown_txt.py DELETED
@@ -1,125 +0,0 @@
1
- from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
2
-
3
- def force_breakdown(txt, limit, get_token_fn):
4
- """ 当无法用标点、空行分割时,我们用最暴力的方法切割
5
- """
6
- for i in reversed(range(len(txt))):
7
- if get_token_fn(txt[:i]) < limit:
8
- return txt[:i], txt[i:]
9
- return "Tiktoken未知错误", "Tiktoken未知错误"
10
-
11
-
12
- def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
13
- """ 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage
14
- when remain_txt_to_cut < `_min`, part of remain_txt_to_cut_storage is taken back out
15
- """
16
- _min = int(5e4)
17
- _max = int(1e5)
18
- # print(len(remain_txt_to_cut), len(remain_txt_to_cut_storage))
19
- if len(remain_txt_to_cut) < _min and len(remain_txt_to_cut_storage) > 0:
20
- remain_txt_to_cut = remain_txt_to_cut + remain_txt_to_cut_storage
21
- remain_txt_to_cut_storage = ""
22
- if len(remain_txt_to_cut) > _max:
23
- remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage
24
- remain_txt_to_cut = remain_txt_to_cut[:_max]
25
- return remain_txt_to_cut, remain_txt_to_cut_storage
26
-
27
-
28
- def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
29
- """ 文本切分
30
- """
31
- res = []
32
- total_len = len(txt_tocut)
33
- fin_len = 0
34
- remain_txt_to_cut = txt_tocut
35
- remain_txt_to_cut_storage = ""
36
- # speed trick: when remain_txt_to_cut > `_max`, park the text beyond _max in remain_txt_to_cut_storage
37
- remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
38
-
39
- while True:
40
- if get_token_fn(remain_txt_to_cut) <= limit:
41
- # the remaining text fits within the token limit, so no more cutting is needed
42
- res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
43
- break
44
- else:
45
- # the remaining text exceeds the token limit, so cut it
46
- lines = remain_txt_to_cut.split('\n')
47
-
48
- # estimate a cut point
49
- estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
50
- estimated_line_cut = int(estimated_line_cut)
51
-
52
- # search for the offset (cnt) of a suitable cut point
53
- cnt = 0
54
- for cnt in reversed(range(estimated_line_cut)):
55
- if must_break_at_empty_line:
56
- # first try a double blank line (\n\n) as the cut point
57
- if lines[cnt] != "":
58
- continue
59
- prev = "\n".join(lines[:cnt])
60
- post = "\n".join(lines[cnt:])
61
- if get_token_fn(prev) < limit:
62
- break
63
-
64
- if cnt == 0:
65
- # no suitable cut point was found
66
- if break_anyway:
67
- # brute-force cutting is allowed
68
- prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
69
- else:
70
- # not allowed: raise an error instead
71
- raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")
72
-
73
- # append to the result list
74
- res.append(prev); fin_len+=len(prev)
75
- # prepare the next iteration
76
- remain_txt_to_cut = post
77
- remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
78
- process = fin_len/total_len
79
- print(f'正在文本切分 {int(process*100)}%')
80
- if len(remain_txt_to_cut.strip()) == 0:
81
- break
82
- return res
83
-
84
-
85
- def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
86
- """ 使用多种方式尝试切分文本,以满足 token 限制
87
- """
88
- from request_llms.bridge_all import model_info
89
- enc = model_info[llm_model]['tokenizer']
90
- def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
91
- try:
92
- # attempt 1: use double blank lines (\n\n) as cut points
93
- return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
94
- except RuntimeError:
95
- try:
96
- # attempt 2: use single blank lines (\n) as cut points
97
- return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
98
- except RuntimeError:
99
- try:
100
- # attempt 3: use English periods (.) as cut points
101
- res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False) # the Chinese full stop here is deliberate; it serves as a marker
102
- return [r.replace('。\n', '.') for r in res]
103
- except RuntimeError as e:
104
- try:
105
- # attempt 4: use Chinese full stops (。) as cut points
106
- res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
107
- return [r.replace('。。\n', '。') for r in res]
108
- except RuntimeError as e:
109
- # attempt 5: out of options, just cut anywhere
110
- return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
111
-
112
- breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)
113
-
114
- if __name__ == '__main__':
115
- from crazy_functions.crazy_utils import read_and_clean_pdf_text
116
- file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")
117
-
118
- from request_llms.bridge_all import model_info
119
- for i in range(5):
120
- file_content += file_content
121
-
122
- print(len(file_content))
123
- TOKEN_LIMIT_PER_FRAGMENT = 2500
124
- res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
125
-

crazy_functions/pdf_fns/parse_pdf.py CHANGED
@@ -1,26 +1,16 @@
1
- from functools import lru_cache
2
- from toolbox import gen_time_str
3
- from toolbox import promote_file_to_downloadzone
4
- from toolbox import write_history_to_file, promote_file_to_downloadzone
5
- from toolbox import get_conf
6
- from toolbox import ProxyNetworkActivate
7
- from colorful import *
8
  import requests
9
  import random
10
- import copy
11
- import os
12
- import math
13
-
14
  class GROBID_OFFLINE_EXCEPTION(Exception): pass
15
 
16
  def get_avail_grobid_url():
17
- GROBID_URLS = get_conf('GROBID_URLS')
 
18
  if len(GROBID_URLS) == 0: return None
19
  try:
20
  _grobid_url = random.choice(GROBID_URLS) # random load balancing
21
  if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
22
- with ProxyNetworkActivate('Connect_Grobid'):
23
- res = requests.get(_grobid_url+'/api/isalive')
24
  if res.text=='true': return _grobid_url
25
  else: return None
26
  except:
@@ -31,141 +21,10 @@ def parse_pdf(pdf_path, grobid_url):
31
  import scipdf # pip install scipdf_parser
32
  if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
33
  try:
34
- with ProxyNetworkActivate('Connect_Grobid'):
35
- article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
36
  except GROBID_OFFLINE_EXCEPTION:
37
  raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
38
  except:
39
  raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
40
  return article_dict
41
 
42
-
43
- def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
44
- # -=-=-=-=-=-=-=-= write file 1: translation and original interleaved -=-=-=-=-=-=-=-=
45
- res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
46
- promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
47
- generated_conclusion_files.append(res_path)
48
-
49
- # -=-=-=-=-=-=-=-= write file 2: translated text only -=-=-=-=-=-=-=-=
50
- translated_res_array = []
51
- # track the current top-level section heading:
52
- last_section_name = ""
53
- for index, value in enumerate(gpt_response_collection):
54
- # first select the response entries (odd indices):
55
- if index % 2 != 0:
56
- # extract the current English heading first:
57
- cur_section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
58
- # if index is 1, just use the first section name:
59
- if cur_section_name != last_section_name:
60
- cur_value = cur_section_name + '\n'
61
- last_section_name = copy.deepcopy(cur_section_name)
62
- else:
63
- cur_value = ""
64
- # one more tweak: rewrite the current part's heading, defaulting to the English one
65
- cur_value += value
66
- translated_res_array.append(cur_value)
67
- res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + translated_res_array,
68
- file_basename = f"{gen_time_str()}-translated_only.md",
69
- file_fullname = None,
70
- auto_caption = False)
71
- promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
72
- generated_conclusion_files.append(res_path)
73
- return res_path
74
-
75
- def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
76
- from crazy_functions.pdf_fns.report_gen_html import construct_html
77
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
78
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
79
- from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
80
-
81
- prompt = "以下是一篇学术论文的基本信息:\n"
82
- # title
83
- title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
84
- # authors
85
- authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
86
- # abstract
87
- abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
88
- # command
89
- prompt += f"请将题目和摘要翻译为{DST_LANG}。"
90
- meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]
91
-
92
- # single-threaded: fetch the paper's meta information
93
- paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
94
- inputs=prompt,
95
- inputs_show_user=prompt,
96
- llm_kwargs=llm_kwargs,
97
- chatbot=chatbot, history=[],
98
- sys_prompt="You are an academic paper reader。",
99
- )
100
-
101
- # multi-threaded: translate
102
- inputs_array = []
103
- inputs_show_user_array = []
104
-
105
- # get_token_num
106
- from request_llms.bridge_all import model_info
107
- enc = model_info[llm_kwargs['llm_model']]['tokenizer']
108
- def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
109
-
110
- def break_down(txt):
111
- raw_token_num = get_token_num(txt)
112
- if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
113
- return [txt]
114
- else:
115
- # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
116
- # find a smooth token limit to achieve even separation
117
- count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
118
- token_limit_smooth = raw_token_num // count + count
119
- return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])
120
-
121
- for section in article_dict.get('sections'):
122
- if len(section['text']) == 0: continue
123
- section_frags = break_down(section['text'])
124
- for i, fragment in enumerate(section_frags):
125
- heading = section['heading']
126
- if len(section_frags) > 1: heading += f' Part-{i+1}'
127
- inputs_array.append(
128
- f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
129
- )
130
- inputs_show_user_array.append(
131
- f"# {heading}\n\n{fragment}"
132
- )
133
-
134
- gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
135
- inputs_array=inputs_array,
136
- inputs_show_user_array=inputs_show_user_array,
137
- llm_kwargs=llm_kwargs,
138
- chatbot=chatbot,
139
- history_array=[meta for _ in inputs_array],
140
- sys_prompt_array=[
141
- "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
142
- )
143
- # -=-=-=-=-=-=-=-= write the Markdown file -=-=-=-=-=-=-=-=
144
- produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)
145
-
146
- # -=-=-=-=-=-=-=-= write the HTML file -=-=-=-=-=-=-=-=
147
- ch = construct_html()
148
- orig = ""
149
- trans = ""
150
- gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
151
- for i,k in enumerate(gpt_response_collection_html):
152
- if i%2==0:
153
- gpt_response_collection_html[i] = inputs_show_user_array[i//2]
154
- else:
155
- # extract the current English heading first:
156
- cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
157
- cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
158
- gpt_response_collection_html[i] = cur_value
159
-
160
- final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
161
- final.extend(gpt_response_collection_html)
162
- for i, k in enumerate(final):
163
- if i%2==0:
164
- orig = k
165
- if i%2==1:
166
- trans = k
167
- ch.add_row(a=orig, b=trans)
168
- create_report_file_name = f"{os.path.basename(fp)}.trans.html"
169
- html_file = ch.save_file(create_report_file_name)
170
- generated_conclusion_files.append(html_file)
171
- promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)

1
  import requests
2
  import random
3
+ from functools import lru_cache
 
 
 
4
  class GROBID_OFFLINE_EXCEPTION(Exception): pass
5
 
6
  def get_avail_grobid_url():
7
+ from toolbox import get_conf
8
+ GROBID_URLS, = get_conf('GROBID_URLS')
9
  if len(GROBID_URLS) == 0: return None
10
  try:
11
  _grobid_url = random.choice(GROBID_URLS) # random load balancing
12
  if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
13
+ res = requests.get(_grobid_url+'/api/isalive')
 
14
  if res.text=='true': return _grobid_url
15
  else: return None
16
  except:
 
21
  import scipdf # pip install scipdf_parser
22
  if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
23
  try:
24
+ article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
 
25
  except GROBID_OFFLINE_EXCEPTION:
26
  raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
27
  except:
28
  raise RuntimeError("解析PDF失败,请检查PDF是否损坏。")
29
  return article_dict
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/parse_word.py DELETED
@@ -1,85 +0,0 @@
1
- from crazy_functions.crazy_utils import read_and_clean_pdf_text, get_files_from_everything
2
- import os
3
- import re
4
- def extract_text_from_files(txt, chatbot, history):
5
- """
6
- Find pdf/md/word files, extract their text content, and return a status plus the text
7
-
8
- Args:
9
- chatbot: chatbot inputs and outputs (UI dialog-window handle, used for data-flow visualization)
10
- history (list): List of chat history
11
-
12
- Returns:
13
- whether files were found (bool)
14
- final_result(list): text contents
15
- page_one(list): first-page content / abstract
16
- file_manifest(list): file paths
17
- excption(string): info that needs manual handling by the user; stays empty if no error occurred
18
- """
19
-
20
- final_result = []
21
- page_one = []
22
- file_manifest = []
23
- excption = ""
24
-
25
- if txt == "":
26
- final_result.append(txt)
27
- return False, final_result, page_one, file_manifest, excption # the input box holds no file, so echo its content back
28
-
29
- # look for files referenced in the input box
30
- file_pdf,pdf_manifest,folder_pdf = get_files_from_everything(txt, '.pdf')
31
- file_md,md_manifest,folder_md = get_files_from_everything(txt, '.md')
32
- file_word,word_manifest,folder_word = get_files_from_everything(txt, '.docx')
33
- file_doc,doc_manifest,folder_doc = get_files_from_everything(txt, '.doc')
34
-
35
- if file_doc:
36
- excption = "word"
37
- return False, final_result, page_one, file_manifest, excption
38
-
39
- file_num = len(pdf_manifest) + len(md_manifest) + len(word_manifest)
40
- if file_num == 0:
41
- final_result.append(txt)
42
- return False, final_result, page_one, file_manifest, excption # the input box holds no file, so echo its content back
43
-
44
- if file_pdf:
45
- try: # try importing the dependency; if it is missing, suggest how to install it
46
- import fitz
47
- except:
48
- excption = "pdf"
49
- return False, final_result, page_one, file_manifest, excption
50
- for index, fp in enumerate(pdf_manifest):
51
- file_content, pdf_one = read_and_clean_pdf_text(fp) # (try to) split the PDF by section
52
- file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
53
- pdf_one = str(pdf_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
54
- final_result.append(file_content)
55
- page_one.append(pdf_one)
56
- file_manifest.append(os.path.relpath(fp, folder_pdf))
57
-
58
- if file_md:
59
- for index, fp in enumerate(md_manifest):
60
- with open(fp, 'r', encoding='utf-8', errors='replace') as f:
61
- file_content = f.read()
62
- file_content = file_content.encode('utf-8', 'ignore').decode()
63
- headers = re.findall(r'^#\s(.*)$', file_content, re.MULTILINE) # extract the md level-1/level-2 headings as an abstract
64
- if len(headers) > 0:
65
- page_one.append("\n".join(headers)) #合并所有的标题,以换行符分割
66
- else:
67
- page_one.append("")
68
- final_result.append(file_content)
69
- file_manifest.append(os.path.relpath(fp, folder_md))
70
-
71
- if file_word:
72
- try: # try importing the dependency; if it is missing, suggest how to install it
73
- from docx import Document
74
- except:
75
- excption = "word_pip"
76
- return False, final_result, page_one, file_manifest, excption
77
- for index, fp in enumerate(word_manifest):
78
- doc = Document(fp)
79
- file_content = '\n'.join([p.text for p in doc.paragraphs])
80
- file_content = file_content.encode('utf-8', 'ignore').decode()
81
- page_one.append(file_content[:200])
82
- final_result.append(file_content)
83
- file_manifest.append(os.path.relpath(fp, folder_word))
84
-
85
- return True, final_result, page_one, file_manifest, excption
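The deleted helper was called with the raw input-box text; a sketch of the call site (names follow the docstring above):

    ok, final_result, page_one, file_manifest, excption = extract_text_from_files(txt, chatbot, history)
    # ok == False means no pdf/md/docx was found and final_result simply echoes txt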
crazy_functions/pdf_fns/report_gen_html.py DELETED
@@ -1,58 +0,0 @@
1
- from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
2
- import os
3
-
4
-
5
-
6
-
7
- class construct_html():
8
- def __init__(self) -> None:
9
- self.html_string = ""
10
-
11
- def add_row(self, a, b):
12
- from toolbox import markdown_convertion
13
- template = """
14
- {
15
- primary_col: {
16
- header: String.raw`__PRIMARY_HEADER__`,
17
- msg: String.raw`__PRIMARY_MSG__`,
18
- },
19
- secondary_rol: {
20
- header: String.raw`__SECONDARY_HEADER__`,
21
- msg: String.raw`__SECONDARY_MSG__`,
22
- }
23
- },
24
- """
25
- def std(str):
26
- str = str.replace(r'`',r'&#96;')
27
- if str.endswith("\\"): str += ' '
28
- if str.endswith("}"): str += ' '
29
- if str.endswith("$"): str += ' '
30
- return str
31
-
32
- template_ = template
33
- a_lines = a.split('\n')
34
- b_lines = b.split('\n')
35
-
36
- if len(a_lines) == 1 or len(a_lines[0]) > 50:
37
- template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
38
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
39
- else:
40
- template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
41
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))
42
-
43
- if len(b_lines) == 1 or len(b_lines[0]) > 50:
44
- template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
45
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
46
- else:
47
- template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
48
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
49
- self.html_string += template_
50
-
51
- def save_file(self, file_name):
52
- from toolbox import get_log_folder
53
- with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
54
- html_template = f.read()
55
- html_template = html_template.replace("__TF_ARR__", self.html_string)
56
- with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
57
- f.write(html_template.encode('utf-8', 'ignore').decode())
58
- return os.path.join(get_log_folder(), file_name)
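construct_html was consumed by the (likewise deleted) translate_pdf code above, roughly as:

    ch = construct_html()
    ch.add_row(a="original paragraph", b="translated paragraph")   # sample strings
    html_path = ch.save_file("paper.trans.html")                   # written under the log folder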
crazy_functions/pdf_fns/report_template.html DELETED
The diff for this file is too large to render. See raw diff
 
crazy_functions/vector_fns/__init__.py DELETED
File without changes
crazy_functions/vector_fns/general_file_loader.py DELETED
@@ -1,70 +0,0 @@
- # From project chatglm-langchain
- 
- 
- from langchain.document_loaders import UnstructuredFileLoader
- from langchain.text_splitter import CharacterTextSplitter
- import re
- from typing import List
- 
- class ChineseTextSplitter(CharacterTextSplitter):
-     def __init__(self, pdf: bool = False, sentence_size: int = None, **kwargs):
-         super().__init__(**kwargs)
-         self.pdf = pdf
-         self.sentence_size = sentence_size
- 
-     def split_text1(self, text: str) -> List[str]:
-         if self.pdf:
-             text = re.sub(r"\n{3,}", "\n", text)
-             text = re.sub('\s', ' ', text)
-             text = text.replace("\n\n", "")
-         sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))')  # del :;
-         sent_list = []
-         for ele in sent_sep_pattern.split(text):
-             if sent_sep_pattern.match(ele) and sent_list:
-                 sent_list[-1] += ele
-             elif ele:
-                 sent_list.append(ele)
-         return sent_list
- 
-     def split_text(self, text: str) -> List[str]:   ## the logic here still needs further refinement
-         if self.pdf:
-             text = re.sub(r"\n{3,}", r"\n", text)
-             text = re.sub('\s', " ", text)
-             text = re.sub("\n\n", "", text)
- 
-         text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text)  # single-character sentence terminators
-         text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text)  # English ellipsis
-         text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text)  # Chinese ellipsis
-         text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text)
-         # a closing quote ends the sentence only when a terminator precedes it, so the split \n goes after the quote; note the rules above carefully preserve closing quotes
-         text = text.rstrip()  # drop any extra trailing \n at the end of the paragraph
-         # many rule sets also split on semicolons; they are deliberately ignored here, as are dashes and English double quotes; simple adjustments cover those cases if needed
-         ls = [i for i in text.split("\n") if i]
-         for ele in ls:
-             if len(ele) > self.sentence_size:
-                 ele1 = re.sub(r'([,,.]["’”」』]{0,2})([^,,.])', r'\1\n\2', ele)
-                 ele1_ls = ele1.split("\n")
-                 for ele_ele1 in ele1_ls:
-                     if len(ele_ele1) > self.sentence_size:
-                         ele_ele2 = re.sub(r'([\n]{1,}| {2,}["’”」』]{0,2})([^\s])', r'\1\n\2', ele_ele1)
-                         ele2_ls = ele_ele2.split("\n")
-                         for ele_ele2 in ele2_ls:
-                             if len(ele_ele2) > self.sentence_size:
-                                 ele_ele3 = re.sub('( ["’”」』]{0,2})([^ ])', r'\1\n\2', ele_ele2)
-                                 ele2_id = ele2_ls.index(ele_ele2)
-                                 ele2_ls = ele2_ls[:ele2_id] + [i for i in ele_ele3.split("\n") if i] + ele2_ls[ele2_id + 1:]
-                         ele_id = ele1_ls.index(ele_ele1)
-                         ele1_ls = ele1_ls[:ele_id] + [i for i in ele2_ls if i] + ele1_ls[ele_id + 1:]
- 
-                 id = ls.index(ele)
-                 ls = ls[:id] + [i for i in ele1_ls if i] + ls[id + 1:]
-         return ls
- 
- def load_file(filepath, sentence_size):
-     loader = UnstructuredFileLoader(filepath, mode="elements")
-     textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
-     docs = loader.load_and_split(text_splitter=textsplitter)
-     # write_check_file(filepath, docs)
-     return docs
- 
crazy_functions/vector_fns/vector_database.py DELETED
@@ -1,338 +0,0 @@
- # From project chatglm-langchain
- 
- import threading
- from toolbox import Singleton
- import os
- import shutil
- import os
- import uuid
- import tqdm
- from langchain.vectorstores import FAISS
- from langchain.docstore.document import Document
- from typing import List, Tuple
- import numpy as np
- from crazy_functions.vector_fns.general_file_loader import load_file
- 
- embedding_model_dict = {
-     "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
-     "ernie-base": "nghuyong/ernie-3.0-base-zh",
-     "text2vec-base": "shibing624/text2vec-base-chinese",
-     "text2vec": "GanymedeNil/text2vec-large-chinese",
- }
- 
- # Embedding model name
- EMBEDDING_MODEL = "text2vec"
- 
- # Embedding running device
- EMBEDDING_DEVICE = "cpu"
- 
- # Context-based prompt template; be sure to keep "{question}" and "{context}"
- PROMPT_TEMPLATE = """已知信息:
- {context}
- 
- 根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
- 
- # Sentence length used when splitting text
- SENTENCE_SIZE = 100
- 
- # Context length of a single matched chunk
- CHUNK_SIZE = 250
- 
- # LLM input history length
- LLM_HISTORY_LEN = 3
- 
- # return top-k text chunk from vector store
- VECTOR_SEARCH_TOP_K = 5
- 
- # Relevance score threshold for knowledge retrieval, roughly in the range 0-1100; 0 disables it. Testing shows values below 500 give more precise matches.
- VECTOR_SEARCH_SCORE_THRESHOLD = 0
- 
- NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
- 
- FLAG_USER_NAME = uuid.uuid4().hex
- 
- # Whether to allow cross-origin requests; defaults to False, set to True to enable
- # is open cross domain
- OPEN_CROSS_DOMAIN = False
- 
- def similarity_search_with_score_by_vector(
-         self, embedding: List[float], k: int = 4
- ) -> List[Tuple[Document, float]]:
- 
-     def seperate_list(ls: List[int]) -> List[List[int]]:
-         lists = []
-         ls1 = [ls[0]]
-         for i in range(1, len(ls)):
-             if ls[i - 1] + 1 == ls[i]:
-                 ls1.append(ls[i])
-             else:
-                 lists.append(ls1)
-                 ls1 = [ls[i]]
-         lists.append(ls1)
-         return lists
- 
-     scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k)
-     docs = []
-     id_set = set()
-     store_len = len(self.index_to_docstore_id)
-     for j, i in enumerate(indices[0]):
-         if i == -1 or 0 < self.score_threshold < scores[0][j]:
-             # This happens when not enough docs are returned.
-             continue
-         _id = self.index_to_docstore_id[i]
-         doc = self.docstore.search(_id)
-         if not self.chunk_conent:
-             if not isinstance(doc, Document):
-                 raise ValueError(f"Could not find document for id {_id}, got {doc}")
-             doc.metadata["score"] = int(scores[0][j])
-             docs.append(doc)
-             continue
-         id_set.add(i)
-         docs_len = len(doc.page_content)
-         for k in range(1, max(i, store_len - i)):
-             break_flag = False
-             for l in [i + k, i - k]:
-                 if 0 <= l < len(self.index_to_docstore_id):
-                     _id0 = self.index_to_docstore_id[l]
-                     doc0 = self.docstore.search(_id0)
-                     if docs_len + len(doc0.page_content) > self.chunk_size:
-                         break_flag = True
-                         break
-                     elif doc0.metadata["source"] == doc.metadata["source"]:
-                         docs_len += len(doc0.page_content)
-                         id_set.add(l)
-             if break_flag:
-                 break
-     if not self.chunk_conent:
-         return docs
-     if len(id_set) == 0 and self.score_threshold > 0:
-         return []
-     id_list = sorted(list(id_set))
-     id_lists = seperate_list(id_list)
-     for id_seq in id_lists:
-         for id in id_seq:
-             if id == id_seq[0]:
-                 _id = self.index_to_docstore_id[id]
-                 doc = self.docstore.search(_id)
-             else:
-                 _id0 = self.index_to_docstore_id[id]
-                 doc0 = self.docstore.search(_id0)
-                 doc.page_content += " " + doc0.page_content
-         if not isinstance(doc, Document):
-             raise ValueError(f"Could not find document for id {_id}, got {doc}")
-         doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]])
-         doc.metadata["score"] = int(doc_score)
-         docs.append(doc)
-     return docs
- 
- 
- class LocalDocQA:
-     llm: object = None
-     embeddings: object = None
-     top_k: int = VECTOR_SEARCH_TOP_K
-     chunk_size: int = CHUNK_SIZE
-     chunk_conent: bool = True
-     score_threshold: int = VECTOR_SEARCH_SCORE_THRESHOLD
- 
-     def init_cfg(self,
-                  top_k=VECTOR_SEARCH_TOP_K,
-                  ):
- 
-         self.llm = None
-         self.top_k = top_k
- 
-     def init_knowledge_vector_store(self,
-                                     filepath,
-                                     vs_path: str or os.PathLike = None,
-                                     sentence_size=SENTENCE_SIZE,
-                                     text2vec=None):
-         loaded_files = []
-         failed_files = []
-         if isinstance(filepath, str):
-             if not os.path.exists(filepath):
-                 print("路径不存在")
-                 return None
-             elif os.path.isfile(filepath):
-                 file = os.path.split(filepath)[-1]
-                 try:
-                     docs = load_file(filepath, SENTENCE_SIZE)
-                     print(f"{file} 已成功加载")
-                     loaded_files.append(filepath)
-                 except Exception as e:
-                     print(e)
-                     print(f"{file} 未能成功加载")
-                     return None
-             elif os.path.isdir(filepath):
-                 docs = []
-                 for file in tqdm(os.listdir(filepath), desc="加载文件"):
-                     fullfilepath = os.path.join(filepath, file)
-                     try:
-                         docs += load_file(fullfilepath, SENTENCE_SIZE)
-                         loaded_files.append(fullfilepath)
-                     except Exception as e:
-                         print(e)
-                         failed_files.append(file)
- 
-                 if len(failed_files) > 0:
-                     print("以下文件未能成功加载:")
-                     for file in failed_files:
-                         print(f"{file}\n")
- 
-         else:
-             docs = []
-             for file in filepath:
-                 docs += load_file(file, SENTENCE_SIZE)
-                 print(f"{file} 已成功加载")
-                 loaded_files.append(file)
- 
-         if len(docs) > 0:
-             print("文件加载完毕,正在生成向量库")
-             if vs_path and os.path.isdir(vs_path):
-                 try:
-                     self.vector_store = FAISS.load_local(vs_path, text2vec)
-                     self.vector_store.add_documents(docs)
-                 except:
-                     self.vector_store = FAISS.from_documents(docs, text2vec)
-             else:
-                 self.vector_store = FAISS.from_documents(docs, text2vec)  # docs is a list of Document
- 
-             self.vector_store.save_local(vs_path)
-             return vs_path, loaded_files
-         else:
-             raise RuntimeError("文件加载失败,请检查文件格式是否正确")
- 
-     def get_loaded_file(self, vs_path):
-         ds = self.vector_store.docstore
-         return set([ds._dict[k].metadata['source'].split(vs_path)[-1] for k in ds._dict])
- 
- 
-     # query: the query text
-     # vs_path: path of the knowledge base
-     # chunk_conent: whether to enable context linking
-     # score_threshold: score threshold for search matching
-     # vector_search_top_k: number of knowledge-base entries to retrieve, 5 by default
-     # chunk_sizes: context length linked to a single matched chunk
-     def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent,
-                                         score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
-                                         vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE,
-                                         text2vec=None):
-         self.vector_store = FAISS.load_local(vs_path, text2vec)
-         self.vector_store.chunk_conent = chunk_conent
-         self.vector_store.score_threshold = score_threshold
-         self.vector_store.chunk_size = chunk_size
- 
-         embedding = self.vector_store.embedding_function.embed_query(query)
-         related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k)
- 
-         if not related_docs_with_score:
-             response = {"query": query,
-                         "source_documents": []}
-             return response, ""
-         # prompt = f"{query}. You should answer this question using information from following documents: \n\n"
-         prompt = f"{query}. 你必须利用以下文档中包含的信息回答这个问题: \n\n---\n\n"
-         prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
-         prompt += "\n\n---\n\n"
-         prompt = prompt.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
-         # print(prompt)
-         response = {"query": query, "source_documents": related_docs_with_score}
-         return response, prompt
- 
- 
- 
- 
- def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec):
-     for file in files:
-         assert os.path.exists(file), "输入文件不存在:" + file
-     import nltk
-     if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
-     local_doc_qa = LocalDocQA()
-     local_doc_qa.init_cfg()
-     filelist = []
-     if not os.path.exists(os.path.join(vs_path, vs_id)):
-         os.makedirs(os.path.join(vs_path, vs_id))
-     for file in files:
-         file_name = file.name if not isinstance(file, str) else file
-         filename = os.path.split(file_name)[-1]
-         shutil.copyfile(file_name, os.path.join(vs_path, vs_id, filename))
-         filelist.append(os.path.join(vs_path, vs_id, filename))
-     vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, os.path.join(vs_path, vs_id), sentence_size, text2vec)
- 
-     if len(loaded_files):
-         file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问"
-     else:
-         pass
-         # file_status = "文件未成功加载,请重新上传文件"
-     # print(file_status)
-     return local_doc_qa, vs_path
- 
- @Singleton
- class knowledge_archive_interface():
-     def __init__(self) -> None:
-         self.threadLock = threading.Lock()
-         self.current_id = ""
-         self.kai_path = None
-         self.qa_handle = None
-         self.text2vec_large_chinese = None
- 
-     def get_chinese_text2vec(self):
-         if self.text2vec_large_chinese is None:
-             # < ------------------- warm up the text2vec embedding module --------------- >
-             from toolbox import ProxyNetworkActivate
-             print('Checking Text2vec ...')
-             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-             with ProxyNetworkActivate('Download_LLM'):  # temporarily activate the proxy network
-                 self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
- 
-         return self.text2vec_large_chinese
- 
- 
-     def feed_archive(self, file_manifest, vs_path, id="default"):
-         self.threadLock.acquire()
-         # import uuid
-         self.current_id = id
-         self.qa_handle, self.kai_path = construct_vector_store(
-             vs_id=self.current_id,
-             vs_path=vs_path,
-             files=file_manifest,
-             sentence_size=100,
-             history=[],
-             one_conent="",
-             one_content_segmentation="",
-             text2vec = self.get_chinese_text2vec(),
-         )
-         self.threadLock.release()
- 
-     def get_current_archive_id(self):
-         return self.current_id
- 
-     def get_loaded_file(self, vs_path):
-         return self.qa_handle.get_loaded_file(vs_path)
- 
-     def answer_with_archive_by_id(self, txt, id, vs_path):
-         self.threadLock.acquire()
-         if not self.current_id == id:
-             self.current_id = id
-             self.qa_handle, self.kai_path = construct_vector_store(
-                 vs_id=self.current_id,
-                 vs_path=vs_path,
-                 files=[],
-                 sentence_size=100,
-                 history=[],
-                 one_conent="",
-                 one_content_segmentation="",
-                 text2vec = self.get_chinese_text2vec(),
-             )
-         VECTOR_SEARCH_SCORE_THRESHOLD = 0
-         VECTOR_SEARCH_TOP_K = 4
-         CHUNK_SIZE = 512
-         resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
-             query = txt,
-             vs_path = self.kai_path,
-             score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
-             vector_search_top_k=VECTOR_SEARCH_TOP_K,
-             chunk_conent=True,
-             chunk_size=CHUNK_SIZE,
-             text2vec = self.get_chinese_text2vec(),
-         )
-         self.threadLock.release()
-         return resp, prompt
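A minimal end-to-end sketch of the removed knowledge-base interface (not part of the diff; the paths, manifest, and question are illustrative, and it assumes the text2vec model can be downloaded):

```python
# Hypothetical driver for the removed singleton above.
kai = knowledge_archive_interface()
kai.feed_archive(file_manifest=['docs/readme.md'], vs_path='gpt_log/vector_store', id='default')
resp, prompt = kai.answer_with_archive_by_id('如何配置代理?', id='default', vs_path='gpt_log/vector_store')
print(prompt)  # the retrieval-augmented prompt handed to the LLM
```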
crazy_functions/vt_fns/vt_call_plugin.py CHANGED
@@ -1,7 +1,7 @@
  from pydantic import BaseModel, Field
  from typing import List
  from toolbox import update_ui_lastest_msg, disable_auto_promotion
- from request_llms.bridge_all import predict_no_ui_long_connection
+ from request_llm.bridge_all import predict_no_ui_long_connection
  from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
  import copy, json, pickle, os, sys, time
crazy_functions/vt_fns/vt_modify_config.py CHANGED
@@ -1,13 +1,13 @@
  from pydantic import BaseModel, Field
  from typing import List
  from toolbox import update_ui_lastest_msg, get_conf
- from request_llms.bridge_all import predict_no_ui_long_connection
+ from request_llm.bridge_all import predict_no_ui_long_connection
  from crazy_functions.json_fns.pydantic_io import GptJsonIO
  import copy, json, pickle, os, sys
  
  
  def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
-     ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
+     ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
      if not ALLOW_RESET_CONFIG:
          yield from update_ui_lastest_msg(
              lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
@@ -66,7 +66,7 @@ def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
      )
  
  def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
-     ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
+     ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
      if not ALLOW_RESET_CONFIG:
          yield from update_ui_lastest_msg(
              lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
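The one-character change above (and the recurring `proxies, = get_conf('proxies')` edits elsewhere in this diff) reflects a difference in the get_conf return type between the two sides. The sketch below is inferred from the call sites, not from toolbox.py itself:

```python
# Left side of the diff: get_conf('KEY') returns the value itself.
ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')

# Right side of the diff: get_conf('KEY') returns a 1-tuple, so the call
# site unpacks it with a trailing comma.
ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
```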
crazy_functions/下载arxiv论文翻译摘要.py CHANGED
@@ -1,6 +1,6 @@
  from toolbox import update_ui, get_log_folder
  from toolbox import write_history_to_file, promote_file_to_downloadzone
- from toolbox import CatchException, report_exception, get_conf
+ from toolbox import CatchException, report_execption, get_conf
  import re, requests, unicodedata, os
  from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
  def download_arxiv_(url_pdf):
@@ -43,7 +43,7 @@ def download_arxiv_(url_pdf):
      file_path = download_dir+title_str
  
      print('下载中')
-     proxies = get_conf('proxies')
+     proxies, = get_conf('proxies')
      r = requests.get(requests_pdf_url, proxies=proxies)
      with open(file_path, 'wb+') as f:
          f.write(r.content)
@@ -77,7 +77,7 @@ def get_name(_url_):
      # print('在缓存中')
      # return arxiv_recall[_url_]
  
-     proxies = get_conf('proxies')
+     proxies, = get_conf('proxies')
      res = requests.get(_url_, proxies=proxies)
  
      bs = BeautifulSoup(res.text, 'html.parser')
@@ -130,7 +130,7 @@ def get_name(_url_):
  
  
  @CatchException
- def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
  
      CRAZY_FUNCTION_INFO = "下载arxiv论文并翻译摘要,函数插件作者[binary-husky]。正在提取摘要并下载PDF文档……"
      import glob
@@ -144,7 +144,7 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
      try:
          import bs4
      except:
-         report_exception(chatbot, history,
+         report_execption(chatbot, history,
              a = f"解析项目: {txt}",
              b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4```。")
          yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
@@ -157,7 +157,7 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
      try:
          pdf_path, info = download_arxiv_(txt)
      except:
-         report_exception(chatbot, history,
+         report_execption(chatbot, history,
              a = f"解析项目: {txt}",
              b = f"下载pdf文件未成功")
          yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
crazy_functions/互动小游戏.py DELETED
@@ -1,40 +0,0 @@
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
- from request_llms.bridge_all import predict_no_ui_long_connection
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
- 
- @CatchException
- def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     from crazy_functions.game_fns.game_interactive_story import MiniGame_ResumeStory
-     # clear the history
-     history = []
-     # pick the game
-     cls = MiniGame_ResumeStory
-     # resume the game instance if one was initialized earlier, otherwise initialize a new one
-     state = cls.sync_state(chatbot,
-                            llm_kwargs,
-                            cls,
-                            plugin_name='MiniGame_ResumeStory',
-                            callback_fn='crazy_functions.互动小游戏->随机小游戏',
-                            lock_plugin=True
-                            )
-     yield from state.continue_game(prompt, chatbot, history)
- 
- 
- @CatchException
- def 随机小游戏1(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     from crazy_functions.game_fns.game_ascii_art import MiniGame_ASCII_Art
-     # clear the history
-     history = []
-     # pick the game
-     cls = MiniGame_ASCII_Art
-     # resume the game instance if one was initialized earlier, otherwise initialize a new one
-     state = cls.sync_state(chatbot,
-                            llm_kwargs,
-                            cls,
-                            plugin_name='MiniGame_ASCII_Art',
-                            callback_fn='crazy_functions.互动小游戏->随机小游戏1',
-                            lock_plugin=True
-                            )
-     yield from state.continue_game(prompt, chatbot, history)
crazy_functions/交互功能函数模板.py CHANGED
@@ -3,7 +3,7 @@ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
  
  
  @CatchException
- def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
@@ -11,7 +11,7 @@ def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
+     web_port        the port this software is currently running on
      """
      history = []    # clear the history to avoid input overflow
      chatbot.append(("这是什么功能?", "交互功能函数模板。在执行完成之后, 可以将自身的状态存储到cookie中, 等待用户的再次调用。"))
crazy_functions/函数动态生成.py DELETED
@@ -1,252 +0,0 @@
- # In this source file, ⭐ = key step
- """
- 测试:
-     - 裁剪图像,保留下半部分
-     - 交换图像的蓝色通道和红色通道
-     - 将图像转为灰度图像
-     - 将csv文件转excel表格
- 
- Testing:
-     - Crop the image, keeping the bottom half.
-     - Swap the blue channel and red channel of the image.
-     - Convert the image to grayscale.
-     - Convert the CSV file to an Excel spreadsheet.
- """
- 
- 
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
- from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
- from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
- from .crazy_utils import input_clipping, try_install_deps
- from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
- from crazy_functions.gen_fns.gen_fns_shared import get_class_name
- from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker
- from crazy_functions.gen_fns.gen_fns_shared import try_make_module
- import os
- import time
- import glob
- import multiprocessing
- 
- templete = """
- ```python
- import ...  # Put dependencies here, e.g. import numpy as np.
- 
- class TerminalFunction(object):  # Do not change the name of the class, The name of the class must be `TerminalFunction`
- 
-     def run(self, path):    # The name of the function must be `run`, it takes only a positional argument.
-         # rewrite the function you have just written here
-         ...
-         return generated_file_path
- ```
- """
- 
- def inspect_dependency(chatbot, history):
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-     return True
- 
- def get_code_block(reply):
-     import re
-     pattern = r"```([\s\S]*?)```"  # regex pattern to match code blocks
-     matches = re.findall(pattern, reply)  # find all code blocks in text
-     if len(matches) == 1:
-         return matches[0].strip('python')  # code block
-     for match in matches:
-         if 'class TerminalFunction' in match:
-             return match.strip('python')  # code block
-     raise RuntimeError("GPT is not generating proper code.")
- 
- def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
-     # input
-     prompt_compose = [
-         f'Your job:\n'
-         f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
-         f"2. You should write this function to perform following task: " + txt + "\n",
-         f"3. Wrap the output python function with markdown codeblock."
-     ]
-     i_say = "".join(prompt_compose)
-     demo = []
- 
-     # step one
-     gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-         inputs=i_say, inputs_show_user=i_say,
-         llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
-         sys_prompt= r"You are a world-class programmer."
-     )
-     history.extend([i_say, gpt_say])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
-     # step two
-     prompt_compose = [
-         "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
-         templete
-     ]
-     i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
-     gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-         inputs=i_say, inputs_show_user=inputs_show_user,
-         llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-         sys_prompt= r"You are a programmer. You need to replace `...` with valid packages, do not give `...` in your answer!"
-     )
-     code_to_return = gpt_say
-     history.extend([i_say, gpt_say])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
-     # # step three
-     # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
-     # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
-     # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-     #     inputs=i_say, inputs_show_user=inputs_show_user,
-     #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-     #     sys_prompt= r"You are a programmer."
-     # )
- 
-     # # # step three
-     # i_say = "Show me how to use `pip` to install packages to run the code above. "
-     # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
-     # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-     #     inputs=i_say, inputs_show_user=i_say,
-     #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-     #     sys_prompt= r"You are a programmer."
-     # )
-     installation_advance = ""
- 
-     return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
- 
- 
- 
- 
- def for_immediate_show_off_when_possible(file_type, fp, chatbot):
-     if file_type in ['png', 'jpg']:
-         image_path = os.path.abspath(fp)
-         chatbot.append(['这是一张图片, 展示如下:',
-             f'本地文件地址: <br/>`{image_path}`<br/>'+
-             f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-         ])
-     return chatbot
- 
- 
- 
- def have_any_recent_upload_files(chatbot):
-     _5min = 5 * 60
-     if not chatbot: return False    # chatbot is None
-     most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-     if not most_recent_uploaded: return False   # most_recent_uploaded is None
-     if time.time() - most_recent_uploaded["time"] < _5min: return True  # most_recent_uploaded is new
-     else: return False  # most_recent_uploaded is too old
- 
- def get_recent_file_prompt_support(chatbot):
-     most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-     path = most_recent_uploaded['path']
-     return path
- 
- @CatchException
- def 函数动态生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     """
-     txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
-     llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
-     plugin_kwargs   plugin parameters, unused for now
-     chatbot         handle of the chat display box, used to show output to the user
-     history         chat history, the context so far
-     system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
-     """
- 
-     # clear the history
-     history = []
- 
-     # basic info: feature, contributor
-     chatbot.append(["正在启动: 插件动态生成插件", "插件动态生成, 执行开始, 作者Binary-Husky."])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
-     # ⭐ is there anything in the file upload area?
-     # 1. if there is a file: use it as the function argument
-     # 2. if there is no file: the parameters would need to be extracted with GPT (left for later; the void terminal already implements similar code)
-     file_list = []
-     if get_plugin_arg(plugin_kwargs, key="file_path_arg", default=False):
-         file_path = get_plugin_arg(plugin_kwargs, key="file_path_arg", default=None)
-         file_list.append(file_path)
-         yield from update_ui_lastest_msg(f"当前文件: {file_path}", chatbot, history, 1)
-     elif have_any_recent_upload_files(chatbot):
-         file_dir = get_recent_file_prompt_support(chatbot)
-         file_list = glob.glob(os.path.join(file_dir, '**/*'), recursive=True)
-         yield from update_ui_lastest_msg(f"当前文件处理列表: {file_list}", chatbot, history, 1)
-     else:
-         chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
-         yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
-         return  # 2. if there is no file
-     if len(file_list) == 0:
-         chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
-         yield from update_ui_lastest_msg("没有发现任何近期上传的文件。", chatbot, history, 1)
-         return  # 2. if there is no file
- 
-     # read the file
-     file_type = file_list[0].split('.')[-1]
- 
-     # carelessness check
-     if is_the_upload_folder(txt):
-         yield from update_ui_lastest_msg(f"请在输入框内填写需求, 然后再次点击该插件! 至于您的文件,不用担心, 文件路径 {txt} 已经被记忆. ", chatbot, history, 1)
-         return
- 
-     # get down to business
-     MAX_TRY = 3
-     for j in range(MAX_TRY):  # retry at most MAX_TRY times
-         traceback = ""
-         try:
-             # ⭐ here we go!
-             code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
-                 yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
-             chatbot.append(["代码生成阶段结束", ""])
-             yield from update_ui_lastest_msg(f"正在验证上述代码的有效性 ...", chatbot, history, 1)
-             # ⭐ extract the code block
-             code = get_code_block(code)
-             # ⭐ check the module
-             ok, traceback = try_make_module(code, chatbot)
-             # code generation done
-             if ok: break
-         except Exception as e:
-             if not traceback: traceback = trimmed_format_exc()
-         # handle the exception
-         if not traceback: traceback = trimmed_format_exc()
-         yield from update_ui_lastest_msg(f"第 {j+1}/{MAX_TRY} 次代码生成尝试, 失败了~ 别担心, 我们5秒后再试一次... \n\n此次我们的错误追踪是\n```\n{traceback}\n```\n", chatbot, history, 5)
- 
-     # code generation finished, start executing
-     TIME_LIMIT = 15
-     yield from update_ui_lastest_msg(f"开始创建新进程并执行代码! 时间限制 {TIME_LIMIT} 秒. 请等待任务完成... ", chatbot, history, 1)
-     manager = multiprocessing.Manager()
-     return_dict = manager.dict()
- 
-     # ⭐ final step: process the files one by one
-     for file_path in file_list:
-         if os.path.exists(file_path):
-             chatbot.append([f"正在处理文件: {file_path}", f"请稍等..."])
-             chatbot = for_immediate_show_off_when_possible(file_type, file_path, chatbot)
-             yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         else:
-             continue
- 
-         # ⭐⭐⭐ subprocess_worker ⭐⭐⭐
-         p = multiprocessing.Process(target=subprocess_worker, args=(code, file_path, return_dict))
-         # ⭐ start execution, with time limit TIME_LIMIT
-         p.start(); p.join(timeout=TIME_LIMIT)
-         if p.is_alive(): p.terminate(); p.join()
-         p.close()
-         res = return_dict['result']
-         success = return_dict['success']
-         traceback = return_dict['traceback']
-         if not success:
-             if not traceback: traceback = trimmed_format_exc()
-             chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
-             # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
-             yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-             return
- 
-         # completed successfully, wrap up
-         res = str(res)
-         if os.path.exists(res):
-             chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
-             new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
-             chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
-             yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         else:
-             chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
-             yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
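To make the `templete` contract above concrete: a hypothetical instance of TerminalFunction for the "convert the CSV file to an Excel spreadsheet" test case listed in the removed module's docstring. This is not code from the repository, and pandas plus openpyxl are assumed installed:

```python
import pandas as pd  # dependency filled into the template's `import ...` slot

class TerminalFunction(object):  # the class name is fixed by the template
    def run(self, path):  # takes only the file path; returns a result string or a generated file path
        df = pd.read_csv(path)
        generated_file_path = path.rsplit('.', 1)[0] + '.xlsx'
        df.to_excel(generated_file_path, index=False)
        return generated_file_path
```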
crazy_functions/命令行助手.py CHANGED
@@ -4,7 +4,7 @@ from .crazy_utils import input_clipping
  import copy, json
  
  @CatchException
- def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
@@ -12,7 +12,7 @@ def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
+     web_port        the port this software is currently running on
      """
      # clear the history to avoid input overflow
      history = []
crazy_functions/图片生成.py CHANGED
@@ -1,12 +1,13 @@
  from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+ import datetime
  
  
- def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None, style=None):
+ def gen_image(llm_kwargs, prompt, resolution="256x256"):
      import requests, json, time, os
-     from request_llms.bridge_all import model_info
+     from request_llm.bridge_all import model_info
  
-     proxies = get_conf('proxies')
+     proxies, = get_conf('proxies')
      # Set up OpenAI API key and model
      api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
      chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
@@ -22,13 +23,8 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
          'prompt': prompt,
          'n': 1,
          'size': resolution,
-         'model': model,
          'response_format': 'url'
      }
-     if quality is not None:
-         data['quality'] = quality
-     if style is not None:
-         data['style'] = style
      response = requests.post(url, headers=headers, json=data, proxies=proxies)
      print(response.content)
      try:
@@ -46,72 +42,23 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
      return image_url, file_path+file_name
  
  
- def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="dall-e-2"):
-     import requests, json, time, os
-     from request_llms.bridge_all import model_info
- 
-     proxies = get_conf('proxies')
-     api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
-     chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
-     # 'https://api.openai.com/v1/chat/completions'
-     img_endpoint = chat_endpoint.replace('chat/completions','images/edits')
-     # # Generate the image
-     url = img_endpoint
-     n = 1
-     headers = {
-         'Authorization': f"Bearer {api_key}",
-     }
-     make_transparent(image_path, image_path+'.tsp.png')
-     make_square_image(image_path+'.tsp.png', image_path+'.tspsq.png')
-     resize_image(image_path+'.tspsq.png', image_path+'.ready.png', max_size=1024)
-     image_path = image_path+'.ready.png'
-     with open(image_path, 'rb') as f:
-         file_content = f.read()
-     files = {
-         'image': (os.path.basename(image_path), file_content),
-         # 'mask': ('mask.png', open('mask.png', 'rb'))
-         'prompt': (None, prompt),
-         "n": (None, str(n)),
-         'size': (None, resolution),
-     }
- 
-     response = requests.post(url, headers=headers, files=files, proxies=proxies)
-     print(response.content)
-     try:
-         image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
-     except:
-         raise RuntimeError(response.content.decode())
-     # save the file locally
-     r = requests.get(image_url, proxies=proxies)
-     file_path = f'{get_log_folder()}/image_gen/'
-     os.makedirs(file_path, exist_ok=True)
-     file_name = 'Image' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.png'
-     with open(file_path+file_name, 'wb+') as f: f.write(r.content)
- 
- 
-     return image_url, file_path+file_name
- 
  
  @CatchException
- def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 图片生成(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
      plugin_kwargs   plugin parameters, unused for now
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
+     web_port        the port this software is currently running on
      """
-     history = []    # clear the history to avoid input overflow
-     if prompt.strip() == "":
-         chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         return
-     chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI first since the gpt request will take a while
+     history = []    # clear the history to avoid input overflow
+     chatbot.append(("这是什么功能?", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文效果不理想, 请尝试英文Prompt。正在处理中 ....."))
+     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI first since the gpt request will take a while
      if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-     resolution = plugin_kwargs.get("advanced_arg", '1024x1024')
+     resolution = plugin_kwargs.get("advanced_arg", '256x256')
      image_url, image_path = gen_image(llm_kwargs, prompt, resolution)
      chatbot.append([prompt,
          f'图像中转网址: <br/>`{image_url}`<br/>'+
@@ -119,158 +66,4 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
          f'本地文件地址: <br/>`{image_path}`<br/>'+
          f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
          ])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
- 
- @CatchException
- def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     history = []    # clear the history to avoid input overflow
-     if prompt.strip() == "":
-         chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         return
-     chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI first since the gpt request will take a while
-     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-     resolution_arg = plugin_kwargs.get("advanced_arg", '1024x1024-standard-vivid').lower()
-     parts = resolution_arg.split('-')
-     resolution = parts[0]   # parse the resolution
-     quality = 'standard'    # default quality and style
-     style = 'vivid'
-     # check the remaining parts for extra parameters
-     for part in parts[1:]:
-         if part in ['hd', 'standard']:
-             quality = part
-         elif part in ['vivid', 'natural']:
-             style = part
-     image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality, style=style)
-     chatbot.append([prompt,
-         f'图像中转网址: <br/>`{image_url}`<br/>'+
-         f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
-         f'本地文件地址: <br/>`{image_path}`<br/>'+
-         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-     ])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
- 
- 
- class ImageEditState(GptAcademicState):
-     # unfinished
-     def get_image_file(self, x):
-         import os, glob
-         if len(x) == 0: return False, None
-         if not os.path.exists(x): return False, None
-         if x.endswith('.png'): return True, x
-         file_manifest = [f for f in glob.glob(f'{x}/**/*.png', recursive=True)]
-         confirm = (len(file_manifest) >= 1 and file_manifest[0].endswith('.png') and os.path.exists(file_manifest[0]))
-         file = None if not confirm else file_manifest[0]
-         return confirm, file
- 
-     def lock_plugin(self, chatbot):
-         chatbot._cookies['lock_plugin'] = 'crazy_functions.图片生成->图片修改_DALLE2'
-         self.dump_state(chatbot)
- 
-     def unlock_plugin(self, chatbot):
-         self.reset()
-         chatbot._cookies['lock_plugin'] = None
-         self.dump_state(chatbot)
- 
-     def get_resolution(self, x):
-         return (x in ['256x256', '512x512', '1024x1024']), x
- 
-     def get_prompt(self, x):
-         confirm = (len(x)>=5) and (not self.get_resolution(x)[0]) and (not self.get_image_file(x)[0])
-         return confirm, x
- 
-     def reset(self):
-         self.req = [
-             {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file},
-             {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024, 然后再次点击本插件', 'verify_fn': self.get_resolution},
-             {'value':None, 'description': '请输入修改需求,建议您使用英文提示词, 然后再次点击本插件', 'verify_fn': self.get_prompt},
-         ]
-         self.info = ""
- 
-     def feed(self, prompt, chatbot):
-         for r in self.req:
-             if r['value'] is None:
-                 confirm, res = r['verify_fn'](prompt)
-                 if confirm:
-                     r['value'] = res
-                     self.dump_state(chatbot)
-                     break
-         return self
- 
-     def next_req(self):
-         for r in self.req:
-             if r['value'] is None:
-                 return r['description']
-         return "已经收集到所有信息"
- 
-     def already_obtained_all_materials(self):
-         return all([x['value'] is not None for x in self.req])
- 
- @CatchException
- def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     # unfinished
-     history = []    # clear the history
-     state = ImageEditState.get_state(chatbot, ImageEditState)
-     state = state.feed(prompt, chatbot)
-     state.lock_plugin(chatbot)
-     if not state.already_obtained_all_materials():
-         chatbot.append(["图片修改\n\n1. 上传图片(图片中需要修改的位置用橡皮擦擦除为纯白色,即RGB=255,255,255)\n2. 输入分辨率 \n3. 输入修改需求", state.next_req()])
-         yield from update_ui(chatbot=chatbot, history=history)
-         return
- 
-     image_path = state.req[0]['value']
-     resolution = state.req[1]['value']
-     prompt = state.req[2]['value']
-     chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`<br/>分辨率:`{resolution}`<br/>修改需求:`{prompt}`"])
-     yield from update_ui(chatbot=chatbot, history=history)
-     image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution)
-     chatbot.append([prompt,
-         f'图像中转网址: <br/>`{image_url}`<br/>'+
-         f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
-         f'本地文件地址: <br/>`{image_path}`<br/>'+
-         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-     ])
-     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-     state.unlock_plugin(chatbot)
- 
- def make_transparent(input_image_path, output_image_path):
-     from PIL import Image
-     image = Image.open(input_image_path)
-     image = image.convert("RGBA")
-     data = image.getdata()
-     new_data = []
-     for item in data:
-         if item[0] == 255 and item[1] == 255 and item[2] == 255:
-             new_data.append((255, 255, 255, 0))
-         else:
-             new_data.append(item)
-     image.putdata(new_data)
-     image.save(output_image_path, "PNG")
- 
- def resize_image(input_path, output_path, max_size=1024):
-     from PIL import Image
-     with Image.open(input_path) as img:
-         width, height = img.size
-         if width > max_size or height > max_size:
-             if width >= height:
-                 new_width = max_size
-                 new_height = int((max_size / width) * height)
-             else:
-                 new_height = max_size
-                 new_width = int((max_size / height) * width)
- 
-             resized_img = img.resize(size=(new_width, new_height))
-             resized_img.save(output_path)
-         else:
-             img.save(output_path)
- 
- def make_square_image(input_path, output_path):
-     from PIL import Image
-     with Image.open(input_path) as img:
-         width, height = img.size
-         size = max(width, height)
-         new_img = Image.new("RGBA", (size, size), color="black")
-         new_img.paste(img, ((size - width) // 2, (size - height) // 2))
-         new_img.save(output_path)
+     yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
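For reference, a hypothetical call into gen_image as it stands on the right side of this diff (the api_key and prompt are placeholders; llm_kwargs normally arrives from the main app):

```python
# Minimal sketch, not part of the diff: llm_kwargs must carry 'api_key' and 'llm_model'.
llm_kwargs = {'api_key': 'sk-...', 'llm_model': 'gpt-3.5-turbo'}
image_url, image_path = gen_image(llm_kwargs, prompt="a watercolor fox", resolution="256x256")
print(image_url)   # transient OpenAI URL
print(image_path)  # local copy under get_log_folder()/image_gen/
```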
crazy_functions/多智能体.py DELETED
@@ -1,101 +0,0 @@
- # In this source file, ⭐ = key step
- """
- 测试:
-     - show me the solution of $x^2=cos(x)$, solve this problem with figure, and plot and save image to t.jpg
- 
- """
- 
- 
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
- from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
- from crazy_functions.crazy_utils import input_clipping, try_install_deps
- from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
- from crazy_functions.agent_fns.auto_agent import AutoGenMath
- import time
- 
- def remove_model_prefix(llm):
-     if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
-     if llm.startswith('azure-'): llm = llm.replace('azure-', '')
-     return llm
- 
- 
- @CatchException
- def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-     """
-     txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
-     llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
-     plugin_kwargs   plugin parameters
-     chatbot         handle of the chat display box, used to show output to the user
-     history         chat history, the context so far
-     system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
-     """
-     # check whether the current model meets the requirements
-     supported_llms = [
-         "gpt-3.5-turbo-16k",
-         'gpt-3.5-turbo-1106',
-         "gpt-4",
-         "gpt-4-32k",
-         'gpt-4-1106-preview',
-         "azure-gpt-3.5-turbo-16k",
-         "azure-gpt-3.5-16k",
-         "azure-gpt-4",
-         "azure-gpt-4-32k",
-     ]
-     from request_llms.bridge_all import model_info
-     if model_info[llm_kwargs['llm_model']]["max_token"] < 8000:  # requires a model with at least an 8k context
-         chatbot.append([f"处理任务: {txt}", f"当前插件只支持{str(supported_llms)}, 当前模型{llm_kwargs['llm_model']}的最大上下文长度太短, 不能支撑AutoGen运行。"])
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         return
-     if model_info[llm_kwargs['llm_model']]["endpoint"] is not None:  # if it is not a local model, load the API_KEY
-         llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
- 
-     # try to import the dependencies; if they are missing, suggest how to install them
-     try:
-         import autogen
-         if get_conf("AUTOGEN_USE_DOCKER"):
-             import docker
-     except:
-         chatbot.append([ f"处理任务: {txt}",
-             f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pyautogen docker```。"])
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         return
- 
-     # try to import the dependencies; if they are missing, suggest how to install them
-     try:
-         import autogen
-         import glob, os, time, subprocess
-         if get_conf("AUTOGEN_USE_DOCKER"):
-             subprocess.Popen(["docker", "--version"])
-     except:
-         chatbot.append([f"处理任务: {txt}", f"缺少docker运行环境!"])
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         return
- 
-     # unlock the plugin
-     chatbot.get_cookies()['lock_plugin'] = None
-     persistent_class_multi_user_manager = GradioMultiuserManagerForPersistentClasses()
-     user_uuid = chatbot.get_cookies().get('uuid')
-     persistent_key = f"{user_uuid}->多智能体终端"
-     if persistent_class_multi_user_manager.already_alive(persistent_key):
-         # when a multi-agent terminal is already running, feed the user input to it instead of starting a new one
-         print('[debug] feed new user input')
-         executor = persistent_class_multi_user_manager.get(persistent_key)
-         exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
-     else:
-         # run the multi-agent terminal (first time)
-         print('[debug] create new executor instance')
-         history = []
-         chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
-         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-         executor = AutoGenMath(llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
-         persistent_class_multi_user_manager.set(persistent_key, executor)
-         exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="create")
- 
-     if exit_reason == "wait_feedback":
-         # when the user clicks the "wait for feedback" button, store the executor in a cookie and wait for the user to call again
-         executor.chatbot.get_cookies()['lock_plugin'] = 'crazy_functions.多智能体->多智能体终端'
-     else:
-         executor.chatbot.get_cookies()['lock_plugin'] = None
-     yield from update_ui(chatbot=executor.chatbot, history=executor.history)  # update the state
crazy_functions/对话历史存档.py CHANGED
@@ -1,8 +1,7 @@
- from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
+ from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
  import re
  
- f_prefix = 'GPT-Academic对话存档'
- 
  def write_chat_to_file(chatbot, history=None, file_name=None):
      """
      Write the chat record `history` to a file in Markdown format; if no file name is given, generate one from the current time.
@@ -10,8 +9,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
      import os
      import time
      if file_name is None:
-         file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
-     fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
+         file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
+     fp = os.path.join(get_log_folder(), file_name)
      with open(fp, 'w', encoding='utf8') as f:
          from themes.theme import advanced_css
          f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
@@ -69,7 +68,7 @@ def read_file_to_chat(chatbot, history, file_name):
      return chatbot, history
  
  @CatchException
- def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
@@ -77,11 +76,11 @@ def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
+     web_port        the port this software is currently running on
      """
  
      chatbot.append(("保存当前对话",
-         f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用下拉菜单中的“载入对话历史存档”还原当下的对话。"))
+         f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用“载入对话历史存档”还原当下的对话。\n警告!被保存的对话历史可以被使用该系统的任何人查阅。"))
      yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI first since the gpt request will take a while
  
  def hide_cwd(str):
@@ -91,7 +90,7 @@ def hide_cwd(str):
      return str.replace(current_path, replace_path)
  
  @CatchException
- def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
@@ -99,7 +98,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
+     web_port        the port this software is currently running on
      """
      from .crazy_utils import get_files_from_everything
      success, file_manifest, _ = get_files_from_everything(txt, type='.html')
@@ -107,12 +106,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
      if not success:
          if txt == "": txt = '空空如也的输入栏'
          import glob
-         local_history = "<br/>".join([
-             "`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
-             for f in glob.glob(
-                 f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
-                 recursive=True
-             )])
+         local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
          chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
          yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
          return
@@ -126,7 +120,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
      return
  
  @CatchException
- def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
      """
      txt             text entered by the user in the input field, e.g. a passage to translate, or a path containing files to be processed
      llm_kwargs      gpt model parameters, such as temperature and top_p, usually passed through as-is
@@ -134,16 +128,12 @@ def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot
      chatbot         handle of the chat display box, used to show output to the user
      history         chat history, the context so far
      system_prompt   the silent reminder given to gpt
-     user_request    the current user's request info (IP address etc.)
      """
  
      import glob, os
-     local_history = "<br/>".join([
-         "`"+hide_cwd(f)+"`"
-         for f in glob.glob(
-             f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True
-         )])
-     for f in glob.glob(f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html', recursive=True):
          os.remove(f)
      chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
      yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
111
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
112
  return
 
120
  return
121
 
122
  @CatchException
123
+ def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
124
  """
125
  txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
126
  llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
 
128
  chatbot 聊天显示框的句柄,用于显示给用户
129
  history 聊天历史,前情提要
130
  system_prompt 给gpt的静默提醒
131
+ web_port 当前软件运行的端口号
132
  """
133
 
134
  import glob, os
135
+ local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
136
+ for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True):
 
 
 
 
137
  os.remove(f)
138
  chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
139
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
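
Taken together, the changes to 对话历史存档.py move archive storage from per-user log folders (get_log_folder(get_user(chatbot), plugin_name='chat_history') with the 'GPT-Academic对话存档' prefix) to a single shared log folder with the 'chatGPT对话历史' prefix, and the plugin entry points swap the user_request parameter for web_port; this is why the new save message warns that stored histories are readable by anyone using the system. The naming and lookup scheme can be reproduced in isolation as a minimal sketch; get_log_folder below is a hypothetical stand-in for the toolbox helper, not its actual implementation.

import glob
import os
import time

def get_log_folder():
    # Hypothetical stand-in for toolbox.get_log_folder(); the real helper
    # resolves the application's configured log directory.
    folder = os.path.join(os.getcwd(), 'gpt_log')
    os.makedirs(folder, exist_ok=True)
    return folder

def archive_path(file_name=None):
    # Mirrors the naming scheme in the new write_chat_to_file:
    # 'chatGPT对话历史' + timestamp + '.html', under the shared log folder.
    if file_name is None:
        file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    return os.path.join(get_log_folder(), file_name)

def list_archives():
    # The glob pattern shared by 载入对话历史存档 and 删除所有本地对话历史记录.
    return glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)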