Spaces:
Runtime error
Runtime error
SonyaX20
committed on
Commit
·
ebbac58
1
Parent(s):
bf191d3
new
Browse files
- app.py +46 -15
- requirements.txt +3 -2
app.py
CHANGED
@@ -5,6 +5,7 @@ from dotenv import load_dotenv
|
|
5 |
from openai import OpenAI
|
6 |
from PIL import Image
|
7 |
import numpy as np
|
|
|
8 |
|
9 |
# 加载环境变量
|
10 |
load_dotenv()
|
@@ -12,23 +13,32 @@ load_dotenv()
|
|
12 |
# 初始化 OpenAI 客户端
|
13 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
14 |
|
15 |
-
#
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def process_image(image):
|
19 |
"""处理上传的图片并返回识别结果和分析"""
|
20 |
if image is None:
|
21 |
return "请上传图片", "等待图片上传..."
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
def extract_text_from_image(image):
|
34 |
"""从图片中提取文字"""
|
@@ -41,8 +51,14 @@ def extract_text_from_image(image):
|
|
41 |
image_path = "temp_image.png"
|
42 |
image.save(image_path)
|
43 |
|
|
|
44 |
# 使用 EasyOCR 识别文字
|
45 |
-
result = reader.readtext(
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# 删除临时文件
|
48 |
if image_path == "temp_image.png" and os.path.exists(image_path):
|
@@ -54,8 +70,12 @@ def extract_text_from_image(image):
|
|
54 |
if prob > 0.5: # 只保留置信度大于 0.5 的结果
|
55 |
sorted_text.append(text)
|
56 |
|
57 |
-
|
|
|
|
|
|
|
58 |
except Exception as e:
|
|
|
59 |
return f"图片处理出错: {str(e)}"
|
60 |
|
61 |
def analyze_slide(text):
|
@@ -120,7 +140,7 @@ def chat_with_assistant(message, history, slide_text):
|
|
120 |
|
121 |
# 创建 Gradio 界面
|
122 |
with gr.Blocks(title="课程幻灯片理解助手") as demo:
|
123 |
-
gr.Markdown("# 📚 课程幻灯片理解助手")
|
124 |
gr.Markdown("上传幻灯片图片,AI 将自动识别内容并提供详细讲解")
|
125 |
|
126 |
# 存储当前识别的文字,用于对话上下文
|
@@ -131,8 +151,9 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
|
|
131 |
image_input = gr.Image(
|
132 |
label="上传幻灯片图片",
|
133 |
type="pil",
|
134 |
-
|
135 |
)
|
|
|
136 |
|
137 |
with gr.Column(scale=2):
|
138 |
text_output = gr.Textbox(
|
@@ -162,7 +183,14 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
|
|
162 |
clear = gr.Button("🗑️ 清除对话", scale=1)
|
163 |
|
164 |
# 设置事件处理
|
|
|
|
|
|
|
165 |
image_input.change(
|
|
|
|
|
|
|
|
|
166 |
fn=process_image,
|
167 |
inputs=[image_input],
|
168 |
outputs=[text_output, analysis_output]
|
@@ -170,6 +198,9 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
|
|
170 |
fn=lambda x: x,
|
171 |
inputs=[text_output],
|
172 |
outputs=[current_text]
|
|
|
|
|
|
|
173 |
)
|
174 |
|
175 |
msg.submit(
|
|
|
5 |
from openai import OpenAI
|
6 |
from PIL import Image
|
7 |
import numpy as np
|
8 |
+
import torch
|
9 |
|
10 |
# 加载环境变量
|
11 |
load_dotenv()
|
|
|
13 |
# 初始化 OpenAI 客户端
|
14 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
15 |
|
16 |
+
# 检查是否有 GPU
|
17 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
18 |
+
print(f"Running on device: {device}")
|
19 |
+
|
20 |
+
# 初始化 EasyOCR(添加进度提示)
|
21 |
+
print("Initializing EasyOCR and loading models...")
|
22 |
+
reader = easyocr.Reader(['ch_sim', 'en'], gpu=(device=='cuda'), download_enabled=True, verbose=True)
|
23 |
+
print("EasyOCR initialization completed!")
|
24 |
|
25 |
def process_image(image):
|
26 |
"""处理上传的图片并返回识别结果和分析"""
|
27 |
if image is None:
|
28 |
return "请上传图片", "等待图片上传..."
|
29 |
|
30 |
+
try:
|
31 |
+
# 提取文字
|
32 |
+
text = extract_text_from_image(image)
|
33 |
+
if not text.strip():
|
34 |
+
return "未能识别到文字内容,请尝试上传清晰的图片", "无法分析空白内容"
|
35 |
+
|
36 |
+
# 分析内容
|
37 |
+
analysis = analyze_slide(text)
|
38 |
+
|
39 |
+
return text, analysis
|
40 |
+
except Exception as e:
|
41 |
+
return f"处理出错: {str(e)}", "请重试或联系管理员"
|
42 |
|
43 |
def extract_text_from_image(image):
|
44 |
"""从图片中提取文字"""
|
|
|
51 |
image_path = "temp_image.png"
|
52 |
image.save(image_path)
|
53 |
|
54 |
+
print("开始识别文字...")
|
55 |
# 使用 EasyOCR 识别文字
|
56 |
+
result = reader.readtext(
|
57 |
+
image_path,
|
58 |
+
detail=1,
|
59 |
+
paragraph=True # 尝试将相近的文本组合成段落
|
60 |
+
)
|
61 |
+
print("文字识别完成")
|
62 |
|
63 |
# 删除临时文件
|
64 |
if image_path == "temp_image.png" and os.path.exists(image_path):
|
|
|
70 |
if prob > 0.5: # 只保留置信度大于 0.5 的结果
|
71 |
sorted_text.append(text)
|
72 |
|
73 |
+
final_text = ' '.join(sorted_text)
|
74 |
+
if not final_text.strip():
|
75 |
+
return "未能识别到清晰的文字,请尝试上传更清晰的图片"
|
76 |
+
return final_text
|
77 |
except Exception as e:
|
78 |
+
print(f"文字识别出错: {str(e)}")
|
79 |
return f"图片处理出错: {str(e)}"
|
80 |
|
81 |
def analyze_slide(text):
|
|
|
140 |
|
141 |
# 创建 Gradio 界面
|
142 |
with gr.Blocks(title="课程幻灯片理解助手") as demo:
|
143 |
+
gr.Markdown(f"# 📚 课程幻灯片理解助手 ({device.upper()} 模式)")
|
144 |
gr.Markdown("上传幻灯片图片,AI 将自动识别内容并提供详细讲解")
|
145 |
|
146 |
# 存储当前识别的文字,用于对话上下文
|
|
|
151 |
image_input = gr.Image(
|
152 |
label="上传幻灯片图片",
|
153 |
type="pil",
|
154 |
+
sources=["upload", "clipboard"]
|
155 |
)
|
156 |
+
status_text = gr.Markdown("等待上传图片...")
|
157 |
|
158 |
with gr.Column(scale=2):
|
159 |
text_output = gr.Textbox(
|
|
|
183 |
clear = gr.Button("🗑️ 清除对话", scale=1)
|
184 |
|
185 |
# 设置事件处理
|
186 |
+
def update_status(image):
|
187 |
+
return "正在处理图片..." if image is not None else "等待上传图片..."
|
188 |
+
|
189 |
image_input.change(
|
190 |
+
fn=update_status,
|
191 |
+
inputs=[image_input],
|
192 |
+
outputs=[status_text]
|
193 |
+
).then(
|
194 |
fn=process_image,
|
195 |
inputs=[image_input],
|
196 |
outputs=[text_output, analysis_output]
|
|
|
198 |
fn=lambda x: x,
|
199 |
inputs=[text_output],
|
200 |
outputs=[current_text]
|
201 |
+
).then(
|
202 |
+
fn=lambda: "处理完成",
|
203 |
+
outputs=[status_text]
|
204 |
)
|
205 |
|
206 |
msg.submit(
|
requirements.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
huggingface_hub==0.25.2
|
2 |
-
gradio>=4.0.0
|
3 |
easyocr>=1.7.1
|
4 |
python-dotenv>=1.0.0
|
5 |
openai>=1.0.0
|
6 |
Pillow>=10.0.0
|
7 |
-
numpy>=1.24.0
|
|
|
|
1 |
huggingface_hub==0.25.2
|
2 |
+
gradio>=4.0.0,<5.0.0
|
3 |
easyocr>=1.7.1
|
4 |
python-dotenv>=1.0.0
|
5 |
openai>=1.0.0
|
6 |
Pillow>=10.0.0
|
7 |
+
numpy>=1.24.0
|
8 |
+
torch>=2.0.0
|