Spaces:

siyux1927
/

slides-comprehension

Runtime error

App Files Files Community

SonyaX20 committed on Feb 4

Commit

ebbac58

1 Parent(s): bf191d3

new

Browse files

Files changed (2) hide show

app.py +46 -15
requirements.txt +3 -2

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from dotenv import load_dotenv
 from openai import OpenAI
 from PIL import Image
 import numpy as np
 # 加载环境变量
 load_dotenv()
@@ -12,23 +13,32 @@ load_dotenv()
 # 初始化 OpenAI 客户端
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
-# 初始化 EasyOCR
-reader = easyocr.Reader(['ch_sim', 'en'])
 def process_image(image):
     """处理上传的图片并返回识别结果和分析"""
     if image is None:
         return "请上传图片", "等待图片上传..."
-    # 提取文字
-    text = extract_text_from_image(image)
-    if not text.strip():
-        return "未能识别到文字内容，请尝试上传清晰的图片", "无法分析空白内容"
-    # 分析内容
-    analysis = analyze_slide(text)
-    return text, analysis
 def extract_text_from_image(image):
     """从图片中提取文字"""
@@ -41,8 +51,14 @@ def extract_text_from_image(image):
             image_path = "temp_image.png"
             image.save(image_path)
         # 使用 EasyOCR 识别文字
-        result = reader.readtext(image_path)
         # 删除临时文件
         if image_path == "temp_image.png" and os.path.exists(image_path):
@@ -54,8 +70,12 @@ def extract_text_from_image(image):
             if prob > 0.5:  # 只保留置信度大于 0.5 的结果
                 sorted_text.append(text)
-        return ' '.join(sorted_text)
     except Exception as e:
         return f"图片处理出错: {str(e)}"
 def analyze_slide(text):
@@ -120,7 +140,7 @@ def chat_with_assistant(message, history, slide_text):
 # 创建 Gradio 界面
 with gr.Blocks(title="课程幻灯片理解助手") as demo:
-    gr.Markdown("# 📚 课程幻灯片理解助手")
     gr.Markdown("上传幻灯片图片，AI 将自动识别内容并提供详细讲解")
     # 存储当前识别的文字，用于对话上下文
@@ -131,8 +151,9 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
             image_input = gr.Image(
                 label="上传幻灯片图片",
                 type="pil",
-                tool="select"
             )
         with gr.Column(scale=2):
             text_output = gr.Textbox(
@@ -162,7 +183,14 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
         clear = gr.Button("🗑️ 清除对话", scale=1)
     # 设置事件处理
     image_input.change(
         fn=process_image,
         inputs=[image_input],
         outputs=[text_output, analysis_output]
@@ -170,6 +198,9 @@ with gr.Blocks(title="课程幻灯片理解助手") as demo:
         fn=lambda x: x,
         inputs=[text_output],
         outputs=[current_text]
     )
     msg.submit(

 from openai import OpenAI
 from PIL import Image
 import numpy as np
+import torch
 # 加载环境变量
 load_dotenv()
 # 初始化 OpenAI 客户端
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+# 检查是否有 GPU
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+print(f"Running on device: {device}")
+# 初始化 EasyOCR（添加进度提示）
+print("Initializing EasyOCR and loading models...")
+reader = easyocr.Reader(['ch_sim', 'en'], gpu=(device=='cuda'), download_enabled=True, verbose=True)
+print("EasyOCR initialization completed!")
 def process_image(image):
     """处理上传的图片并返回识别结果和分析"""
     if image is None:
         return "请上传图片", "等待图片上传..."
+    try:
+        # 提取文字
+        text = extract_text_from_image(image)
+        if not text.strip():
+            return "未能识别到文字内容，请尝试上传清晰的图片", "无法分析空白内容"
+        # 分析内容
+        analysis = analyze_slide(text)
+        return text, analysis
+    except Exception as e:
+        return f"处理出错: {str(e)}", "请重试或联系管理员"
 def extract_text_from_image(image):
     """从图片中提取文字"""
             image_path = "temp_image.png"
             image.save(image_path)
+        print("开始识别文字...")
         # 使用 EasyOCR 识别文字
+        result = reader.readtext(
+            image_path,
+            detail=1,
+            paragraph=True  # 尝试将相近的文本组合成段落
+        )
+        print("文字识别完成")
         # 删除临时文件
         if image_path == "temp_image.png" and os.path.exists(image_path):
             if prob > 0.5:  # 只保留置信度大于 0.5 的结果
                 sorted_text.append(text)
+        final_text = ' '.join(sorted_text)
+        if not final_text.strip():
+            return "未能识别到清晰的文字，请尝试上传更清晰的图片"
+        return final_text
     except Exception as e:
+        print(f"文字识别出错: {str(e)}")
         return f"图片处理出错: {str(e)}"
 def analyze_slide(text):
 # 创建 Gradio 界面
 with gr.Blocks(title="课程幻灯片理解助手") as demo:
+    gr.Markdown(f"# 📚 课程幻灯片理解助手 ({device.upper()} 模式)")
     gr.Markdown("上传幻灯片图片，AI 将自动识别内容并提供详细讲解")
     # 存储当前识别的文字，用于对话上下文
             image_input = gr.Image(
                 label="上传幻灯片图片",
                 type="pil",
+                sources=["upload", "clipboard"]
             )
+            status_text = gr.Markdown("等待上传图片...")
         with gr.Column(scale=2):
             text_output = gr.Textbox(
         clear = gr.Button("🗑️ 清除对话", scale=1)
     # 设置事件处理
+    def update_status(image):
+        return "正在处理图片..." if image is not None else "等待上传图片..."
     image_input.change(
+        fn=update_status,
+        inputs=[image_input],
+        outputs=[status_text]
+    ).then(
         fn=process_image,
         inputs=[image_input],
         outputs=[text_output, analysis_output]
         fn=lambda x: x,
         inputs=[text_output],
         outputs=[current_text]
+    ).then(
+        fn=lambda: "处理完成",
+        outputs=[status_text]
     )
     msg.submit(

requirements.txt CHANGED Viewed

@@ -1,7 +1,8 @@
 huggingface_hub==0.25.2
-gradio>=4.0.0
 easyocr>=1.7.1
 python-dotenv>=1.0.0
 openai>=1.0.0
 Pillow>=10.0.0
-numpy>=1.24.0

 huggingface_hub==0.25.2
+gradio>=4.0.0,<5.0.0
 easyocr>=1.7.1
 python-dotenv>=1.0.0
 openai>=1.0.0
 Pillow>=10.0.0
+numpy>=1.24.0
+torch>=2.0.0