Spaces:

siyux1927
/

slides-comprehension

Sleeping

App Files Files Community

SonyaX20 committed on Feb 4

Commit

bf191d3

1 Parent(s): 4d0f186

new

Browse files

Files changed (3) hide show

.gitignore +22 -0
README.md +29 -2
app.py +133 -68

.gitignore ADDED Viewed

	@@ -0,0 +1,22 @@

+# Virtual Environment
+venv/
+env/
+.env
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+# IDE
+.vscode/
+.idea/
+# Temporary files
+temp_image.png
+*.log
+# Distribution / packaging
+dist/
+build/
+*.egg-info/

README.md CHANGED Viewed

@@ -8,7 +8,34 @@ sdk_version: 5.0.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: for final exams
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 app_file: app.py
 pinned: false
 license: mit
+short_description: 课程幻灯片智能理解助手
 ---
+# 课程幻灯片理解助手
+这是一个基于 Gradio + EasyOCR + GPT-4 的课程幻灯片理解工具。
+## 功能特点
+- 支持上传幻灯片图片
+- 自动识别幻灯片中的文字内容（支持中英文）
+- 智能解析幻灯片内容并提供讲解
+- 支持与AI助手进行对话，深入理解课程内容
+## 技术栈
+- Gradio 5.0.1：构建Web界面
+- EasyOCR：图片文字识别
+- OpenAI GPT-4：内容理解与对话
+- Python 3.8+
+## 使用说明
+1. 上传幻灯片图片
+2. 上传后系统会自动识别并分析幻灯片（无需点击按钮）
+3. 查看识别的文字内容和AI分析结果
+4. 使用对话框与AI助手交互，提出问题
+## License
+MIT License

app.py CHANGED Viewed

@@ -15,107 +15,172 @@ client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 # 初始化 EasyOCR
 reader = easyocr.Reader(['ch_sim', 'en'])
-def extract_text_from_image(image):
-    """从图片中提取文字"""
-    if isinstance(image, str):
-        image_path = image
-    else:
-        # 将 numpy array 转换为 PIL Image
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image)
-        # 保存临时文件
-        image_path = "temp_image.png"
-        image.save(image_path)
-    # 使用 EasyOCR 识别文字
-    result = reader.readtext(image_path)
-    # 如果是临时文件，删除它
-    if image_path == "temp_image.png":
-        os.remove(image_path)
-    # 提取文字内容
-    text = ' '.join([item[1] for item in result])
-    return text
 def analyze_slide(text):
     """使用 GPT-4 分析幻灯片内容"""
-    prompt = f"""请分析以下幻灯片内容，并提供详细的讲解：
-{text}
-请从以下几个方面进行分析：
-1. 主要内容概述
-2. 重点概念解释
-3. 与其他知识的联系
-4. 实际应用场景
-请用中文回答。"""
-    response = client.chat.completions.create(
-        model="gpt-4",
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return response.choices[0].message.content
-def chat_with_assistant(message, history):
     """与 AI 助手对话"""
-    messages = [
-        {"role": "system", "content": "你是一位专业的课程助教，负责帮助学生理解课程内容。请用中文回答问题。"}
-    ]
-    # 添加历史对话
-    for human, assistant in history:
-        messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": assistant})
-    # 添加当前问题
-    messages.append({"role": "user", "content": message})
-    response = client.chat.completions.create(
-        model="gpt-4",
-        messages=messages
-    )
-    return response.choices[0].message.content
 # 创建 Gradio 界面
 with gr.Blocks(title="课程幻灯片理解助手") as demo:
-    gr.Markdown("# 课程幻灯片理解助手")
-    gr.Markdown("上传幻灯片图片，AI 将帮助你理解内容并回答问题。")
     with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(label="上传幻灯片图片")
-            analyze_button = gr.Button("分析幻灯片")
-        with gr.Column():
-            text_output = gr.Textbox(label="识别的文字内容", lines=5)
-            analysis_output = gr.Textbox(label="AI 分析结果", lines=10)
     gr.Markdown("---")
-    gr.Markdown("### 与 AI 助手对话")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="输入你的问题")
-    clear = gr.Button("清除对话历史")
     # 设置事件处理
-    analyze_button.click(
-        fn=lambda img: (extract_text_from_image(img), analyze_slide(extract_text_from_image(img))),
         inputs=[image_input],
         outputs=[text_output, analysis_output]
     )
     msg.submit(
         fn=chat_with_assistant,
-        inputs=[msg, chatbot],
         outputs=[chatbot],
         clear_input=True
     )
-    clear.click(lambda: None, None, chatbot, queue=False)
 # 启动应用
 if __name__ == "__main__":
-    demo.launch()

 # Initialize the EasyOCR reader with the 'ch_sim' and 'en' language models
 reader = easyocr.Reader(['ch_sim', 'en'])
+def process_image(image):
+    """Run OCR on an uploaded slide image and request an explanation of it.
+
+    Returns a ``(recognized_text, analysis)`` pair of strings. When no image
+    is given, or the recognized text is blank, both slots carry a
+    user-facing notice and the analysis step is skipped.
+    """
+    # No image supplied: show the upload prompts.
+    if image is None:
+        return "请上传图片", "等待图片上传..."
+    recognized = extract_text_from_image(image)
+    # Only call the model when OCR produced something non-blank.
+    if recognized.strip():
+        return recognized, analyze_slide(recognized)
+    return "未能识别到文字内容，请尝试上传清晰的图片", "无法分析空白内容"
+def extract_text_from_image(image):
+    """Extract text from a slide image using EasyOCR.
+
+    Args:
+        image: A file path (str), a NumPy array, or an object with a
+            ``save(path)`` method such as a PIL image.
+
+    Returns:
+        The recognized fragments whose confidence is above 0.5, joined by
+        single spaces in the order the reader returned them. If anything
+        fails, a string starting with "图片处理出错: " is returned instead
+        of raising.
+    """
+    import tempfile
+
+    temp_path = None
+    try:
+        if isinstance(image, str):
+            image_path = image
+        else:
+            if isinstance(image, np.ndarray):
+                image = Image.fromarray(image)
+            # Use a unique temporary file rather than a fixed name so that
+            # concurrent requests cannot overwrite each other's image, and a
+            # caller-supplied path is never mistaken for our temp file.
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+                temp_path = tmp.name
+            image.save(temp_path)
+            image_path = temp_path
+        result = reader.readtext(image_path)
+        # Keep only confident detections; no re-ordering is applied.
+        return ' '.join(text for _bbox, text, prob in result if prob > 0.5)
+    except Exception as e:
+        # Best-effort: surface the failure as text so the UI can display it.
+        return f"图片处理出错: {str(e)}"
+    finally:
+        # Always remove the temp file, including when OCR raised.
+        if temp_path is not None:
+            try:
+                os.remove(temp_path)
+            except OSError:
+                # Cleanup is best-effort; a leftover temp file must not mask the result.
+                pass
 def analyze_slide(text):
+    """Ask GPT-4 for a structured explanation of the given slide text.
+
+    Returns the model's reply, or a string starting with "内容分析出错: "
+    if building or sending the request raised.
+    """
+    try:
+        request_text = f"""请分析以下幻灯片内容，并提供清晰的讲解：
+内容：{text}
+请按照以下结构组织回答：
+1. 主要内容：用2-3句话概括核心内容
+2. 重点解释：详细解释重要概念和关键点
+3. 知识延伸：与其他知识的联系
+4. 应用场景：在实际中的应用示例
+请用中文回答，语言要通俗易懂。"""
+        # Single-turn request: the whole prompt goes in one user message.
+        request_args = {
+            "model": "gpt-4",
+            "messages": [{"role": "user", "content": request_text}],
+            "temperature": 0.7,
+            "max_tokens": 1000,
+        }
+        completion = client.chat.completions.create(**request_args)
+        first_choice = completion.choices[0]
+        return first_choice.message.content
+    except Exception as err:
+        return f"内容分析出错: {str(err)}"
+def chat_with_assistant(message, history, slide_text):
+    """Answer a user question, using the current slide text as context.
+
+    Args:
+        message: The user's new question. If empty, no request is made.
+        history: Previous turns as (user, assistant) pairs, or None.
+        slide_text: Text recognized from the current slide.
+
+    Returns:
+        The chat history as a list. For a non-empty message it gains one
+        (message, reply) pair, where reply is the model's answer or a
+        "回答出错: ..." notice if the request failed.
+    """
+    # Work on a copy: tolerates history=None (which previously crashed inside
+    # the error handler) and leaves the caller's list untouched.
+    history = list(history or [])
+    if not message:
+        return history
+    try:
+        context = f"""当前幻灯片内容：{slide_text}
+请基于以上幻灯片内容，回答用户的问题。如果问题与幻灯片内容无关，也可以回答其他问题。"""
+        messages = [
+            {"role": "system", "content": "你是一位专业的课程助教，负责帮助学生理解课程内容。请用清晰易懂的中文回答问题。"},
+            {"role": "user", "content": context}
+        ]
+        # Replay earlier turns so the model sees the whole conversation.
+        for human, assistant in history:
+            messages.append({"role": "user", "content": human})
+            messages.append({"role": "assistant", "content": assistant})
+        messages.append({"role": "user", "content": message})
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=messages,
+            temperature=0.7
+        )
+        reply = response.choices[0].message.content
+    except Exception as e:
+        # Best-effort: show the failure in the chat instead of raising.
+        reply = f"回答出错: {str(e)}"
+    history.append((message, reply))
+    return history
 # Build the Gradio interface
 with gr.Blocks(title="课程幻灯片理解助手") as demo:
+    gr.Markdown("# 📚 课程幻灯片理解助手")
+    gr.Markdown("上传幻灯片图片，AI 将自动识别内容并提供详细讲解")
+    # Holds the most recently recognized text so the chat can use it as context.
+    current_text = gr.State("")
     with gr.Row():
+        with gr.Column(scale=1):
+            # gr.Image has no `tool` argument on the pinned Gradio 5.x
+            # (removed in 4.0); passing it raises TypeError at startup.
+            image_input = gr.Image(
+                label="上传幻灯片图片",
+                type="pil"
+            )
+        with gr.Column(scale=2):
+            text_output = gr.Textbox(
+                label="识别的文字内容",
+                lines=3,
+                placeholder="上传图片后将显示识别的文字内容..."
+            )
+            analysis_output = gr.Textbox(
+                label="AI 讲解分析",
+                lines=10,
+                placeholder="等待分析结果..."
+            )
     gr.Markdown("---")
+    gr.Markdown("### 💬 与 AI 助手对话")
+    chatbot = gr.Chatbot(
+        label="对话历史",
+        height=400,
+        placeholder="在这里可以看到对话历史..."
+    )
+    with gr.Row():
+        msg = gr.Textbox(
+            label="输入你的问题",
+            placeholder="请输入你的问题...",
+            scale=4
+        )
+        clear = gr.Button("🗑️ 清除对话", scale=1)
+    # Wire up event handlers
+    # Analyze whenever the image changes, then mirror the recognized text
+    # into the state used as chat context.
+    image_input.change(
+        fn=process_image,
         inputs=[image_input],
         outputs=[text_output, analysis_output]
+    ).then(
+        fn=lambda x: x,
+        inputs=[text_output],
+        outputs=[current_text]
     )
     msg.submit(
         fn=chat_with_assistant,
+        inputs=[msg, chatbot, current_text],
+        outputs=[chatbot]
+    ).then(
+        # Event listeners take no `clear_input` argument, so the question box
+        # is emptied in a follow-up step instead.
+        fn=lambda: "",
+        outputs=[msg]
     )
+    clear.click(lambda: ([], ""), outputs=[chatbot, msg])
 # Launch the app when this file is run as a script
 if __name__ == "__main__":
+    demo.launch(share=True)