Spaces:

tbdavid2019
/

PDF2quizz

Running

App Files Files Community

tbdavid2019 committed 19 days ago

Commit

1ed7952

verified ·

1 Parent(s): 788b616

格式化題目

Browse files

Files changed (1) hide show

app.py +214 -33

app.py CHANGED Viewed

@@ -44,7 +44,7 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
         base = os.getenv("OPENAI_API_BASE") or baseurl
         model_name = model or "gpt-4.1"
         if not key or not base:
-            return "⚠️ 請輸入 LLM key 與 baseurl", ""
         client = OpenAI(api_key=key, base_url=base)
         type_map = {
@@ -74,11 +74,60 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
             }
         }
         prompt_map = {
-            "繁體中文": "你是一位專業的出題者，請根據以下內容，設計 {n} 題以下類型的題目：{types}。每題後面請標註【答案】。內容如下：\n{text}",
-            "簡體中文": "你是一位专业的出题者，请根据以下内容，设计 {n} 题以下类型的题目：{types}。每题后面请标注【答案】。内容如下：\n{text}",
-            "English": "You are a professional exam writer. Based on the following content, generate {n} questions of types: {types}. Please mark the answer after each question using [Answer:]. Content:\n{text}",
-            "日本語": "あなたはプロの出題者です。以下の内容に基づいて、{types}を含む{n}問の問題を作成してください。各問題の後に【答え】を付けてください。内容：\n{text}"
         }
         lang_key_map = {
@@ -89,8 +138,28 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
         }
         lang_key = lang_key_map[lang]
-        types_str = "、".join([type_map[t][lang_key] for t in question_types])
-        prompt = prompt_map[lang].format(n=num_questions, types=types_str, text=trimmed_text)
         response = client.chat.completions.create(
             model=model_name,
@@ -98,34 +167,125 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
         )
         content = response.choices[0].message.content
-        questions, answers = [], []
-        for line in content.strip().split("\n"):
-            if not line.strip():
-                continue
-            try:
-                if "【答案】" in line:
-                    q, a = line.split("【答案】", 1)
-                elif "[Answer:" in line:
-                    q, a = line.split("[Answer:", 1)
-                    a = a.rstrip("]")
-                elif "【答え】" in line:
-                    q, a = line.split("【答え】", 1)
                 else:
-                    questions.append(line.strip())
-                    answers.append("")
                     continue
-                questions.append(q.strip())
-                answers.append(a.strip())
-            except Exception:
-                questions.append(line.strip())
-                answers.append("")
-        if not questions:
-            return "⚠️ 無法解析 AI 回傳內容，請檢查輸入內容或稍後再試。", ""
-        return "\n\n".join(questions), "\n\n".join(answers)
     except Exception as e:
-        return f"⚠️ 發生錯誤：{str(e)}", ""
 # ✅ 匯出 Markdown, Quizlet（TSV）
@@ -186,7 +346,28 @@ def build_gradio_blocks():
                 quizlet_out = gr.File(label="📋 Quizlet (TSV) 檔下載")
-        generate_btn.click(fn=generate_questions,
                            inputs=[file_input, question_types, num_questions, lang, llm_key, baseurl, model_box],
                            outputs=[qbox, abox])

         base = os.getenv("OPENAI_API_BASE") or baseurl
         model_name = model or "gpt-4.1"
         if not key or not base:
+            return {"error": "⚠️ 請輸入 LLM key 與 baseurl"}, ""
         client = OpenAI(api_key=key, base_url=base)
         type_map = {
             }
         }
+        # 修改提示詞，要求 LLM 直接產出結構化的題目和答案
         prompt_map = {
+            "繁體中文": """你是一位專業的出題者，請根據以下內容，設計 {n} 題以下類型的題目：{types}。
+請嚴格按照以下格式輸出每個題目和答案：
+題目1：[題目內容]
+答案1：[答案內容]
+題目2：[題目內容]
+答案2：[答案內容]
+...以此類推
+請確保題號和答案號一一對應，不要使用其他格式。內容如下：
+{text}""",
+            "簡體中文": """你是一位专业的出题者，请根据以下内容，设计 {n} 题以下类型的题目：{types}。
+请严格按照以下格式输出每个题目和答案：
+题目1：[题目内容]
+答案1：[答案内容]
+题目2：[题目内容]
+答案2：[答案内容]
+...以此类推
+请确保题号和答案号一一对应，不要使用其他格式。内容如下：
+{text}""",
+            "English": """You are a professional exam writer. Based on the following content, generate {n} questions of types: {types}.
+Please strictly follow this format for each question and answer:
+Question1: [question content]
+Answer1: [answer content]
+Question2: [question content]
+Answer2: [answer content]
+...and so on
+Ensure that question numbers and answer numbers correspond exactly. Do not use any other format. Content:
+{text}""",
+            "日本語": """あなたはプロの出題者です。以下の内容に基づいて、{types}を含む{n}問の問題を作成してください。
+以下の形式で各問題と回答を出力してください：
+問題1：[問題内容]
+回答1：[回答内容]
+問題2：[問題内容]
+回答2：[回答内容]
+...など
+問題番号と回答番号が正確に対応していることを確認してください。他の形式は使用しないでください。内容：
+{text}"""
         }
         lang_key_map = {
         }
         lang_key = lang_key_map[lang]
+        # 處理字串形式的 question_types（來自 API）
+        if isinstance(question_types, str):
+            # 先用逗號分隔，再用頓號分隔
+            qt_list = []
+            for part in question_types.split(","):
+                for subpart in part.split("、"):
+                    if subpart.strip():
+                        qt_list.append(subpart.strip())
+            question_types = qt_list
+        # 檢查每個題型是否有效
+        valid_types = list(type_map.keys())
+        for t in question_types:
+            if t not in valid_types:
+                return {"error": f"⚠️ 無效的題型：{t}。有效題型為：{', '.join(valid_types)}"}, ""
+        try:
+            types_str = "、".join([type_map[t][lang_key] for t in question_types])
+            prompt = prompt_map[lang].format(n=num_questions, types=types_str, text=trimmed_text)
+        except Exception as e:
+            return {"error": f"⚠️ 處理題型時發生錯誤：{str(e)}。question_types={question_types}"}, ""
         response = client.chat.completions.create(
             model=model_name,
         )
         content = response.choices[0].message.content
+        # 解析 LLM 回傳的結構化內容
+        import re
+        # 初始化結果
+        result = {
+            "questions": [],
+            "answers": []
+        }
+        # 根據語言選擇正則表達式模式
+        if lang == "English":
+            question_pattern = r"Question(\d+):\s*(.*?)(?=\nAnswer\d+:|$)"
+            answer_pattern = r"Answer(\d+):\s*(.*?)(?=\nQuestion\d+:|$)"
+        elif lang == "日本語":
+            question_pattern = r"問題(\d+)：\s*(.*?)(?=\n回答\d+：|$)"
+            answer_pattern = r"回答(\d+)：\s*(.*?)(?=\n問題\d+：|$)"
+        else:  # 繁體中文 or 簡體中文
+            question_pattern = r"題目(\d+)：\s*(.*?)(?=\n答案\d+：|$)"
+            answer_pattern = r"答案(\d+)：\s*(.*?)(?=\n題目\d+：|$)"
+        # 提取題目和答案
+        questions_matches = re.findall(question_pattern, content, re.DOTALL)
+        answers_matches = re.findall(answer_pattern, content, re.DOTALL)
+        # 組織題目和答案
+        questions_dict = {num: text.strip() for num, text in questions_matches}
+        answers_dict = {num: text.strip() for num, text in answers_matches}
+        # 確保題目和答案一一對應
+        all_numbers = sorted(set(list(questions_dict.keys()) + list(answers_dict.keys())), key=int)
+        for num in all_numbers:
+            question = questions_dict.get(num, f"題目 {num} 缺失")
+            answer = answers_dict.get(num, f"答案 {num} 缺失")
+            result["questions"].append({
+                "number": num,
+                "content": question
+            })
+            result["answers"].append({
+                "number": num,
+                "content": answer
+            })
+        # 如果沒有成功提取題目和答案，使用備用方法
+        if not result["questions"]:
+            # 備用方法：按行分析
+            lines = content.strip().split("\n")
+            current_number = ""
+            current_question = ""
+            current_answer = ""
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue
+                # 嘗試匹配題目行
+                q_match = None
+                if lang == "English":
+                    q_match = re.match(r"Question\s*(\d+):\s*(.*)", line)
+                elif lang == "日本語":
+                    q_match = re.match(r"問題\s*(\d+)：\s*(.*)", line)
                 else:
+                    q_match = re.match(r"題目\s*(\d+)：\s*(.*)", line)
+                if q_match:
+                    # 保存前一個題目和答案
+                    if current_number and current_question:
+                        result["questions"].append({
+                            "number": current_number,
+                            "content": current_question
+                        })
+                        result["answers"].append({
+                            "number": current_number,
+                            "content": current_answer
+                        })
+                    # 開始新題目
+                    current_number = q_match.group(1)
+                    current_question = q_match.group(2)
+                    current_answer = ""
                     continue
+                # 嘗試匹配答案行
+                a_match = None
+                if lang == "English":
+                    a_match = re.match(r"Answer\s*(\d+):\s*(.*)", line)
+                elif lang == "日本語":
+                    a_match = re.match(r"回答\s*(\d+)：\s*(.*)", line)
+                else:
+                    a_match = re.match(r"答案\s*(\d+)：\s*(.*)", line)
+                if a_match and a_match.group(1) == current_number:
+                    current_answer = a_match.group(2)
+            # 保存最後一個題目和答案
+            if current_number and current_question:
+                result["questions"].append({
+                    "number": current_number,
+                    "content": current_question
+                })
+                result["answers"].append({
+                    "number": current_number,
+                    "content": current_answer
+                })
+        # 如果仍然沒有提取到題目和答案，返回錯誤
+        if not result["questions"]:
+            return {"error": "⚠️ 無法解析 AI 回傳內容，請檢查輸入內容或稍後再試。"}, ""
+        # 為了向後兼容，同時返回原始文本格式
+        questions_text = "\n\n".join([f"題目{q['number']}：{q['content']}" for q in result["questions"]])
+        answers_text = "\n\n".join([f"答案{a['number']}：{a['content']}" for a in result["answers"]])
+        return result, questions_text + "\n\n" + answers_text
     except Exception as e:
+        return {"error": f"⚠️ 發生錯誤：{str(e)}"}, ""
 # ✅ 匯出 Markdown, Quizlet（TSV）
                 quizlet_out = gr.File(label="📋 Quizlet (TSV) 檔下載")
+        # 包裝函數，將 generate_questions 的回傳值轉換為 Gradio UI 需要的格式
+        def generate_questions_for_gradio(files, question_types, num_questions, lang, llm_key, baseurl, model):
+            result, raw_text = generate_questions(files, question_types, num_questions, lang, llm_key, baseurl, model)
+            # 檢查是否有錯誤
+            if isinstance(result, dict) and "error" in result:
+                return result["error"], ""
+            # 分割原始文本為題目和答案
+            parts = raw_text.split("\n\n")
+            questions_part = ""
+            answers_part = ""
+            for part in parts:
+                if part.startswith("題目") or part.startswith("Question") or part.startswith("問題"):
+                    questions_part += part + "\n\n"
+                elif part.startswith("答案") or part.startswith("Answer") or part.startswith("回答"):
+                    answers_part += part + "\n\n"
+            return questions_part.strip(), answers_part.strip()
+        generate_btn.click(fn=generate_questions_for_gradio,
                            inputs=[file_input, question_types, num_questions, lang, llm_key, baseurl, model_box],
                            outputs=[qbox, abox])