tbdavid2019 committed on
Commit
1ed7952
·
verified ·
1 Parent(s): 788b616

格式化題目

Browse files
Files changed (1) hide show
  1. app.py +214 -33
app.py CHANGED
@@ -44,7 +44,7 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
44
  base = os.getenv("OPENAI_API_BASE") or baseurl
45
  model_name = model or "gpt-4.1"
46
  if not key or not base:
47
- return "⚠️ 請輸入 LLM key 與 baseurl", ""
48
  client = OpenAI(api_key=key, base_url=base)
49
 
50
  type_map = {
@@ -74,11 +74,60 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
74
  }
75
  }
76
 
 
77
  prompt_map = {
78
- "繁體中文": "你是一位專業的出題者,請根據以下內容,設計 {n} 題以下類型的題目:{types}。每題後面請標註【答案】。內容如下:\n{text}",
79
- "簡體中文": "你是一位专业的出题者,请根据以下内容,设计 {n} 题以下类型的题目:{types}。每题后面请标注【答案】。内容如下:\n{text}",
80
- "English": "You are a professional exam writer. Based on the following content, generate {n} questions of types: {types}. Please mark the answer after each question using [Answer:]. Content:\n{text}",
81
- "日本語": "あなたはプロの出題者です。以下の内容に基づいて、{types}を含む{n}問の問題を作成してください。各問題の後に【答え】を付けてください。内容:\n{text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
 
84
  lang_key_map = {
@@ -89,8 +138,28 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
89
  }
90
 
91
  lang_key = lang_key_map[lang]
92
- types_str = "、".join([type_map[t][lang_key] for t in question_types])
93
- prompt = prompt_map[lang].format(n=num_questions, types=types_str, text=trimmed_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  response = client.chat.completions.create(
96
  model=model_name,
@@ -98,34 +167,125 @@ def generate_questions(files, question_types, num_questions, lang, llm_key, base
98
  )
99
  content = response.choices[0].message.content
100
 
101
- questions, answers = [], []
102
- for line in content.strip().split("\n"):
103
- if not line.strip():
104
- continue
105
- try:
106
- if "【答案】" in line:
107
- q, a = line.split("【答案】", 1)
108
- elif "[Answer:" in line:
109
- q, a = line.split("[Answer:", 1)
110
- a = a.rstrip("]")
111
- elif "【答え】" in line:
112
- q, a = line.split("【答え】", 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  else:
114
- questions.append(line.strip())
115
- answers.append("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  continue
117
- questions.append(q.strip())
118
- answers.append(a.strip())
119
- except Exception:
120
- questions.append(line.strip())
121
- answers.append("")
122
-
123
- if not questions:
124
- return "⚠️ 無法解析 AI 回傳內容,請檢查輸入內容或稍後再試。", ""
125
-
126
- return "\n\n".join(questions), "\n\n".join(answers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  except Exception as e:
128
- return f"⚠️ 發生錯誤:{str(e)}", ""
129
 
130
  # ✅ 匯出 Markdown, Quizlet(TSV)
131
 
@@ -186,7 +346,28 @@ def build_gradio_blocks():
186
  quizlet_out = gr.File(label="📋 Quizlet (TSV) 檔下載")
187
 
188
 
189
- generate_btn.click(fn=generate_questions,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  inputs=[file_input, question_types, num_questions, lang, llm_key, baseurl, model_box],
191
  outputs=[qbox, abox])
192
 
 
44
  base = os.getenv("OPENAI_API_BASE") or baseurl
45
  model_name = model or "gpt-4.1"
46
  if not key or not base:
47
+ return {"error": "⚠️ 請輸入 LLM key 與 baseurl"}, ""
48
  client = OpenAI(api_key=key, base_url=base)
49
 
50
  type_map = {
 
74
  }
75
  }
76
 
77
+ # 修改提示詞,要求 LLM 直接產出結構化的題目和答案
78
  prompt_map = {
79
+ "繁體中文": """你是一位專業的出題者,請根據以下內容,設計 {n} 題以下類型的題目:{types}
80
+ 請嚴格按照以下格式輸出每個題目和答案:
81
+
82
+ 題目1:[題目內容]
83
+ 答案1:[答案內容]
84
+
85
+ 題目2:[題目內容]
86
+ 答案2:[答案內容]
87
+
88
+ ...以此類推
89
+
90
+ 請確保題號和答案號一一對應,不要使用其他格式。內容如下:
91
+ {text}""",
92
+ "簡體中文": """你是一位专业的出题者,请根据以下内容,设计 {n} 题以下类型的题目:{types}。
93
+ 请严格按照以下格式输出每个题目和答案:
94
+
95
+ 题目1:[题目内容]
96
+ 答案1:[答案内容]
97
+
98
+ 题目2:[题目内容]
99
+ 答案2:[答案内容]
100
+
101
+ ...以此类推
102
+
103
+ 请确保题号和答案号一一对应,不要使用其他格式。内容如下:
104
+ {text}""",
105
+ "English": """You are a professional exam writer. Based on the following content, generate {n} questions of types: {types}.
106
+ Please strictly follow this format for each question and answer:
107
+
108
+ Question1: [question content]
109
+ Answer1: [answer content]
110
+
111
+ Question2: [question content]
112
+ Answer2: [answer content]
113
+
114
+ ...and so on
115
+
116
+ Ensure that question numbers and answer numbers correspond exactly. Do not use any other format. Content:
117
+ {text}""",
118
+ "日本語": """あなたはプロの出題者です。以下の内容に基づいて、{types}を含む{n}問の問題を作成してください。
119
+ 以下の形式で各問題と回答を出力してください:
120
+
121
+ 問題1:[問題内容]
122
+ 回答1:[回答内容]
123
+
124
+ 問題2:[問題内容]
125
+ 回答2:[回答内容]
126
+
127
+ ...など
128
+
129
+ 問題番号と回答番号が正確に対応していることを確認してください。他の形式は使用しないでください。内容:
130
+ {text}"""
131
  }
132
 
133
  lang_key_map = {
 
138
  }
139
 
140
  lang_key = lang_key_map[lang]
141
+
142
+ # 處理字串形式的 question_types(來自 API)
143
+ if isinstance(question_types, str):
144
+ # 先用逗號分隔,再用頓號分隔
145
+ qt_list = []
146
+ for part in question_types.split(","):
147
+ for subpart in part.split("、"):
148
+ if subpart.strip():
149
+ qt_list.append(subpart.strip())
150
+ question_types = qt_list
151
+
152
+ # 檢查每個題型是否有效
153
+ valid_types = list(type_map.keys())
154
+ for t in question_types:
155
+ if t not in valid_types:
156
+ return {"error": f"⚠️ 無效的題型:{t}。有效題型為:{', '.join(valid_types)}"}, ""
157
+
158
+ try:
159
+ types_str = "、".join([type_map[t][lang_key] for t in question_types])
160
+ prompt = prompt_map[lang].format(n=num_questions, types=types_str, text=trimmed_text)
161
+ except Exception as e:
162
+ return {"error": f"⚠️ 處理題型時發生錯誤:{str(e)}。question_types={question_types}"}, ""
163
 
164
  response = client.chat.completions.create(
165
  model=model_name,
 
167
  )
168
  content = response.choices[0].message.content
169
 
170
+ # 解析 LLM 回傳的結構化內容
171
+ import re
172
+
173
+ # 初始化結果
174
+ result = {
175
+ "questions": [],
176
+ "answers": []
177
+ }
178
+
179
+ # 根據語言選擇正則表達式模式
180
+ if lang == "English":
181
+ question_pattern = r"Question(\d+):\s*(.*?)(?=\nAnswer\d+:|$)"
182
+ answer_pattern = r"Answer(\d+):\s*(.*?)(?=\nQuestion\d+:|$)"
183
+ elif lang == "日本語":
184
+ question_pattern = r"問題(\d+):\s*(.*?)(?=\n回答\d+:|$)"
185
+ answer_pattern = r"回答(\d+):\s*(.*?)(?=\n問題\d+:|$)"
186
+ else: # 繁體中文 or 簡體中文
187
+ question_pattern = r"題目(\d+):\s*(.*?)(?=\n答案\d+:|$)"
188
+ answer_pattern = r"答案(\d+):\s*(.*?)(?=\n題目\d+:|$)"
189
+
190
+ # 提取題目和答案
191
+ questions_matches = re.findall(question_pattern, content, re.DOTALL)
192
+ answers_matches = re.findall(answer_pattern, content, re.DOTALL)
193
+
194
+ # 組織題目和答案
195
+ questions_dict = {num: text.strip() for num, text in questions_matches}
196
+ answers_dict = {num: text.strip() for num, text in answers_matches}
197
+
198
+ # 確保題目和答案一一對應
199
+ all_numbers = sorted(set(list(questions_dict.keys()) + list(answers_dict.keys())), key=int)
200
+
201
+ for num in all_numbers:
202
+ question = questions_dict.get(num, f"題目 {num} 缺失")
203
+ answer = answers_dict.get(num, f"答案 {num} 缺失")
204
+
205
+ result["questions"].append({
206
+ "number": num,
207
+ "content": question
208
+ })
209
+
210
+ result["answers"].append({
211
+ "number": num,
212
+ "content": answer
213
+ })
214
+
215
+ # 如果沒有成功提取題目和答案,使用備用方法
216
+ if not result["questions"]:
217
+ # 備用方法:按行分析
218
+ lines = content.strip().split("\n")
219
+ current_number = ""
220
+ current_question = ""
221
+ current_answer = ""
222
+
223
+ for line in lines:
224
+ line = line.strip()
225
+ if not line:
226
+ continue
227
+
228
+ # 嘗試匹配題目行
229
+ q_match = None
230
+ if lang == "English":
231
+ q_match = re.match(r"Question\s*(\d+):\s*(.*)", line)
232
+ elif lang == "日本語":
233
+ q_match = re.match(r"問題\s*(\d+):\s*(.*)", line)
234
  else:
235
+ q_match = re.match(r"題目\s*(\d+):\s*(.*)", line)
236
+
237
+ if q_match:
238
+ # 保存前一個題目和答案
239
+ if current_number and current_question:
240
+ result["questions"].append({
241
+ "number": current_number,
242
+ "content": current_question
243
+ })
244
+ result["answers"].append({
245
+ "number": current_number,
246
+ "content": current_answer
247
+ })
248
+
249
+ # 開始新題目
250
+ current_number = q_match.group(1)
251
+ current_question = q_match.group(2)
252
+ current_answer = ""
253
  continue
254
+
255
+ # 嘗試匹配答案行
256
+ a_match = None
257
+ if lang == "English":
258
+ a_match = re.match(r"Answer\s*(\d+):\s*(.*)", line)
259
+ elif lang == "日本語":
260
+ a_match = re.match(r"回答\s*(\d+):\s*(.*)", line)
261
+ else:
262
+ a_match = re.match(r"答案\s*(\d+):\s*(.*)", line)
263
+
264
+ if a_match and a_match.group(1) == current_number:
265
+ current_answer = a_match.group(2)
266
+
267
+ # 保存最後一個題目和答案
268
+ if current_number and current_question:
269
+ result["questions"].append({
270
+ "number": current_number,
271
+ "content": current_question
272
+ })
273
+ result["answers"].append({
274
+ "number": current_number,
275
+ "content": current_answer
276
+ })
277
+
278
+ # 如果仍然沒有提取到題目和答案,返回錯誤
279
+ if not result["questions"]:
280
+ return {"error": "⚠️ 無法解析 AI 回傳內容,請檢查輸入內容或稍後再試。"}, ""
281
+
282
+ # 為了向後兼容,同時返回原始文本格式
283
+ questions_text = "\n\n".join([f"題目{q['number']}:{q['content']}" for q in result["questions"]])
284
+ answers_text = "\n\n".join([f"答案{a['number']}:{a['content']}" for a in result["answers"]])
285
+
286
+ return result, questions_text + "\n\n" + answers_text
287
  except Exception as e:
288
+ return {"error": f"⚠️ 發生錯誤:{str(e)}"}, ""
289
 
290
  # ✅ 匯出 Markdown, Quizlet(TSV)
291
 
 
346
  quizlet_out = gr.File(label="📋 Quizlet (TSV) 檔下載")
347
 
348
 
349
+ # 包裝函數,將 generate_questions 的回傳值轉換為 Gradio UI 需要的格式
350
+ def generate_questions_for_gradio(files, question_types, num_questions, lang, llm_key, baseurl, model):
351
+ result, raw_text = generate_questions(files, question_types, num_questions, lang, llm_key, baseurl, model)
352
+
353
+ # 檢查是否有錯誤
354
+ if isinstance(result, dict) and "error" in result:
355
+ return result["error"], ""
356
+
357
+ # 分割原始文本為題目和答案
358
+ parts = raw_text.split("\n\n")
359
+ questions_part = ""
360
+ answers_part = ""
361
+
362
+ for part in parts:
363
+ if part.startswith("題目") or part.startswith("Question") or part.startswith("問題"):
364
+ questions_part += part + "\n\n"
365
+ elif part.startswith("答案") or part.startswith("Answer") or part.startswith("回答"):
366
+ answers_part += part + "\n\n"
367
+
368
+ return questions_part.strip(), answers_part.strip()
369
+
370
+ generate_btn.click(fn=generate_questions_for_gradio,
371
  inputs=[file_input, question_types, num_questions, lang, llm_key, baseurl, model_box],
372
  outputs=[qbox, abox])
373