mkdirjason committed on
Commit
07ac240
·
verified ·
1 Parent(s): 34b4f9f

modle_change

Browse files
Files changed (1) hide show
  1. app.py +47 -138
app.py CHANGED
@@ -1,68 +1,34 @@
1
-
2
- from transformers import pipeline
3
  import gradio as gr
4
  import easyocr
5
  import pdfplumber
6
- import random
7
-
8
-
9
- import os
10
- os.system("rm -rf /home/user/.cache/huggingface")
11
-
12
-
13
- #適用於Interface、Block
14
- title = "<h1>產生英文題目</h1>"
15
- description = """這是一個利用hugging face 產生英文題目的小專案"""
16
- textbox = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
17
-
18
- #加入磚
19
- demo = gr.Blocks()
20
-
21
- # 加載 Hugging Face 上的問答模型
22
- question_generator = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
23
 
24
- # def question_generate(context):
25
- # # 讓模型根據文章生成問題
26
- # question_result = []
27
- # for i in range(5):
28
- # question = question_generator(f"question: {context}",
29
- # max_length=100,
30
- # do_sample = True, # 啟用採樣以增加多樣性
31
- # temperature=0.8 + (i * 0.1), # 逐漸增加溫度參數來獲得更多樣的結果
32
- # top_p=0.9
33
- # )
34
- # question_result.append(f"Q{i+1}. {question[0]['generated_text']}")
35
 
36
- # return "\n".join(question_result) #以換行符號返回多個問題
 
 
 
 
37
 
38
  def question_generator_with_answer(context):
39
-
40
- #產生題目
41
- question_data = question_generator(f"question:{context}",
42
- max_length=100, do_sample=True, temperature=0.8, top_p=0.9)
43
- question = question_data[0]['generated_text']
44
-
45
- #產生正確答案
46
- answer_data = question_generator(f"answer:{context}",
47
- max_length=100, do_sample=True, temperature=1, top_p=0.9)
48
- correct_answer = answer_data[0]['generated_text']
49
-
50
- #產生錯誤答案
51
  wrong_answers = set()
52
  while len(wrong_answers) < 3:
53
- wrong_data = question_generator(f"answer: {context}", max_length=50,
54
- do_sample=True, temperature=1.0, top_p=0.8)
55
- wrong_answer = wrong_data[0]['generated_text']
56
- if wrong_answer != correct_answer and "?" not in wrong_answer: # 避免重複正確答案
57
  wrong_answers.add(wrong_answer)
58
-
59
 
60
- # 將正確答案加入選項,並打亂順序
61
  choices = list(wrong_answers) + [correct_answer]
62
  random.shuffle(choices)
63
-
64
-
65
- # 回傳題目與選項
66
  return {
67
  "question": question,
68
  "choices": choices,
@@ -70,114 +36,57 @@ def question_generator_with_answer(context):
70
  }
71
 
72
  def format_question_output(context):
73
- question_result=[]
74
  for j in range(4):
75
  result = question_generator_with_answer(context)
76
  question_text = f"{result['question']}\n"
77
  choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
78
  question_result.append(f"\nQ{j+1}.{question_text}\n{choices_text}\n")
79
- return "\n".join(question_result) #用換行來連接
80
-
81
- # def format_question_output(context):
82
- # result = question_generator_with_answer(context)
83
- # question_text = f"**{result['question']}**\n\n"
84
- # choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
85
- # return f"{question_text}\n{choices_text}\n\n✅ 正確答案: {result['correct_answer']}"
86
-
87
 
88
- #pdf辨識
89
  def extract_text_from_pdf(pdf_path):
90
  text = ""
91
- with pdfplumber.open(pdf_path.name) as pdf:
92
  for page in pdf.pages:
93
  text += page.extract_text() + "\n"
94
- ls = format_question_output(text)
95
- return ls
96
 
97
-
98
-
99
- #圖片辨識(辨識度太低)
100
  def OCR(photo):
101
- text_inner = ""
102
- questions = []
103
  reader = easyocr.Reader(['en', 'ch_tra'])
104
  results = reader.readtext(photo)
105
- for (bbox, text, prob) in results:
106
- text_inner += text
107
- return text_inner
108
 
109
-
110
- #確認辨識結果沒有問題後,產生題目
111
  def OCR_gen(text):
112
- if not text.strip(): # 確保輸入的 text 不是空的
113
  return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"
114
- ls = format_question_output(text)
115
- return ls
116
-
117
 
 
118
  with demo:
119
- gr.Markdown(title)
120
- gr.Markdown(description)
 
121
  with gr.Tabs():
122
  with gr.TabItem("輸入文字"):
123
- with gr.Row():
124
- text_input = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
125
- with gr.Column():
126
- text_output = gr.Textbox(label="題目")
127
- text_button = gr.Button("產生題目")
128
  with gr.TabItem("PDF文件辨識"):
129
- with gr.Row():
130
- PDF_input = gr.File(label="請上傳PDF文件")
131
- with gr.Column():
132
- PDF_output = gr.Textbox()
133
- PDF_button = gr.Button("產生題目")
134
  with gr.TabItem("圖片辨識"):
135
- with gr.Row():
136
- image_input = gr.Image()
137
- #解析圖片文字
138
- with gr.Column():
139
- img_tem = gr.Textbox(placeholder="請確認辨識結果",label="辨識結果")
140
- img_button = gr.Button("開始解析")
141
- image_button = gr.Button("產生題目")
142
- # #產生題目
143
- with gr.Column():
144
- image_output = gr.Textbox(label="題目")
145
-
146
- #判別有沒有輸入文章
147
- def validate_and_generate(text):
148
- if not text.strip():
149
- return "請輸入文章以產生題目"
150
- return format_question_output(text)
151
-
152
- #文字輸入 物件
153
- text_button.click(validate_and_generate, inputs=text_input, outputs=text_output)
154
-
155
- #判別有沒有上傳檔案
156
- def test_PDF(file):
157
- if not file:
158
- return "請上傳PDF文件以產生題目"
159
- return extract_text_from_pdf(file)
160
-
161
- #PDF輸入
162
- PDF_button.click(test_PDF, inputs=PDF_input, outputs=PDF_output)
163
-
164
- #判別有沒有上傳照片
165
- def test_image(image):
166
- if image is None:
167
- return "請上傳圖片以產生題目"
168
- return OCR(image)
169
-
170
- #辨識文章
171
- img_button.click(test_image, inputs=image_input, outputs=img_tem)
172
-
173
-
174
- #檢查辨識結果有沒有存在
175
- def test_finished(text):
176
- if (not text.strip() or text == "請上傳圖片以產生題目"):
177
- return "請確認文章已經輸入"
178
- return OCR_gen(text)
179
- image_button.click(test_finished, inputs=img_tem, outputs=image_output)
180
-
181
-
182
 
183
  demo.launch()
 
1
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
+ import torch
3
  import gradio as gr
4
  import easyocr
5
  import pdfplumber
6
+ import random
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Load the local model and tokenizer.
MODEL_PATH = "models/t5-base-qg-hl"
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 
 
 
 
 
 
 
12
 
13
def generate_text(prompt, max_length=100, temperature=0.8, top_p=0.9, max_input_length=512):
    """Sample one completion for *prompt* from the module-level model.

    Args:
        prompt: Full input string, including any task prefix such as
            ``"question: "`` or ``"answer: "``.
        max_length: Forwarded to ``model.generate`` as the generation
            length limit.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        max_input_length: Prompts that tokenize to more than this many
            tokens are truncated before generation.

    Returns:
        The first generated sequence decoded to text, with special tokens
        removed.
    """
    # Whole PDFs / OCR dumps are passed in as context, so cap the encoder
    # input instead of feeding an arbitrarily long sequence to the model.
    # NOTE(review): 512 is presumably this checkpoint's configured input
    # limit - confirm against the tokenizer config.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
18
 
19
  def question_generator_with_answer(context):
20
+ question = generate_text(f"question: {context}")
21
+ correct_answer = generate_text(f"answer: {context}", temperature=1.0)
22
+
 
 
 
 
 
 
 
 
 
23
  wrong_answers = set()
24
  while len(wrong_answers) < 3:
25
+ wrong_answer = generate_text(f"answer: {context}", max_length=50, temperature=1.0, top_p=0.8)
26
+ if wrong_answer != correct_answer and "?" not in wrong_answer:
 
 
27
  wrong_answers.add(wrong_answer)
 
28
 
 
29
  choices = list(wrong_answers) + [correct_answer]
30
  random.shuffle(choices)
31
+
 
 
32
  return {
33
  "question": question,
34
  "choices": choices,
 
36
  }
37
 
38
  def format_question_output(context):
39
+ question_result = []
40
  for j in range(4):
41
  result = question_generator_with_answer(context)
42
  question_text = f"{result['question']}\n"
43
  choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
44
  question_result.append(f"\nQ{j+1}.{question_text}\n{choices_text}\n")
45
+ return "\n".join(question_result)
 
 
 
 
 
 
 
46
 
 
47
  def extract_text_from_pdf(pdf_path):
48
  text = ""
49
+ with pdfplumber.open(pdf_path.name) as pdf:
50
  for page in pdf.pages:
51
  text += page.extract_text() + "\n"
52
+ return format_question_output(text)
 
53
 
 
 
 
54
  def OCR(photo):
 
 
55
  reader = easyocr.Reader(['en', 'ch_tra'])
56
  results = reader.readtext(photo)
57
+ return "".join([text for (_, text, _) in results])
 
 
58
 
 
 
59
  def OCR_gen(text):
60
+ if not text.strip():
61
  return "錯誤:OCR 沒有輸出任何��用的文字,請重新檢查圖片內容。"
62
+ return format_question_output(text)
 
 
63
 
64
def _generate_from_text(text):
    """Generate questions from typed text; blank input returns a prompt message."""
    if not text or not text.strip():
        return "請輸入文章以產生題目"
    return format_question_output(text)


# Build the three-tab interface: typed text, PDF upload, and image OCR.
with gr.Blocks() as demo:
    gr.Markdown("<h1>產生英文題目</h1>")
    gr.Markdown("這是一個利用 hugging face 產生英文題目的小專案")

    with gr.Tabs():
        with gr.TabItem("輸入文字"):
            text_input = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
            text_output = gr.Textbox(label="題目")
            text_button = gr.Button("產生題目")

        with gr.TabItem("PDF文件辨識"):
            PDF_input = gr.File(label="請上傳PDF文件")
            PDF_output = gr.Textbox()
            PDF_button = gr.Button("產生題目")

        with gr.TabItem("圖片辨識"):
            image_input = gr.Image()
            # OCR output lands in an editable box so it can be reviewed
            # before questions are generated from it.
            img_tem = gr.Textbox(placeholder="請確認辨識結果", label="辨識結果")
            img_button = gr.Button("開始解析")
            image_button = gr.Button("產生題目")
            image_output = gr.Textbox(label="題目")

    # Event wiring is registered while the Blocks context is still open.
    text_button.click(_generate_from_text, inputs=text_input, outputs=text_output)
    PDF_button.click(extract_text_from_pdf, inputs=PDF_input, outputs=PDF_output)
    img_button.click(OCR, inputs=image_input, outputs=img_tem)
    image_button.click(OCR_gen, inputs=img_tem, outputs=image_output)

demo.launch()