openfree committed · Commit fdc1e97 · verified · 1 Parent(s): 22b544c

Update app.py

Files changed (1):
  1. app.py +82 -83
app.py CHANGED
@@ -12,13 +12,11 @@ import re
 import uuid
 import pymupdf
 
-# (initial install & setup)
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 os.system('python download_models_hf.py')
 
-# edit magic-pdf.json
 with open('/home/user/magic-pdf.json', 'r') as file:
     data = json.load(file)
 
@@ -41,22 +39,19 @@ from magic_pdf.tools.common import do_parse, prepare_env
 
 def create_css():
     """
-    Set overflow-y to auto on .gradio-container so the page can scroll,
-    and set min-height to 100vh
+    Fill the screen and keep it scrollable
     """
     return """
-    /* fill the whole screen while keeping scrolling possible */
     .gradio-container {
         width: 100vw !important;
-        min-height: 100vh !important;
+        min-height: 100vh !important;
         margin: 0 !important;
         padding: 0 !important;
         background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
         display: flex;
         flex-direction: column;
-        overflow-y: auto !important; /* enable scrolling */
+        overflow-y: auto !important;
     }
-    /* title area */
     .title-area {
         text-align: center;
         margin: 1rem auto;
@@ -78,11 +73,9 @@ def create_css():
         color: #6B7280;
         font-size: 1.1rem;
     }
-    /* components to hide */
     .invisible {
         display: none !important;
     }
-    /* misc padding adjustments */
     .gr-block, .gr-box {
         padding: 0.5rem !important;
     }
@@ -98,10 +91,7 @@ def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_en
     try:
         file_name = f"{str(Path(doc_path).stem)}_{time.time()}"
         pdf_data = read_fn(doc_path)
-        if is_ocr:
-            parse_method = "ocr"
-        else:
-            parse_method = "auto"
+        parse_method = "ocr" if is_ocr else "auto"
        local_image_dir, local_md_dir = prepare_env(output_dir, file_name, parse_method)
        do_parse(
            output_dir,
@@ -148,9 +138,6 @@ def replace_image_with_base64(markdown_text, image_dir_path):
     return re.sub(pattern, replace, markdown_text)
 
 def to_pdf(file_path):
-    """
-    Convert non-PDF files (e.g. images) to PDF with pymupdf
-    """
     with pymupdf.open(file_path) as f:
         if f.is_pdf:
             return file_path
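
Note: the body of replace_image_with_base64 is only visible here down to its final re.sub. For context, a minimal sketch of what such a helper typically does, with an illustrative name, regex, and fallback (not the committed body): scan markdown image links and inline local files as base64 data URIs.

    import base64
    import mimetypes
    import os
    import re

    def inline_images_as_base64(markdown_text: str, image_dir_path: str) -> str:
        # Illustrative stand-in for replace_image_with_base64, not the committed body.
        pattern = r'!\[(?P<alt>.*?)\]\((?P<src>.*?)\)'

        def replace(match: re.Match) -> str:
            src = match.group("src")
            full_path = os.path.join(image_dir_path, src)
            if not os.path.exists(full_path):
                return match.group(0)  # leave remote or missing images untouched
            mime = mimetypes.guess_type(full_path)[0] or "image/png"
            with open(full_path, "rb") as f:
                encoded = base64.b64encode(f.read()).decode("ascii")
            return f'![{match.group("alt")}](data:{mime};base64,{encoded})'

        return re.sub(pattern, replace, markdown_text)
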
@@ -162,37 +149,53 @@ def to_pdf(file_path):
         tmp_pdf_file.write(pdf_bytes)
         return tmp_file_path
 
-def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
-    file_path = to_pdf(file_path)
-    if end_pages > 20:
-        end_pages = 20
-    local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
-                                        layout_mode, formula_enable, table_enable, language)
-    archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
-    zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
-    if zip_archive_success == 0:
-        logger.info("compression succeeded")
-    else:
-        logger.error("compression failed")
+def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
+    """
+    - Report progress for each step of the PDF conversion
+    - Returns: the final markdown text
+    """
+    with progress:
+        progress(0, "Converting file to PDF...")
+        file_path = to_pdf(file_path)
+        time.sleep(0.5)  # illustrative delay
+
+        if end_pages > 20:
+            end_pages = 20
+        progress(30, "Parsing PDF...")
+        local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
+                                            layout_mode, formula_enable, table_enable, language)
+        time.sleep(0.5)
+
+        progress(50, "Creating zip archive...")
+        archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
+        zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
+        if zip_archive_success == 0:
+            logger.info("compression succeeded")
+        else:
+            logger.error("compression failed")
+        time.sleep(0.5)
+
+        progress(70, "Reading markdown...")
+        md_path = os.path.join(local_md_dir, file_name + ".md")
+        with open(md_path, 'r', encoding='utf-8') as f:
+            txt_content = f.read()
+        time.sleep(0.5)
 
-    md_path = os.path.join(local_md_dir, file_name + ".md")
-    with open(md_path, 'r', encoding='utf-8') as f:
-        txt_content = f.read()
+        progress(90, "Converting images to base64...")
+        md_content = replace_image_with_base64(txt_content, local_md_dir)
+        time.sleep(0.5)
 
-    md_content = replace_image_with_base64(txt_content, local_md_dir)
-    return md_content
+        progress(100, "Conversion complete!")
+        return md_content
 
 def init_model():
-    """
-    Pre-initialize the magic_pdf models (layout, OCR, etc.)
-    """
     from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
     try:
         model_manager = ModelSingleton()
         txt_model = model_manager.get_model(False, False)
-        logger.info(f"txt_model init final")
+        logger.info("txt_model init final")
         ocr_model = model_manager.get_model(True, False)
-        logger.info(f"ocr_model init final")
+        logger.info("ocr_model init final")
         return 0
     except Exception as e:
         logger.exception(e)
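
Note: the new to_markdown drives gr.Progress through a `with` block and 0-100 values, which does not match the tracker's usual callable API (fractions in [0, 1] plus a desc keyword). A minimal sketch of the conventional pattern, assuming a recent Gradio; convert_with_progress and the stand-in sleeps are illustrative:

    import time
    import gradio as gr

    def convert_with_progress(file_path: str, progress=gr.Progress()):
        # Gradio injects the tracker; call it with fractions, not percentages.
        progress(0.0, desc="Converting file to PDF...")
        time.sleep(0.5)  # stand-in for to_pdf(file_path)
        progress(0.5, desc="Parsing PDF...")
        time.sleep(0.5)  # stand-in for parse_pdf(...)
        progress(1.0, desc="Done")
        return f"# Parsed {file_path}"

    with gr.Blocks() as demo:
        path = gr.Textbox(label="File path")
        md = gr.Markdown()
        gr.Button("Convert").click(convert_with_progress, inputs=path, outputs=md)
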
@@ -202,21 +205,14 @@ model_init = init_model()
 logger.info(f"model_init: {model_init}")
 
 latin_lang = [
-    'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
-    'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
-    'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
-    'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
+    'af','az','bs','cs','cy','da','de','es','et','fr','ga','hr','hu','id','is','it','ku',
+    'la','lt','lv','mi','ms','mt','nl','no','oc','pi','pl','pt','ro','rs_latin','sk','sl',
+    'sq','sv','sw','tl','tr','uz','vi','french','german'
 ]
-arabic_lang = ['ar', 'fa', 'ug', 'ur']
-cyrillic_lang = [
-    'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
-    'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
-]
-devanagari_lang = [
-    'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
-    'sa', 'bgc'
-]
-other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
+arabic_lang = ['ar','fa','ug','ur']
+cyrillic_lang = ['ru','rs_cyrillic','be','bg','uk','mn','abq','ady','kbd','ava','dar','inh','che','lbe','lez','tab']
+devanagari_lang = ['hi','mr','ne','bh','mai','ang','bho','mah','sck','new','gom','sa','bgc']
+other_lang = ['ch','en','korean','japan','chinese_cht','ta','te','ka']
 
 all_lang = ['', 'auto']
 all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
@@ -232,6 +228,7 @@ import time
 
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=GEMINI_API_KEY)
+
 model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
 
 def format_chat_history(messages: list) -> list:
@@ -240,7 +237,9 @@ def format_chat_history(messages: list) -> list:
     """
     formatted_history = []
     for message in messages:
-        # use ChatMessage.role and ChatMessage.content
+        # ChatMessage => role, content
+        # metadata marks "thinking" turns,
+        # which are excluded from the Gemini input
         if not (message.role == "assistant" and hasattr(message, "metadata")):
             formatted_history.append({
                 "role": "user" if message.role == "user" else "assistant",
@@ -250,16 +249,17 @@
 
 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    Streams the Gemini response
-    => finally converts to a list of (user, bot) tuples and yields
+    Stream the Gemini response
+    - no error even when user_message is blank
     """
-    if not user_message.strip():
-        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message."))
-        yield convert_chat_messages_to_gradio_format(messages)
-        return
+    # 1) let blank input pass through (error message removed)
+    # if not user_message.strip():
+    #     messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message."))
+    #     yield convert_chat_messages_to_gradio_format(messages)
+    #     return
 
     try:
-        print(f"\n=== New Request (Text) ===\nUser message: {user_message}")
+        print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
 
         chat_history = format_chat_history(messages)
         chat = model.start_chat(history=chat_history)
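
Note: the streaming loop that follows consumes chunks from this chat object. Stripped of the Gradio bookkeeping, the underlying google.generativeai call is, as a minimal sketch (assuming GEMINI_API_KEY is set; plain text chunks only, thought parts may need separate handling on this thinking model):

    import os
    import google.generativeai as genai

    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")

    chat = model.start_chat(history=[])
    response = chat.send_message("Summarize this document.", stream=True)
    for chunk in response:  # chunks arrive incrementally
        print(chunk.text, end="", flush=True)
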
@@ -302,7 +302,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
             response_buffer += current_chunk
             messages[-1] = ChatMessage(role="assistant", content=response_buffer)
         else:
-            # Still in "thinking"
+            # Still in "Thinking"
             thought_buffer += current_chunk
             messages[-1] = ChatMessage(
                 role="assistant",
@@ -312,16 +312,16 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
 
             yield convert_chat_messages_to_gradio_format(messages)
 
-        print(f"\n=== Final Response ===\n{response_buffer}")
+        print(f"\n=== [Gemini] Final Response ===\n{response_buffer}")
 
     except Exception as e:
-        print(f"\n=== Error ===\n{str(e)}")
+        print(f"\n=== [Gemini] Error ===\n{str(e)}")
         messages.append(ChatMessage(role="assistant", content=f"I encountered an error: {str(e)}"))
         yield convert_chat_messages_to_gradio_format(messages)
 
 def convert_chat_messages_to_gradio_format(messages):
     """
-    ChatMessage list -> [(user turn, bot reply), ...] for the Gradio Chatbot
+    ChatMessage list => list of (user, bot) tuples for Gradio
     """
     gradio_chat = []
     user_text, assistant_text = None, None
@@ -330,7 +330,6 @@ def convert_chat_messages_to_gradio_format(messages):
         role = msg.role
         content = msg.content
         if role == "user":
-            # save the previous turn if one exists
             if user_text is not None or assistant_text is not None:
                 gradio_chat.append((user_text or "", assistant_text or ""))
             user_text = content
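
Note: the (user, bot) pairing logic is split across the two hunks above. A self-contained sketch of the same fold, with plain dicts standing in for ChatMessage and an illustrative name (not the committed code):

    def to_gradio_pairs(messages: list[dict]) -> list[tuple[str, str]]:
        # Fold a flat role/content list into (user, bot) tuples.
        pairs, user_text, assistant_text = [], None, None
        for msg in messages:
            if msg["role"] == "user":
                # a new user turn closes out any previous pair
                if user_text is not None or assistant_text is not None:
                    pairs.append((user_text or "", assistant_text or ""))
                    assistant_text = None
                user_text = msg["content"]
            else:
                assistant_text = msg["content"]
        if user_text is not None or assistant_text is not None:
            pairs.append((user_text or "", assistant_text or ""))
        return pairs

    assert to_gradio_pairs([
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello"},
    ]) == [("hi", "hello")]
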
@@ -351,12 +350,13 @@ def convert_chat_messages_to_gradio_format(messages):
 
 def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
     """
-    User input -> prepend a doc_text (markdown) reference and append a ChatMessage to history
+    - automatically prepend a doc_text (markdown) reference
+    - blank input proceeds without error
     """
-    if not doc_text.strip():
-        user_query = msg
-    else:
+    if doc_text.strip():
         user_query = f"Answer with reference to the following document:\n\n{doc_text}\n\nQuestion: {msg}"
+    else:
+        user_query = msg
 
     history.append(ChatMessage(role="user", content=user_query))
     return "", history
@@ -369,23 +369,24 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
     gr.HTML("""
         <div class="title-area">
             <h1>OCR FLEX + Gemini Chat</h1>
-            <p>Convert PDF/images to text (markdown), then talk to the Gemini LLM</p>
+            <p>Convert PDF/images to text (markdown), then chat with the Gemini LLM</p>
         </div>
     """)
 
-    md_state = gr.State("")      # PDF -> markdown conversion result
-    chat_history = gr.State([])  # list of ChatMessages
+    # converted markdown and chat history
+    md_state = gr.State("")
+    chat_history = gr.State([])
 
     # upload & convert
     with gr.Row():
         file = gr.File(
-            label="Upload PDF / image",
+            label="Upload PDF/image",
             file_types=[".pdf", ".png", ".jpeg", ".jpg"],
             interactive=True
         )
         convert_btn = gr.Button("Convert")
 
-    # components to hide
+    # hidden components
     max_pages = gr.Slider(1, 20, 10, visible=False, elem_classes="invisible")
     layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], value="doclayout_yolo", visible=False, elem_classes="invisible")
     language = gr.Dropdown(all_lang, value='auto', visible=False, elem_classes="invisible")
@@ -393,21 +394,22 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
     is_ocr = gr.Checkbox(value=False, visible=False, elem_classes="invisible")
     table_enable = gr.Checkbox(value=True, visible=False, elem_classes="invisible")
 
-    # convert button → store result in md_state
+    # convert button → to_markdown + progress
     convert_btn.click(
         fn=to_markdown,
         inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
-        outputs=md_state
+        outputs=md_state,
+        show_progress=True  # also show Gradio's own loading spinner
     )
 
-    # Gemini chat
+    # Gemini Chat
     gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
     chatbot = gr.Chatbot(height=600)
     with gr.Row():
         chat_input = gr.Textbox(lines=1, placeholder="Enter your question...")
         clear_button = gr.Button("Clear chat")
 
-    # chat flow
+    # prompt input -> user_message -> stream_gemini_response
     chat_input.submit(
         fn=user_message,
         inputs=[chat_input, chat_history, md_state],
@@ -418,7 +420,7 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
         outputs=chatbot
     )
 
-    # reset
+    # reset: clear states and the chatbot
     def clear_states():
         return [], ""
     clear_button.click(
@@ -432,8 +434,5 @@ with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
     )
 
 
-##############################
-# 4) run the app
-##############################
 if __name__ == "__main__":
     demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)