openfree committed on
Commit be51037 · verified · 1 Parent(s): b9a59b7

Update app.py

Files changed (1):
  1. app.py +132 -163
app.py CHANGED
@@ -12,12 +12,13 @@ import re
 import uuid
 import pymupdf

-# os.system() calls from the original code
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 os.system('python download_models_hf.py')

 with open('/home/user/magic-pdf.json', 'r') as file:
     data = json.load(file)
 
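A note on the setup block above: the return codes of the os.system() calls are discarded, so a failed MinerU install only surfaces later at import time. A minimal sketch of the same steps with failure checking, using subprocess.run (the run() helper and check=True are illustrative additions, not part of this commit):

    import subprocess

    def run(cmd: str) -> None:
        # stop immediately if any setup step fails, instead of continuing silently
        subprocess.run(cmd, shell=True, check=True)

    run('pip uninstall -y magic-pdf')
    run('pip install git+https://github.com/opendatalab/MinerU.git@dev')
    run('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
    run('python download_models_hf.py')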
@@ -29,8 +30,8 @@ if os.getenv('apikey'):
29
  with open('/home/user/magic-pdf.json', 'w') as file:
30
  json.dump(data, file, indent=4)
31
 
 
32
  os.system('cp -r paddleocr /home/user/.paddleocr')
33
- # from gradio_pdf import PDF # PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ์œ„ํ•œ ์ปดํฌ๋„ŒํŠธ์ด์ง€๋งŒ, ์ง€๊ธˆ์€ ์ˆจ๊ธธ ์˜ˆ์ •
34
 
35
  import gradio as gr
36
  from loguru import logger
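The two hunks above form a read-modify-write of magic-pdf.json: load it, patch it only when an apikey environment variable is present (per the @@ -29 hunk context), and dump it back with indent=4. A self-contained sketch of that pattern; the 'api-key' field name is illustrative, since the diff does not show which keys get set:

    import json, os

    with open('/home/user/magic-pdf.json', 'r') as file:
        data = json.load(file)

    if os.getenv('apikey'):
        data['api-key'] = os.getenv('apikey')  # hypothetical key name

    with open('/home/user/magic-pdf.json', 'w') as file:
        json.dump(data, file, indent=4)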
@@ -40,22 +41,31 @@ from magic_pdf.libs.hash_utils import compute_sha256
 from magic_pdf.tools.common import do_parse, prepare_env

 def create_css():
     return """
-    /* global styles */
     .gradio-container {
         background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
-        max-width: 1200px !important;
-        margin: 0 auto !important;
-        padding: 2rem !important;
     }
-    /* title styles */
     .title-area {
         text-align: center;
-        margin-bottom: 2rem;
         padding: 1rem;
         background: white;
         border-radius: 1rem;
         box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
     }
     .title-area h1 {
         background: linear-gradient(90deg, #2563EB 0%, #7C3AED 100%);
@@ -69,10 +79,14 @@ def create_css():
         color: #6B7280;
         font-size: 1.1rem;
     }
-    /* components to be hidden */
     .invisible {
         display: none !important;
     }
     """

 def read_fn(path):
@@ -136,7 +150,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):

 def to_pdf(file_path):
     """
-    Convert non-PDF inputs (e.g., PNG or JPG files) to PDF using pymupdf.
     """
     with pymupdf.open(file_path) as f:
         if f.is_pdf:
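The hunk cuts off before the conversion branch. For context, pymupdf can turn an opened image document into PDF bytes via Document.convert_to_pdf(); a plausible sketch of the elided remainder (the output-file naming is an assumption, not shown in the diff):

    def to_pdf(file_path):
        """Convert non-PDF inputs (e.g., PNG or JPG files) to PDF using pymupdf."""
        with pymupdf.open(file_path) as f:
            if f.is_pdf:
                return file_path
            pdf_bytes = f.convert_to_pdf()  # render the image pages as a PDF
            out_path = file_path + '.pdf'   # hypothetical output name
            with open(out_path, 'wb') as out:
                out.write(pdf_bytes)
            return out_path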
@@ -151,8 +165,8 @@ def to_pdf(file_path):

 def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
     """
-    Take a file, extract markdown for up to end_pages pages,
-    and return md_content with the images embedded as base64.
     """
     file_path = to_pdf(file_path)
     if end_pages > 20:
@@ -171,10 +185,10 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
     txt_content = f.read()

     md_content = replace_image_with_base64(txt_content, local_md_dir)
-    # new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")  # for the original PDF preview

-    return md_content  # return only the final markdown text with base64 images included

 latex_delimiters = [
     {"left": "$$", "right": "$$", "display": True},
     {"left": '$', "right": '$', "display": False}
@@ -182,7 +196,7 @@ latex_delimiters = [

 def init_model():
     """
-    Pre-initialize the magic_pdf models.
     """
     from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
     try:
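The hunk truncates inside the try block. In the upstream MinerU Gradio demo this app builds on, init_model warms both the text and OCR pipelines through ModelSingleton, so the elided body presumably continues along these lines (a hedged reconstruction, not shown in this diff):

    def init_model():
        from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
        try:
            model_manager = ModelSingleton()
            model_manager.get_model(False, False)  # text/layout pipeline
            model_manager.get_model(True, False)   # OCR pipeline
            return 0
        except Exception as e:
            logger.exception(e)
            return -1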
@@ -223,29 +237,25 @@ all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devana
 ##############################
 # 2) Gemini LLM chat code
 ##############################
-
-# (duplicate imports, but commented out here since the output must be reproduced "without omission")
-# import os
-# import gradio as gr
 from gradio import ChatMessage
 from typing import Iterator
-import google.generativeai as genai
 import time

-# get Gemini API Key from the environ variable
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=GEMINI_API_KEY)

-# we will be using the Gemini 2.0 Flash model with Thinking capabilities
 model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")

 def format_chat_history(messages: list) -> list:
     """
-    Formats the chat history into a structure Gemini can understand
     """
     formatted_history = []
     for message in messages:
-        # Skip thinking messages (messages with metadata)
         if not (message.get("role") == "assistant" and "metadata" in message):
             formatted_history.append({
                 "role": "user" if message.get("role") == "user" else "assistant",
@@ -255,11 +265,13 @@ def format_chat_history(messages: list) -> list:

 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    Streams thoughts and response with conversation history support for text input only.
     """
     if not user_message.strip():
-        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message. Empty input is not allowed."))
-        yield messages
         return

     try:
@@ -274,6 +286,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
         response_buffer = ""
         thinking_complete = False

         messages.append(
             ChatMessage(
                 role="assistant",
@@ -281,225 +294,191 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                 metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
             )
         )

         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text

             if len(parts) == 2 and not thinking_complete:
-                # Complete thought and start response
                 thought_buffer += current_chunk
-                print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
-                yield messages

                 # Start response
                 response_buffer = parts[1].text
-                print(f"\n=== Starting Response ===\n{response_buffer}")
-
-                messages.append(
-                    ChatMessage(
-                        role="assistant",
-                        content=response_buffer
-                    )
-                )
                 thinking_complete = True
-
             elif thinking_complete:
                 response_buffer += current_chunk
-                print(f"\n=== Response Chunk ===\n{current_chunk}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=response_buffer
-                )
-
             else:
                 thought_buffer += current_chunk
-                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )

-            # time.sleep(0.05)  # Optional debugging delay
-            yield messages

         print(f"\n=== Final Response ===\n{response_buffer}")

     except Exception as e:
         print(f"\n=== Error ===\n{str(e)}")
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content=f"I apologize, but I encountered an error: {str(e)}"
-            )
-        )
-        yield messages

-def user_message(msg: str, history: list) -> tuple[str, list]:
-    """Adds user message to chat history"""
-    history.append(ChatMessage(role="user", content=msg))
     return "", history

-######################################################
 # 3) Integrated Gradio app layout
-#    - show only the PDF upload (everything else hidden)
-#    - after upload, clicking "Convert" builds the markdown
-#      and hands it over so the Chatbot can discuss it
-######################################################
-with gr.Blocks(title="Integrated OCR & Gemini Chat", css=create_css(), theme=gr.themes.Soft(primary_hue="teal", secondary_hue="slate", neutral_hue="neutral")) as demo:
     gr.HTML("""
     <div class="title-area">
         <h1>OCR FLEX + Gemini Chat</h1>
-        <p>Convert PDF/images to text (markdown), then chat with the Gemini LLM</p>
     </div>
     """)
-
-    # internal state (the markdown text)
-    md_state = gr.State("")
-    chat_history = gr.State([])  # Gemini chat history state

-    # 1) file upload UI
     with gr.Row():
         file = gr.File(
-            label="Upload a PDF or image file",
             file_types=[".pdf", ".png", ".jpeg", ".jpg"],
             interactive=True
         )
-        convert_btn = gr.Button(
-            "Convert",
-            elem_classes="primary-button"
-        )

-    # 2) the sliders, checkboxes, etc. from the original app are all hidden
     max_pages = gr.Slider(
         1, 20, 10,
         step=1,
         label='Max pages to convert',
-        elem_classes="invisible",
-        visible=False
     )
     layout_mode = gr.Dropdown(
         ["layoutlmv3", "doclayout_yolo"],
         label="Layout model",
         value="doclayout_yolo",
-        elem_classes="invisible",
-        visible=False
     )
     language = gr.Dropdown(
         all_lang,
         label="Language",
         value='auto',
-        elem_classes="invisible",
-        visible=False
     )
     formula_enable = gr.Checkbox(
-        label="Enable formula recognition",
         value=True,
-        elem_classes="invisible",
-        visible=False
     )
     is_ocr = gr.Checkbox(
-        label="Force OCR",
         value=False,
-        elem_classes="invisible",
-        visible=False
     )
     table_enable = gr.Checkbox(
-        label="Enable table recognition (test)",
         value=True,
-        elem_classes="invisible",
-        visible=False
     )

-    # 3) the outputs (file, markdown, etc.) are hidden
-    # uncomment to inspect them if needed
-    # output_file = gr.File(
-    #     label="Conversion result",
-    #     interactive=False,
-    #     visible=False
-    # )
-    # md = gr.Markdown(
-    #     label="Markdown rendering",
-    #     visible=False
-    # )
-    # md_text = gr.TextArea(
-    #     lines=45,
-    #     visible=False
-    # )
-    # pdf_show = PDF(
-    #     label='PDF preview',
-    #     interactive=False,
-    #     visible=False,
-    #     height=800
-    # )
-
-    # 4) on "Convert" click after a file upload:
-    #    store the markdown into md_state via to_markdown
     convert_btn.click(
         fn=to_markdown,
         inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
         outputs=md_state
     )

-    # ==========================
-    # Gemini Chat section
-    # ==========================
-    gr.Markdown("## Gemini 2.0 Flash (With Thinking) Chat")

     chatbot = gr.Chatbot(
-        label="Gemini2.0 Chatbot (Streaming Output)",
-        render_markdown=True,
-        height=400
     )
-
     with gr.Row():
-        chat_input = gr.Textbox(
-            lines=1,
-            label="Enter a question",
-            placeholder="Ask anything about the extracted document (markdown content)..."
-        )
         clear_button = gr.Button("Reset conversation")

-    # user asks -> user_message -> Gemini processing -> stream_gemini_response
-    def user_message_wrapper(msg, history, doc_text):
-        """
-        Each time the user types, lightly rewrite the question so that it
-        references doc_text (the markdown) before appending it to history (simple example).
-        """
-        if not doc_text:
-            # no converted document yet, so ask the question as-is
-            user_query = msg
-        else:
-            # simple example prompt asking the model to "refer to" the document (doc_text)
-            user_query = f"Answer with reference to the following document:\n\n{doc_text}\n\nQuestion: {msg}"
-
-        history.append(ChatMessage(role="user", content=user_query))
-        return "", history
-
     chat_input.submit(
-        fn=user_message_wrapper,
         inputs=[chat_input, chat_history, md_state],
         outputs=[chat_input, chat_history]
     ).then(
         fn=stream_gemini_response,
         inputs=[chat_input, chat_history],
-        outputs=chat_history
-    ).then(
-        fn=lambda h: h,
-        inputs=chat_history,
         outputs=chatbot
     )

     clear_button.click(
         fn=lambda: ([], ""),
         inputs=[],
@@ -515,14 +494,4 @@ with gr.Blocks(title="Integrated OCR & Gemini Chat", css=create_css(), theme=gr.them
 # 4) run the app
 ##############################
 if __name__ == "__main__":
-    # first demo.launch() - run the integrated app
-    demo.launch(ssr_mode=True, debug=True)
-
-
-    ###############################################
-    # below is the separate demo.launch() that was in the
-    # original "Gemini chat code" (comments preserved without omission)
-    ###############################################
-    # if __name__ == "__main__":
-    #     demo.launch(debug=True)
-
 
 import uuid
 import pymupdf

+# (original install & setup code)
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 os.system('python download_models_hf.py')

+# patch magic-pdf.json
 with open('/home/user/magic-pdf.json', 'r') as file:
     data = json.load(file)

 with open('/home/user/magic-pdf.json', 'w') as file:
     json.dump(data, file, indent=4)

+# (copy the OCR folder)
 os.system('cp -r paddleocr /home/user/.paddleocr')

 import gradio as gr
 from loguru import logger
 
 from magic_pdf.tools.common import do_parse, prepare_env

 def create_css():
+    """
+    Lay the app out wide so it fills the screen.
+    Adjust the background color / fonts here if needed
+    """
     return """
+    /* whole container: fill the screen */
     .gradio-container {
+        width: 100vw !important;
+        height: 100vh !important;
+        margin: 0 !important;
+        padding: 0 !important;
+        /* background gradient example */
         background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
+        display: flex;
+        flex-direction: column;
     }
+    /* title area */
     .title-area {
         text-align: center;
+        margin: 1rem auto;
         padding: 1rem;
         background: white;
         border-radius: 1rem;
         box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+        max-width: 800px;
     }
     .title-area h1 {
         background: linear-gradient(90deg, #2563EB 0%, #7C3AED 100%);

         color: #6B7280;
         font-size: 1.1rem;
     }
+    /* components to hide */
     .invisible {
         display: none !important;
     }
+    /* trim the default padding */
+    .gr-block, .gr-box {
+        padding: 0.5rem !important;
+    }
     """

 def read_fn(path):
 

 def to_pdf(file_path):
     """
+    Convert non-PDF inputs (e.g., PNG, JPG files) to PDF using pymupdf
     """
     with pymupdf.open(file_path) as f:
         if f.is_pdf:

 def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
     """
+    Take a file (image/PDF), extract markdown for up to end_pages pages,
+    and return the final md_content with base64-embedded images
     """
     file_path = to_pdf(file_path)
     if end_pages > 20:

     txt_content = f.read()

     md_content = replace_image_with_base64(txt_content, local_md_dir)

+    return md_content  # final markdown text

+# LaTeX formula delimiters
 latex_delimiters = [
     {"left": "$$", "right": "$$", "display": True},
     {"left": '$', "right": '$', "display": False}

 def init_model():
     """
+    Pre-initialize the magic_pdf models (layout, OCR, etc.)
     """
     from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
     try:
 
 ##############################
 # 2) Gemini LLM chat code
 ##############################
+# (duplicate import, left in uncommented since the code must be reproduced "without omission")
+import google.generativeai as genai
 from gradio import ChatMessage
 from typing import Iterator
 import time

+# Gemini API key setup
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=GEMINI_API_KEY)

+# Gemini 2.0 Flash (Thinking)
 model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")

 def format_chat_history(messages: list) -> list:
     """
+    Convert the chat history into a format Gemini can understand
     """
     formatted_history = []
     for message in messages:
         if not (message.get("role") == "assistant" and "metadata" in message):
             formatted_history.append({
                 "role": "user" if message.get("role") == "user" else "assistant",
 

 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
+    Stream the Gemini response.
+    Takes ChatMessage-style messages and yields them converted into the
+    (user, assistant) tuple-list format that the Gradio Chatbot expects
     """
     if not user_message.strip():
+        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message."))
+        yield convert_chat_messages_to_gradio_format(messages)
         return

     try:

         response_buffer = ""
         thinking_complete = False

+        # first append a "Thinking" message
         messages.append(
             ChatMessage(
                 role="assistant",

                 metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
             )
         )
+        yield convert_chat_messages_to_gradio_format(messages)

         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text

             if len(parts) == 2 and not thinking_complete:
+                # Complete thought
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
+                yield convert_chat_messages_to_gradio_format(messages)

                 # Start response
                 response_buffer = parts[1].text
+                messages.append(ChatMessage(role="assistant", content=response_buffer))
                 thinking_complete = True

             elif thinking_complete:
+                # Response ongoing
                 response_buffer += current_chunk
+                messages[-1] = ChatMessage(role="assistant", content=response_buffer)

             else:
+                # Still in "thinking"
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )

+            yield convert_chat_messages_to_gradio_format(messages)

         print(f"\n=== Final Response ===\n{response_buffer}")

     except Exception as e:
         print(f"\n=== Error ===\n{str(e)}")
+        messages.append(ChatMessage(role="assistant", content=f"I encountered an error: {str(e)}"))
+        yield convert_chat_messages_to_gradio_format(messages)

+def convert_chat_messages_to_gradio_format(messages):
+    """
+    ChatMessage list -> Gradio Chatbot format (a list of (user_str, assistant_str) tuples)
+    """
+    gradio_chat = []
+    user_text, assistant_text = None, None
+
+    for msg in messages:
+        role = msg.get("role")
+        content = msg.get("content", "")
+        if role == "user":
+            # flush the previous turn if one is pending
+            if user_text is not None or assistant_text is not None:
+                gradio_chat.append((user_text or "", assistant_text or ""))
+            user_text = content
+            assistant_text = None
+        else:
+            # assistant
+            if user_text is None:
+                user_text = ""
+            if assistant_text is None:
+                assistant_text = content
+            else:
+                assistant_text += content  # concatenate while streaming
+    # final turn
+    if user_text is not None or assistant_text is not None:
+        gradio_chat.append((user_text or "", assistant_text or ""))
+
+    return gradio_chat
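A quick sanity check of the helper with plain dict messages; note it reads fields via .get(), which assumes dict-like entries (gradio's ChatMessage dataclass would need converting first, e.g. with dataclasses.asdict()):

    msgs = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hel"},
        {"role": "assistant", "content": "lo"},  # streamed continuation of the same turn
    ]
    assert convert_chat_messages_to_gradio_format(msgs) == [("hi", "hello")]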
+
+def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
+    """
+    Whenever the user types, automatically prepend a prompt telling the model
+    to refer to doc_text (the converted markdown) (simple example)
+    """
+    if not doc_text.strip():
+        # no converted document yet, so pass the input through as-is
+        user_query = msg
+    else:
+        # add an instruction to answer with reference to the document
+        user_query = f"Answer with reference to the following document:\n\n{doc_text}\n\nQuestion: {msg}"
+
+    history.append(ChatMessage(role="user", content=user_query))
     return "", history


+##############################
 # 3) Integrated Gradio app layout
+#    - make the screen as wide as possible,
+#      showing only the PDF upload, the convert button,
+#      and the Gemini chat
+##############################
+with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
     gr.HTML("""
     <div class="title-area">
         <h1>OCR FLEX + Gemini Chat</h1>
+        <p>Convert PDF/images to text (markdown), then put questions to the Gemini LLM</p>
     </div>
     """)

+    md_state = gr.State("")       # converted markdown text
+    chat_history = gr.State([])   # list of ChatMessage (Gemini conversation history)
+
+    # 1) file upload & convert button
     with gr.Row():
         file = gr.File(
+            label="Upload a PDF / image",
             file_types=[".pdf", ".png", ".jpeg", ".jpg"],
             interactive=True
         )
+        convert_btn = gr.Button("Convert")

+    # 2) hidden components
     max_pages = gr.Slider(
         1, 20, 10,
         step=1,
         label='Max pages to convert',
+        visible=False,  # hidden
+        elem_classes="invisible"
     )
     layout_mode = gr.Dropdown(
         ["layoutlmv3", "doclayout_yolo"],
         label="Layout model",
         value="doclayout_yolo",
+        visible=False,
+        elem_classes="invisible"
     )
     language = gr.Dropdown(
         all_lang,
         label="Language",
         value='auto',
+        visible=False,
+        elem_classes="invisible"
     )
     formula_enable = gr.Checkbox(
+        label="Formula recognition",
         value=True,
+        visible=False,
+        elem_classes="invisible"
     )
     is_ocr = gr.Checkbox(
+        label="Force OCR",
         value=False,
+        visible=False,
+        elem_classes="invisible"
     )
     table_enable = gr.Checkbox(
+        label="Table recognition",
         value=True,
+        visible=False,
+        elem_classes="invisible"
     )

+    # pressing convert_btn extracts the markdown -> md_state
     convert_btn.click(
         fn=to_markdown,
         inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
         outputs=md_state
     )

+    # 3) Gemini chat area
+    gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")

     chatbot = gr.Chatbot(
+        label="Gemini2.0 Chatbot (Streaming)",
+        height=600
     )

     with gr.Row():
+        chat_input = gr.Textbox(lines=1, placeholder="Type your question...")
         clear_button = gr.Button("Reset conversation")

+    # when the user submits a question -> user_message -> Gemini response
     chat_input.submit(
+        fn=user_message,
         inputs=[chat_input, chat_history, md_state],
         outputs=[chat_input, chat_history]
     ).then(
         fn=stream_gemini_response,
         inputs=[chat_input, chat_history],
         outputs=chatbot
     )

+    # reset button -> clear both the chat history and md_state -> reset the chatbot too
     clear_button.click(
         fn=lambda: ([], ""),
         inputs=[],

 # 4) run the app
 ##############################
 if __name__ == "__main__":
+    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)
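One closing note on the launch line: stream_gemini_response is a generator, and Gradio serves generator output through its queue (enabled by default in recent Gradio releases). An explicit variant would look like this (an assumption, not part of the commit):

    if __name__ == "__main__":
        demo.queue()  # makes streaming generator handlers explicit
        demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)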