Update app.py

app.py CHANGED
@@ -12,12 +12,13 @@ import re
 import uuid
 import pymupdf
 
-# …
+# (auto-install & setup code)
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
 os.system('python download_models_hf.py')
 
+# edit magic-pdf.json
 with open('/home/user/magic-pdf.json', 'r') as file:
     data = json.load(file)
 
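The setup above shells out with os.system(), which ignores non-zero exit codes. A minimal sketch of the same steps with subprocess.run(check=True), so a failed step raises instead of being silently skipped (same commands as the hunk; the run() helper is ours, not the app's):

import subprocess, sys

def run(cmd: list[str]) -> None:
    # raise immediately if a setup step fails instead of continuing silently
    subprocess.run(cmd, check=True)

run([sys.executable, "-m", "pip", "uninstall", "-y", "magic-pdf"])
run([sys.executable, "-m", "pip", "install", "git+https://github.com/opendatalab/MinerU.git@dev"])
run(["wget", "https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py",
     "-O", "download_models_hf.py"])
run([sys.executable, "download_models_hf.py"])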
@@ -29,8 +30,8 @@ if os.getenv('apikey'):
 with open('/home/user/magic-pdf.json', 'w') as file:
     json.dump(data, file, indent=4)
 
+# (copy the OCR folder)
 os.system('cp -r paddleocr /home/user/.paddleocr')
-# from gradio_pdf import PDF  # component for the PDF preview, but it will be hidden for now
 
 import gradio as gr
 from loguru import logger
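For context, the elided lines between these two hunks read and patch /home/user/magic-pdf.json when an apikey env var is set (per the hunk header). A hedged sketch of that read-modify-write, plus a portable stand-in for the cp -r call; the exact JSON key written is not shown in the diff and is assumed here:

import json, os, shutil

CFG = "/home/user/magic-pdf.json"

with open(CFG, "r") as file:
    data = json.load(file)

if os.getenv("apikey"):
    data["apikey"] = os.getenv("apikey")  # hypothetical key name, not shown in the diff

with open(CFG, "w") as file:
    json.dump(data, file, indent=4)

# portable equivalent of `cp -r paddleocr /home/user/.paddleocr`
shutil.copytree("paddleocr", "/home/user/.paddleocr", dirs_exist_ok=True)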
@@ -40,22 +41,31 @@ from magic_pdf.libs.hash_utils import compute_sha256
 from magic_pdf.tools.common import do_parse, prepare_env
 
 def create_css():
+    """
+    Make the layout wide and fill the whole screen.
+    Background colors / fonts can be tweaked if needed
+    """
     return """
-    /* … */
+    /* whole container: fill the screen */
     .gradio-container {
+        width: 100vw !important;
+        height: 100vh !important;
+        margin: 0 !important;
+        padding: 0 !important;
+        /* background gradient example */
         background: linear-gradient(135deg, #EFF6FF 0%, #F5F3FF 100%);
-        …
-        …
-        padding: 2rem !important;
+        display: flex;
+        flex-direction: column;
     }
-    /* … */
+    /* title area */
     .title-area {
         text-align: center;
-        margin…
+        margin: 1rem auto;
         padding: 1rem;
         background: white;
         border-radius: 1rem;
         box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+        max-width: 800px;
     }
     .title-area h1 {
         background: linear-gradient(90deg, #2563EB 0%, #7C3AED 100%);
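A self-contained check of the full-screen CSS idea used above; the selector mirrors the hunk, but this mini-app is a sketch, not part of the Space:

import gradio as gr

CSS = """
.gradio-container {
    width: 100vw !important;
    height: 100vh !important;
    margin: 0 !important;
    padding: 0 !important;
}
"""

with gr.Blocks(css=CSS) as demo:
    gr.Markdown("full-screen layout check")

if __name__ == "__main__":
    demo.launch()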
@@ -69,10 +79,14 @@ def create_css():
     color: #6B7280;
     font-size: 1.1rem;
 }
-    /* … */
+    /* components we want to hide */
 .invisible {
     display: none !important;
 }
+    /* reduce the default padding */
+    .gr-block, .gr-box {
+        padding: 0.5rem !important;
+    }
 """
 
 def read_fn(path):
@@ -136,7 +150,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
 
 def to_pdf(file_path):
     """
-    For non-PDF input (e.g. PNG, JPG files), use pymupdf to …
+    For non-PDF input (e.g. PNG, JPG files), use pymupdf to convert it to PDF
     """
     with pymupdf.open(file_path) as f:
         if f.is_pdf:
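The branch below `if f.is_pdf:` falls outside this hunk. The usual pymupdf idiom for the non-PDF case looks like the following sketch (the output path and helper name are assumptions):

import pymupdf

def image_to_pdf(path: str) -> str:
    with pymupdf.open(path) as doc:
        if doc.is_pdf:
            return path
        pdf_bytes = doc.convert_to_pdf()  # render the image as a one-page PDF
    out_path = path + ".converted.pdf"
    with open(out_path, "wb") as fh:
        fh.write(pdf_bytes)
    return out_path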
@@ -151,8 +165,8 @@ def to_pdf(file_path):
 
 def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
     """
-    …
-    … md_content including base64 images
+    Take a file (image/PDF), extract markdown for up to end_pages pages,
+    and return the final md_content with base64-embedded images
     """
     file_path = to_pdf(file_path)
     if end_pages > 20:
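A hypothetical call matching the signature above; all argument values are illustrative:

md = to_markdown(
    "sample.pdf",          # file_path
    end_pages=10,          # capped at 20 by the guard below
    is_ocr=False,
    layout_mode="doclayout_yolo",
    formula_enable=True,
    table_enable=True,
    language="auto",
)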
@@ -171,10 +185,10 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
         txt_content = f.read()
 
     md_content = replace_image_with_base64(txt_content, local_md_dir)
-    # new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")  # was for the PDF preview
 
-    return md_content  # …
+    return md_content  # the final markdown text
 
+# LaTeX formula delimiters
 latex_delimiters = [
     {"left": "$$", "right": "$$", "display": True},
     {"left": '$', "right": '$', "display": False}
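replace_image_with_base64 itself is defined outside this hunk. A hypothetical sketch of what such a helper typically does to markdown image links (names and regex are ours, not the app's):

import base64, mimetypes, os, re

def embed_images(markdown_text: str, image_dir_path: str) -> str:
    def repl(match: re.Match) -> str:
        alt, rel_path = match.group(1), match.group(2)
        full = os.path.join(image_dir_path, rel_path)
        mime = mimetypes.guess_type(full)[0] or "image/png"
        with open(full, "rb") as fh:
            b64 = base64.b64encode(fh.read()).decode()
        return f"![{alt}](data:{mime};base64,{b64})"
    # rewrite ![alt](relative/path.png) into a self-contained data URI
    return re.sub(r"!\[(.*?)\]\((.*?)\)", repl, markdown_text)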
@@ -182,7 +196,7 @@ latex_delimiters = [
 
 def init_model():
     """
-    magic_pdf's …
+    Initialize the magic_pdf models (layout, OCR, etc.) up front
     """
     from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
     try:
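These delimiters match the latex_delimiters parameter that Gradio's Markdown and Chatbot components accept; a one-line usage sketch:

import gradio as gr

# render inline $...$ and display $$...$$ math using the list defined above
gr.Markdown(value="Euler: $e^{i\\pi} + 1 = 0$", latex_delimiters=latex_delimiters)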
@@ -223,29 +237,25 @@ all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devana
 ##############################
 # 2) Gemini LLM chat code
 ##############################
-
-
-# import os
-# import gradio as gr
+# (duplicate import, but it has to be output "without omission", so it is not commented out)
+import google.generativeai as genai
 from gradio import ChatMessage
 from typing import Iterator
-import google.generativeai as genai
 import time
 
-# …
+# Gemini API key setup
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=GEMINI_API_KEY)
 
-# …
+# Gemini 2.0 Flash (Thinking)
 model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
 
 def format_chat_history(messages: list) -> list:
     """
-    …
+    Convert the history into a format Gemini can understand
     """
     formatted_history = []
     for message in messages:
-        # Skip thinking messages (messages with metadata)
         if not (message.get("role") == "assistant" and "metadata" in message):
             formatted_history.append({
                 "role": "user" if message.get("role") == "user" else "assistant",
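The call that produces the `response` stream consumed below sits outside this hunk. With the google.generativeai chat API it is typically something like this sketch, reusing `model`, `messages`, and `user_message` from the app (an assumption, since the diff does not show it):

# hypothetical sketch of how the streaming response is obtained
chat = model.start_chat(history=format_chat_history(messages))
response = chat.send_message(user_message, stream=True)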
@@ -255,11 +265,13 @@ def format_chat_history(messages: list) -> list:
 
 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    …
+    Stream the Gemini response.
+    Takes `messages` in ChatMessage form and, on each step, yields it
+    converted to the (user, assistant) tuple list that Gradio can render
     """
     if not user_message.strip():
-        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message. …
-        yield messages
+        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message."))
+        yield convert_chat_messages_to_gradio_format(messages)
         return
 
     try:
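A sketch of driving the generator by hand; plain dicts stand in for the messages because the conversion helper added below reads them with .get(...), and running it for real requires a valid GEMINI_API_KEY:

history = [{"role": "user", "content": "Summarize the document."}]
for pairs in stream_gemini_response("Summarize the document.", history):
    print(pairs[-1])  # the (user, assistant) tuple currently being updated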
@@ -274,6 +286,7 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
         response_buffer = ""
         thinking_complete = False
 
+        # First append a "Thinking" message
         messages.append(
             ChatMessage(
                 role="assistant",
@@ -281,225 +294,191 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                 metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
             )
         )
+        yield convert_chat_messages_to_gradio_format(messages)
 
         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text
 
             if len(parts) == 2 and not thinking_complete:
                 # Complete thought
                 thought_buffer += current_chunk
-                print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
-                yield messages
+                yield convert_chat_messages_to_gradio_format(messages)
 
                 # Start response
                 response_buffer = parts[1].text
-
-
-                messages.append(
-                    ChatMessage(
-                        role="assistant",
-                        content=response_buffer
-                    )
-                )
+                messages.append(ChatMessage(role="assistant", content=response_buffer))
                 thinking_complete = True
-
             elif thinking_complete:
+                # Response ongoing
                 response_buffer += current_chunk
-
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=response_buffer
-                )
-
+                messages[-1] = ChatMessage(role="assistant", content=response_buffer)
             else:
+                # Still in "thinking"
                 thought_buffer += current_chunk
-                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
 
-
-            yield messages
+            yield convert_chat_messages_to_gradio_format(messages)
 
         print(f"\n=== Final Response ===\n{response_buffer}")
 
     except Exception as e:
         print(f"\n=== Error ===\n{str(e)}")
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content=f"I apologize, but I encountered an error: {str(e)}"
-            )
-        )
-        yield messages
+        messages.append(ChatMessage(role="assistant", content=f"I encountered an error: {str(e)}"))
+        yield convert_chat_messages_to_gradio_format(messages)
 
-def …
-    """
-    …
-    """
-    …
+def convert_chat_messages_to_gradio_format(messages):
+    """
+    ChatMessage list -> Gradio Chatbot format (a list of (user_str, assistant_str) tuples)
+    """
+    gradio_chat = []
+    user_text, assistant_text = None, None
+
+    for msg in messages:
+        role = msg.get("role")
+        content = msg.get("content", "")
+        if role == "user":
+            # flush the previous turn if one is still pending
+            if user_text is not None or assistant_text is not None:
+                gradio_chat.append((user_text or "", assistant_text or ""))
+            user_text = content
+            assistant_text = None
+        else:
+            # assistant
+            if user_text is None:
+                user_text = ""
+            if assistant_text is None:
+                assistant_text = content
+            else:
+                assistant_text += content  # appended while streaming
+    # last turn
+    if user_text is not None or assistant_text is not None:
+        gradio_chat.append((user_text or "", assistant_text or ""))
+
+    return gradio_chat
+
+def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
+    """
+    Each time the user submits, automatically add a prompt asking the model
+    to "refer to" doc_text (the extracted markdown) (simple example)
+    """
+    if not doc_text.strip():
+        # no converted document yet, so pass the question through as-is
+        user_query = msg
+    else:
+        # add an instruction to consult the document
+        user_query = f"Answer with reference to the following document:\n\n{doc_text}\n\nQuestion: {msg}"
+
+    history.append(ChatMessage(role="user", content=user_query))
     return "", history
 
 
-
+##############################
 # 3) Integrated Gradio app setup
-#
-#
-#
-
-with gr.Blocks(title="Integrated OCR & Gemini Chat", css=create_css(), theme=gr.them…
+#    - make the screen as wide as possible,
+#      showing only the PDF upload, the convert button
+#      and the Gemini chat
+##############################
+with gr.Blocks(title="OCR FLEX + Gemini Chat", css=create_css()) as demo:
     gr.HTML("""
     <div class="title-area">
         <h1>OCR FLEX + Gemini Chat</h1>
-        <p>PDF/image -> text (markdown) conversion, then … the LLM …
+        <p>PDF/image -> text (markdown) conversion, then Q&A with the Gemini LLM</p>
     </div>
     """)
-
-    # internal state (the markdown text)
-    md_state = gr.State("")
-    chat_history = gr.State([])  # Gemini chat history state
+    md_state = gr.State("")      # the converted markdown text
+    chat_history = gr.State([])  # ChatMessage list (Gemini conversation history)
 
+    # 1) file upload & convert button
     with gr.Row():
         file = gr.File(
-            label="PDF …
+            label="Upload PDF / image",
             file_types=[".pdf", ".png", ".jpeg", ".jpg"],
             interactive=True
         )
-        convert_btn = gr.Button(
-            "Convert",
-            elem_classes="primary-button"
-        )
+        convert_btn = gr.Button("Convert")
 
-    # 2) …
+    # 2) hidden components
     max_pages = gr.Slider(
         1, 20, 10,
         step=1,
         label='Max pages to convert',
-        …
-        …
+        visible=False,  # hidden
+        elem_classes="invisible"
     )
     layout_mode = gr.Dropdown(
         ["layoutlmv3", "doclayout_yolo"],
         label="Layout model",
         value="doclayout_yolo",
-        …
-        …
+        visible=False,
+        elem_classes="invisible"
    )
     language = gr.Dropdown(
         all_lang,
         label="Language",
         value='auto',
-        …
-        …
+        visible=False,
+        elem_classes="invisible"
     )
     formula_enable = gr.Checkbox(
-        label="Formula recognition …
+        label="Formula recognition",
         value=True,
-        …
-        …
+        visible=False,
+        elem_classes="invisible"
     )
     is_ocr = gr.Checkbox(
-        label="Force OCR …
+        label="Force OCR",
         value=False,
-        …
-        …
+        visible=False,
+        elem_classes="invisible"
     )
     table_enable = gr.Checkbox(
-        label="Table recognition …
+        label="Table recognition",
        value=True,
-        …
-        …
+        visible=False,
+        elem_classes="invisible"
     )
 
-    # …
-    # uncomment to inspect the intermediate results if needed
-    # output_file = gr.File(
-    #     label="Conversion result",
-    #     interactive=False,
-    #     visible=False
-    # )
-    # md = gr.Markdown(
-    #     label="Markdown rendering",
-    #     visible=False
-    # )
-    # md_text = gr.TextArea(
-    #     lines=45,
-    #     visible=False
-    # )
-    # pdf_show = PDF(
-    #     label='PDF preview',
-    #     interactive=False,
-    #     visible=False,
-    #     height=800
-    # )
-
-    # 4) when the 'Convert' button is clicked after a file upload:
-    #    store the markdown into md_state via to_markdown
+    # pressing convert_btn extracts markdown -> md_state
     convert_btn.click(
         fn=to_markdown,
         inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
         outputs=md_state
     )
 
-    # …
-    # ==========================
-    gr.Markdown("## Gemini 2.0 Flash (With Thinking) Chat")
+    # 3) Gemini chat area
+    gr.Markdown("## Gemini 2.0 Flash (Thinking) Chat")
 
     chatbot = gr.Chatbot(
-        label="Gemini2.0 Chatbot (Streaming …
-        …
-        height=400
+        label="Gemini2.0 Chatbot (Streaming)",
+        height=600
     )
-
     with gr.Row():
-        chat_input = gr.Textbox(
-            lines=1,
-            label="Ask a question",
-            placeholder="Ask anything about the extracted document (markdown content)..."
-        )
+        chat_input = gr.Textbox(lines=1, placeholder="Type your question...")
         clear_button = gr.Button("Reset conversation")
 
-    # when the user …
-    def user_message_wrapper(msg, history, doc_text):
-        """
-        Each time the user types, lightly rewrite the question so that it
-        references doc_text (the markdown) before appending it to history (simple example).
-        """
-        if not doc_text:
-            # no converted document yet, so just ask the question as-is
-            user_query = msg
-        else:
-            # simple example prompt asking the model to "refer to" the document content (doc_text)
-            user_query = f"Answer with reference to the following document:\n\n{doc_text}\n\nQuestion: {msg}"
-
-        history.append(ChatMessage(role="user", content=user_query))
-        return "", history
-
+    # the user asks -> user_message -> Gemini response
     chat_input.submit(
-        fn=user_message_wrapper,
+        fn=user_message,
         inputs=[chat_input, chat_history, md_state],
         outputs=[chat_input, chat_history]
     ).then(
         fn=stream_gemini_response,
         inputs=[chat_input, chat_history],
-        outputs=chat_history
-    ).then(
-        fn=lambda h: h,
-        inputs=chat_history,
         outputs=chatbot
     )
 
+    # reset button -> clear both the chat history and md_state -> the chatbot resets too
     clear_button.click(
         fn=lambda: ([], ""),
         inputs=[],
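A quick trace of the tuple conversion defined above, with dict-shaped messages (hypothetical values):

msgs = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "thought...", "metadata": {"title": "Thinking"}},
    {"role": "assistant", "content": "hello!"},
]
print(convert_chat_messages_to_gradio_format(msgs))
# -> [('hi', 'thought...hello!')]  (consecutive assistant chunks are concatenated)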
@@ -515,14 +494,4 @@ with gr.Blocks(title="Integrated OCR & Gemini Chat", css=create_css(), theme=gr.them
 # 4) Actual launch
 ##############################
 if __name__ == "__main__":
-
-    demo.launch(ssr_mode=True, debug=True)
-
-
-    ###############################################
-    # Below is the separate demo.launch() that was in
-    # the original "Gemini chat code" (comments preserved without omission)
-    ###############################################
-    # if __name__ == "__main__":
-    #     demo.launch(debug=True)
-
+    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)
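Streamed chat output in Gradio generally relies on the request queue; a common variant of this launch line enables it explicitly (a sketch, not what the commit does):

# enable the queue so the generator-based chat handler can stream updates
demo.queue().launch(debug=True, server_name="0.0.0.0", server_port=7860)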