ColPali-multi

Running on Zero

App Files Community

ginipick committed on Feb 3

Commit

832e4f3

verified ·

1 Parent(s): 45b6f79

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -25

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from PIL import Image
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-# ----------------------- 모델 및 프로세서 로드 ----------------------- #
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2.5-VL-7B-Instruct",
     torch_dtype=torch.bfloat16,
@@ -29,7 +29,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 )
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
-# ----------------------- Pydantic 모델 정의 ----------------------- #
 class GeneralRetrievalQuery(BaseModel):
     broad_topical_query: str
     broad_topical_explanation: str
@@ -84,7 +84,7 @@ Generate the queries based on this image and provide the response in the specifi
 prompt, pydantic_model = get_retrieval_prompt("general")
-# ----------------------- 입력 데이터 전처리 ----------------------- #
 def _prep_data_for_input(image):
     messages = [
         {
@@ -107,32 +107,31 @@ def _prep_data_for_input(image):
         return_tensors="pt",
     )
-# ----------------------- 출력 형식 변환 함수 ----------------------- #
 def format_output(data: dict, output_format: str) -> str:
     """
-    data: 파싱된 JSON 딕셔너리
-    output_format: "JSON", "Markdown", "Table" 중 하나
     """
     if output_format == "JSON":
-        return json.dumps(data, indent=2, ensure_ascii=False)
     elif output_format == "Markdown":
-        # 각 항목을 Markdown 문단 형식으로 출력
         md_lines = []
         for key, value in data.items():
             md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
         return "\n\n".join(md_lines)
     elif output_format == "Table":
-        # 간단한 Markdown 표 형식으로 변환
         headers = ["Field", "Content"]
-        separator = "|".join(["---"] * len(headers))
-        rows = [f"| {' | '.join(headers)} |", f"|{separator}|"]
         for key, value in data.items():
             rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
         return "\n".join(rows)
     else:
-        return json.dumps(data, indent=2, ensure_ascii=False)
-# ----------------------- 응답 생성 함수 ----------------------- #
 @spaces.GPU
 def generate_response(image, output_format: str = "JSON"):
     inputs = _prep_data_for_input(image)
@@ -158,17 +157,18 @@ def generate_response(image, output_format: str = "JSON"):
         gr.Warning("Failed to parse JSON from output")
         return output_text
-# ----------------------- 인터페이스 제목 및 설명 ----------------------- #
 title = "Elegant ColPali Query Generator using Qwen2.5-VL"
-description = """**ColPali**는 문서 검색에 최적화된 멀티모달 접근법입니다.
-이 인터페이스는 [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) 모델을 사용하여, 문서 이미지로부터 관련 검색 쿼리를 생성합니다.
-- **Broad Topical Query:** 문서의 주요 주제를 포괄하는 쿼리
-- **Specific Detail Query:** 문서 내 특정 사실이나 수치를 포함한 쿼리
-- **Visual Element Query:** 문서의 시각적 요소(예: 차트, 그래프 등)를 기반으로 한 쿼리
-아래 예제를 참고하여, 문서 이미지에 적합한 쿼리를 생성해 보세요.
-더 자세한 정보는 [블로그 포스트](https://danielvanstrien.xyz/posts/post-with-code/colpali/2024-09-23-generate_colpali_dataset.html)를 참조하세요.
 """
 examples = [
@@ -176,7 +176,7 @@ examples = [
     "examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
 ]
-# ----------------------- 커스텀 CSS ----------------------- #
 custom_css = """
 body {
     background: #f7f9fb;
@@ -219,7 +219,7 @@ footer {
 }
 """
-# ----------------------- Gradio 인터페이스 구성 ----------------------- #
 with gr.Blocks(css=custom_css, title=title) as demo:
     with gr.Column(variant="panel"):
         gr.Markdown(f"<header><h1>{title}</h1></header>")
@@ -231,7 +231,6 @@ with gr.Blocks(css=custom_css, title=title) as demo:
                 with gr.Row():
                     image_input = gr.Image(label="Upload Document Image", type="pil")
                 with gr.Row():
-                    # 출력 형식 선택 옵션 추가
                     output_format = gr.Radio(
                         choices=["JSON", "Markdown", "Table"],
                         value="JSON",
@@ -239,7 +238,8 @@ with gr.Blocks(css=custom_css, title=title) as demo:
                         info="Select the desired output format."
                     )
                 generate_button = gr.Button("Generate Query")
-                output_text = gr.Textbox(label="Generated Query", lines=10)
                 with gr.Accordion("Examples", open=False):
                     gr.Examples(
                         label="Query Examples",

 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+# ----------------------- Model and Processor Loading ----------------------- #
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     "Qwen/Qwen2.5-VL-7B-Instruct",
     torch_dtype=torch.bfloat16,
 )
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+# ----------------------- Pydantic Model Definition ----------------------- #
 class GeneralRetrievalQuery(BaseModel):
     broad_topical_query: str
     broad_topical_explanation: str
 prompt, pydantic_model = get_retrieval_prompt("general")
+# ----------------------- Input Preprocessing ----------------------- #
 def _prep_data_for_input(image):
     messages = [
         {
         return_tensors="pt",
     )
+# ----------------------- Output Formatting ----------------------- #
 def format_output(data: dict, output_format: str) -> str:
     """
+    Convert the JSON data into the desired output format.
+    output_format: "JSON", "Markdown", "Table"
     """
     if output_format == "JSON":
+        # Wrap with code block for better display in Markdown view
+        return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
     elif output_format == "Markdown":
         md_lines = []
         for key, value in data.items():
             md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
         return "\n\n".join(md_lines)
     elif output_format == "Table":
         headers = ["Field", "Content"]
+        separator = " | ".join(["---"] * len(headers))
+        rows = [f"| {' | '.join(headers)} |", f"| {separator} |"]
         for key, value in data.items():
             rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
         return "\n".join(rows)
     else:
+        return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
+# ----------------------- Response Generation ----------------------- #
 @spaces.GPU
 def generate_response(image, output_format: str = "JSON"):
     inputs = _prep_data_for_input(image)
         gr.Warning("Failed to parse JSON from output")
         return output_text
+# ----------------------- Interface Title and Description (in English) ----------------------- #
 title = "Elegant ColPali Query Generator using Qwen2.5-VL"
+description = """**ColPali** is a multimodal approach optimized for document retrieval.
+This interface uses the [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) model to generate relevant retrieval queries based on a document image.
+The queries include:
+- **Broad Topical Query:** Covers the main subject of the document.
+- **Specific Detail Query:** Focuses on a particular fact, figure, or point from the document.
+- **Visual Element Query:** References a visual component (e.g., chart, graph) from the document.
+Refer to the examples below to generate queries suitable for your document image.
+For more information, please see the associated blog post.
 """
 examples = [
     "examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
 ]
+# ----------------------- Custom CSS ----------------------- #
 custom_css = """
 body {
     background: #f7f9fb;
 }
 """
+# ----------------------- Gradio Interface ----------------------- #
 with gr.Blocks(css=custom_css, title=title) as demo:
     with gr.Column(variant="panel"):
         gr.Markdown(f"<header><h1>{title}</h1></header>")
                 with gr.Row():
                     image_input = gr.Image(label="Upload Document Image", type="pil")
                 with gr.Row():
                     output_format = gr.Radio(
                         choices=["JSON", "Markdown", "Table"],
                         value="JSON",
                         info="Select the desired output format."
                     )
                 generate_button = gr.Button("Generate Query")
+                # Use gr.Markdown as the output component so that Markdown and Table formats render correctly.
+                output_text = gr.Markdown(label="Generated Query")
                 with gr.Accordion("Examples", open=False):
                     gr.Examples(
                         label="Query Examples",