ginipick committed on
Commit
832e4f3
·
verified ·
1 Parent(s): 45b6f79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -25
app.py CHANGED
@@ -20,7 +20,7 @@ from PIL import Image
20
 
21
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
22
 
23
- # ----------------------- 모델 및 프로세서 로드 ----------------------- #
24
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
25
  "Qwen/Qwen2.5-VL-7B-Instruct",
26
  torch_dtype=torch.bfloat16,
@@ -29,7 +29,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
29
  )
30
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
31
 
32
- # ----------------------- Pydantic 모델 정의 ----------------------- #
33
  class GeneralRetrievalQuery(BaseModel):
34
  broad_topical_query: str
35
  broad_topical_explanation: str
@@ -84,7 +84,7 @@ Generate the queries based on this image and provide the response in the specifi
84
 
85
  prompt, pydantic_model = get_retrieval_prompt("general")
86
 
87
- # ----------------------- 입력 데이터 전처리 ----------------------- #
88
  def _prep_data_for_input(image):
89
  messages = [
90
  {
@@ -107,32 +107,31 @@ def _prep_data_for_input(image):
107
  return_tensors="pt",
108
  )
109
 
110
- # ----------------------- 출력 형식 변환 함수 ----------------------- #
111
  def format_output(data: dict, output_format: str) -> str:
112
  """
113
- data: 파싱된 JSON 딕셔너리
114
- output_format: "JSON", "Markdown", "Table" 중 하나
115
  """
116
  if output_format == "JSON":
117
- return json.dumps(data, indent=2, ensure_ascii=False)
 
118
  elif output_format == "Markdown":
119
- # 각 항목을 Markdown 문단 형식으로 출력
120
  md_lines = []
121
  for key, value in data.items():
122
  md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
123
  return "\n\n".join(md_lines)
124
  elif output_format == "Table":
125
- # 간단한 Markdown 표 형식으로 변환
126
  headers = ["Field", "Content"]
127
- separator = "|".join(["---"] * len(headers))
128
- rows = [f"| {' | '.join(headers)} |", f"|{separator}|"]
129
  for key, value in data.items():
130
  rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
131
  return "\n".join(rows)
132
  else:
133
- return json.dumps(data, indent=2, ensure_ascii=False)
134
 
135
- # ----------------------- 응답 생성 함수 ----------------------- #
136
  @spaces.GPU
137
  def generate_response(image, output_format: str = "JSON"):
138
  inputs = _prep_data_for_input(image)
@@ -158,17 +157,18 @@ def generate_response(image, output_format: str = "JSON"):
158
  gr.Warning("Failed to parse JSON from output")
159
  return output_text
160
 
161
- # ----------------------- 인터페이스 제목 및 설명 ----------------------- #
162
  title = "Elegant ColPali Query Generator using Qwen2.5-VL"
163
- description = """**ColPali**는 문서 검색에 최적화된 멀티모달 접근법입니다.
164
- 이 인터페이스는 [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) 모델을 사용하여, 문서 이미지로부터 관련 검색 쿼리를 생성합니다.
165
 
166
- - **Broad Topical Query:** 문서의 주요 주제를 포괄하는 쿼리
167
- - **Specific Detail Query:** 문서 내 특정 사실이나 수치를 포함한 쿼리
168
- - **Visual Element Query:** 문서의 시각적 요소(예: 차트, 그래프 등)를 기반으로 한 쿼리
 
169
 
170
- 아래 예제를 참고하여, 문서 이미지에 적합한 쿼리를 생성해 보세요.
171
- ๋” ์ž์„ธํ•œ ์ •๋ณด๋Š” [๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ](https://danielvanstrien.xyz/posts/post-with-code/colpali/2024-09-23-generate_colpali_dataset.html)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”.
172
  """
173
 
174
  examples = [
@@ -176,7 +176,7 @@ examples = [
176
  "examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
177
  ]
178
 
179
- # ----------------------- 커스텀 CSS ----------------------- #
180
  custom_css = """
181
  body {
182
  background: #f7f9fb;
@@ -219,7 +219,7 @@ footer {
219
  }
220
  """
221
 
222
- # ----------------------- Gradio 인터페이스 구성 ----------------------- #
223
  with gr.Blocks(css=custom_css, title=title) as demo:
224
  with gr.Column(variant="panel"):
225
  gr.Markdown(f"<header><h1>{title}</h1></header>")
@@ -231,7 +231,6 @@ with gr.Blocks(css=custom_css, title=title) as demo:
231
  with gr.Row():
232
  image_input = gr.Image(label="Upload Document Image", type="pil")
233
  with gr.Row():
234
- # 출력 형식 선택 옵션 추가
235
  output_format = gr.Radio(
236
  choices=["JSON", "Markdown", "Table"],
237
  value="JSON",
@@ -239,7 +238,8 @@ with gr.Blocks(css=custom_css, title=title) as demo:
239
  info="Select the desired output format."
240
  )
241
  generate_button = gr.Button("Generate Query")
242
- output_text = gr.Textbox(label="Generated Query", lines=10)
 
243
  with gr.Accordion("Examples", open=False):
244
  gr.Examples(
245
  label="Query Examples",
 
20
 
21
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
22
 
23
+ # ----------------------- Model and Processor Loading ----------------------- #
24
  model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
25
  "Qwen/Qwen2.5-VL-7B-Instruct",
26
  torch_dtype=torch.bfloat16,
 
29
  )
30
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
31
 
32
+ # ----------------------- Pydantic Model Definition ----------------------- #
33
  class GeneralRetrievalQuery(BaseModel):
34
  broad_topical_query: str
35
  broad_topical_explanation: str
 
84
 
85
  prompt, pydantic_model = get_retrieval_prompt("general")
86
 
87
+ # ----------------------- Input Preprocessing ----------------------- #
88
  def _prep_data_for_input(image):
89
  messages = [
90
  {
 
107
  return_tensors="pt",
108
  )
109
 
110
+ # ----------------------- Output Formatting ----------------------- #
111
  def format_output(data: dict, output_format: str) -> str:
112
  """
113
+ Convert the JSON data into the desired output format.
114
+ output_format: "JSON", "Markdown", "Table"
115
  """
116
  if output_format == "JSON":
117
+ # Wrap with code block for better display in Markdown view
118
+ return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
119
  elif output_format == "Markdown":
 
120
  md_lines = []
121
  for key, value in data.items():
122
  md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
123
  return "\n\n".join(md_lines)
124
  elif output_format == "Table":
 
125
  headers = ["Field", "Content"]
126
+ separator = " | ".join(["---"] * len(headers))
127
+ rows = [f"| {' | '.join(headers)} |", f"| {separator} |"]
128
  for key, value in data.items():
129
  rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
130
  return "\n".join(rows)
131
  else:
132
+ return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
133
 
134
+ # ----------------------- Response Generation ----------------------- #
135
  @spaces.GPU
136
  def generate_response(image, output_format: str = "JSON"):
137
  inputs = _prep_data_for_input(image)
 
157
  gr.Warning("Failed to parse JSON from output")
158
  return output_text
159
 
160
+ # ----------------------- Interface Title and Description (in English) ----------------------- #
161
  title = "Elegant ColPali Query Generator using Qwen2.5-VL"
162
+ description = """**ColPali** is a multimodal approach optimized for document retrieval.
163
+ This interface uses the [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) model to generate relevant retrieval queries based on a document image.
164
 
165
+ The queries include:
166
+ - **Broad Topical Query:** Covers the main subject of the document.
167
+ - **Specific Detail Query:** Focuses on a particular fact, figure, or point from the document.
168
+ - **Visual Element Query:** References a visual component (e.g., chart, graph) from the document.
169
 
170
+ Refer to the examples below to generate queries suitable for your document image.
171
+ For more information, please see the associated blog post.
172
  """
173
 
174
  examples = [
 
176
  "examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
177
  ]
178
 
179
+ # ----------------------- Custom CSS ----------------------- #
180
  custom_css = """
181
  body {
182
  background: #f7f9fb;
 
219
  }
220
  """
221
 
222
+ # ----------------------- Gradio Interface ----------------------- #
223
  with gr.Blocks(css=custom_css, title=title) as demo:
224
  with gr.Column(variant="panel"):
225
  gr.Markdown(f"<header><h1>{title}</h1></header>")
 
231
  with gr.Row():
232
  image_input = gr.Image(label="Upload Document Image", type="pil")
233
  with gr.Row():
 
234
  output_format = gr.Radio(
235
  choices=["JSON", "Markdown", "Table"],
236
  value="JSON",
 
238
  info="Select the desired output format."
239
  )
240
  generate_button = gr.Button("Generate Query")
241
+ # ์ถœ๋ ฅ ์ปดํฌ๋„ŒํŠธ๋ฅผ gr.Markdown์œผ๋กœ ๋ณ€๊ฒฝํ•˜์—ฌ Markdown ๋ฐ Table ํ˜•์‹์ด ์ œ๋Œ€๋กœ ๋ Œ๋”๋ง๋˜๋„๋ก ํ•จ.
242
+ output_text = gr.Markdown(label="Generated Query")
243
  with gr.Accordion("Examples", open=False):
244
  gr.Examples(
245
  label="Query Examples",