Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ from PIL import Image
|
|
20 |
|
21 |
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
22 |
|
23 |
-
# -----------------------
|
24 |
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
25 |
"Qwen/Qwen2.5-VL-7B-Instruct",
|
26 |
torch_dtype=torch.bfloat16,
|
@@ -29,7 +29,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
29 |
)
|
30 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
|
31 |
|
32 |
-
# ----------------------- Pydantic
|
33 |
class GeneralRetrievalQuery(BaseModel):
|
34 |
broad_topical_query: str
|
35 |
broad_topical_explanation: str
|
@@ -84,7 +84,7 @@ Generate the queries based on this image and provide the response in the specifi
|
|
84 |
|
85 |
prompt, pydantic_model = get_retrieval_prompt("general")
|
86 |
|
87 |
-
# -----------------------
|
88 |
def _prep_data_for_input(image):
|
89 |
messages = [
|
90 |
{
|
@@ -107,32 +107,31 @@ def _prep_data_for_input(image):
|
|
107 |
return_tensors="pt",
|
108 |
)
|
109 |
|
110 |
-
# -----------------------
|
111 |
def format_output(data: dict, output_format: str) -> str:
|
112 |
"""
|
113 |
-
|
114 |
-
output_format: "JSON", "Markdown", "Table"
|
115 |
"""
|
116 |
if output_format == "JSON":
|
117 |
-
|
|
|
118 |
elif output_format == "Markdown":
|
119 |
-
# Output each item as a Markdown paragraph
|
120 |
md_lines = []
|
121 |
for key, value in data.items():
|
122 |
md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
|
123 |
return "\n\n".join(md_lines)
|
124 |
elif output_format == "Table":
|
125 |
-
# Convert to a simple Markdown table format
|
126 |
headers = ["Field", "Content"]
|
127 |
-
separator = "|".join(["---"] * len(headers))
|
128 |
-
rows = [f"| {' | '.join(headers)} |", f"|{separator}|"]
|
129 |
for key, value in data.items():
|
130 |
rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
|
131 |
return "\n".join(rows)
|
132 |
else:
|
133 |
-
return json.dumps(data, indent=2, ensure_ascii=False)
|
134 |
|
135 |
-
# -----------------------
|
136 |
@spaces.GPU
|
137 |
def generate_response(image, output_format: str = "JSON"):
|
138 |
inputs = _prep_data_for_input(image)
|
@@ -158,17 +157,18 @@ def generate_response(image, output_format: str = "JSON"):
|
|
158 |
gr.Warning("Failed to parse JSON from output")
|
159 |
return output_text
|
160 |
|
161 |
-
# -----------------------
|
162 |
title = "Elegant ColPali Query Generator using Qwen2.5-VL"
|
163 |
-
description = """**ColPali
|
164 |
-
|
165 |
|
166 |
-
|
167 |
-
- **
|
168 |
-
- **
|
|
|
169 |
|
170 |
-
|
171 |
-
|
172 |
"""
|
173 |
|
174 |
examples = [
|
@@ -176,7 +176,7 @@ examples = [
|
|
176 |
"examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
|
177 |
]
|
178 |
|
179 |
-
# -----------------------
|
180 |
custom_css = """
|
181 |
body {
|
182 |
background: #f7f9fb;
|
@@ -219,7 +219,7 @@ footer {
|
|
219 |
}
|
220 |
"""
|
221 |
|
222 |
-
# ----------------------- Gradio
|
223 |
with gr.Blocks(css=custom_css, title=title) as demo:
|
224 |
with gr.Column(variant="panel"):
|
225 |
gr.Markdown(f"<header><h1>{title}</h1></header>")
|
@@ -231,7 +231,6 @@ with gr.Blocks(css=custom_css, title=title) as demo:
|
|
231 |
with gr.Row():
|
232 |
image_input = gr.Image(label="Upload Document Image", type="pil")
|
233 |
with gr.Row():
|
234 |
-
# Add an output format selection option
|
235 |
output_format = gr.Radio(
|
236 |
choices=["JSON", "Markdown", "Table"],
|
237 |
value="JSON",
|
@@ -239,7 +238,8 @@ with gr.Blocks(css=custom_css, title=title) as demo:
|
|
239 |
info="Select the desired output format."
|
240 |
)
|
241 |
generate_button = gr.Button("Generate Query")
|
242 |
-
|
|
|
243 |
with gr.Accordion("Examples", open=False):
|
244 |
gr.Examples(
|
245 |
label="Query Examples",
|
|
|
20 |
|
21 |
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
22 |
|
23 |
+
# ----------------------- Model and Processor Loading ----------------------- #
|
24 |
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
25 |
"Qwen/Qwen2.5-VL-7B-Instruct",
|
26 |
torch_dtype=torch.bfloat16,
|
|
|
29 |
)
|
30 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
|
31 |
|
32 |
+
# ----------------------- Pydantic Model Definition ----------------------- #
|
33 |
class GeneralRetrievalQuery(BaseModel):
|
34 |
broad_topical_query: str
|
35 |
broad_topical_explanation: str
|
|
|
84 |
|
85 |
prompt, pydantic_model = get_retrieval_prompt("general")
|
86 |
|
87 |
+
# ----------------------- Input Preprocessing ----------------------- #
|
88 |
def _prep_data_for_input(image):
|
89 |
messages = [
|
90 |
{
|
|
|
107 |
return_tensors="pt",
|
108 |
)
|
109 |
|
110 |
+
# ----------------------- Output Formatting ----------------------- #
|
111 |
def format_output(data: dict, output_format: str) -> str:
|
112 |
"""
|
113 |
+
Convert the JSON data into the desired output format.
|
114 |
+
output_format: "JSON", "Markdown", "Table"
|
115 |
"""
|
116 |
if output_format == "JSON":
|
117 |
+
# Wrap with code block for better display in Markdown view
|
118 |
+
return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
|
119 |
elif output_format == "Markdown":
|
|
|
120 |
md_lines = []
|
121 |
for key, value in data.items():
|
122 |
md_lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
|
123 |
return "\n\n".join(md_lines)
|
124 |
elif output_format == "Table":
|
|
|
125 |
headers = ["Field", "Content"]
|
126 |
+
separator = " | ".join(["---"] * len(headers))
|
127 |
+
rows = [f"| {' | '.join(headers)} |", f"| {separator} |"]
|
128 |
for key, value in data.items():
|
129 |
rows.append(f"| {key.replace('_', ' ').title()} | {value} |")
|
130 |
return "\n".join(rows)
|
131 |
else:
|
132 |
+
return f"```json\n{json.dumps(data, indent=2, ensure_ascii=False)}\n```"
|
133 |
|
134 |
+
# ----------------------- Response Generation ----------------------- #
|
135 |
@spaces.GPU
|
136 |
def generate_response(image, output_format: str = "JSON"):
|
137 |
inputs = _prep_data_for_input(image)
|
|
|
157 |
gr.Warning("Failed to parse JSON from output")
|
158 |
return output_text
|
159 |
|
160 |
+
# ----------------------- Interface Title and Description (in English) ----------------------- #
|
161 |
title = "Elegant ColPali Query Generator using Qwen2.5-VL"
|
162 |
+
description = """**ColPali** is a multimodal approach optimized for document retrieval.
|
163 |
+
This interface uses the [Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) model to generate relevant retrieval queries based on a document image.
|
164 |
|
165 |
+
The queries include:
|
166 |
+
- **Broad Topical Query:** Covers the main subject of the document.
|
167 |
+
- **Specific Detail Query:** Focuses on a particular fact, figure, or point from the document.
|
168 |
+
- **Visual Element Query:** References a visual component (e.g., chart, graph) from the document.
|
169 |
|
170 |
+
Refer to the examples below to generate queries suitable for your document image.
|
171 |
+
For more information, please see the associated blog post.
|
172 |
"""
|
173 |
|
174 |
examples = [
|
|
|
176 |
"examples/SRCCL_Technical-Summary.pdf_page_7.jpg",
|
177 |
]
|
178 |
|
179 |
+
# ----------------------- Custom CSS ----------------------- #
|
180 |
custom_css = """
|
181 |
body {
|
182 |
background: #f7f9fb;
|
|
|
219 |
}
|
220 |
"""
|
221 |
|
222 |
+
# ----------------------- Gradio Interface ----------------------- #
|
223 |
with gr.Blocks(css=custom_css, title=title) as demo:
|
224 |
with gr.Column(variant="panel"):
|
225 |
gr.Markdown(f"<header><h1>{title}</h1></header>")
|
|
|
231 |
with gr.Row():
|
232 |
image_input = gr.Image(label="Upload Document Image", type="pil")
|
233 |
with gr.Row():
|
|
|
234 |
output_format = gr.Radio(
|
235 |
choices=["JSON", "Markdown", "Table"],
|
236 |
value="JSON",
|
|
|
238 |
info="Select the desired output format."
|
239 |
)
|
240 |
generate_button = gr.Button("Generate Query")
|
241 |
+
# Change the output component to gr.Markdown so that the Markdown and Table formats render properly.
|
242 |
+
output_text = gr.Markdown(label="Generated Query")
|
243 |
with gr.Accordion("Examples", open=False):
|
244 |
gr.Examples(
|
245 |
label="Query Examples",
|