Spaces:
Running
on
Zero
Running
on
Zero
improve interface logic with image editor and parse res
Browse files
app.py
CHANGED
@@ -24,11 +24,6 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
|
|
24 |
model = model.eval().cuda()
|
25 |
model.config.pad_token_id = tokenizer.eos_token_id
|
26 |
|
27 |
-
def image_to_base64(image):
    """Return *image* serialised as PNG and encoded as a base64 string.

    ``image`` only needs a ``save(fp, format=...)`` method; presumably it is
    a PIL image -- confirm against the callers.
    """
    png_stream = io.BytesIO()
    image.save(png_stream, format="PNG")
    encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode()
|
31 |
-
|
32 |
UPLOAD_FOLDER = "./uploads"
|
33 |
RESULTS_FOLDER = "./results"
|
34 |
|
@@ -36,6 +31,13 @@ for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
|
|
36 |
if not os.path.exists(folder):
|
37 |
os.makedirs(folder)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
@spaces.GPU()
|
40 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
41 |
if image is None:
|
@@ -45,9 +47,25 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
45 |
image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
|
46 |
result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
|
47 |
|
48 |
-
shutil.copy(image, image_path)
|
49 |
-
|
50 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
if task == "Plain Text OCR":
|
52 |
res = model.chat(tokenizer, image_path, ocr_type='ocr')
|
53 |
return res, None, unique_id
|
@@ -74,21 +92,34 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
74 |
finally:
|
75 |
if os.path.exists(image_path):
|
76 |
os.remove(image_path)
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
78 |
def update_inputs(task):
    """Return visibility updates for the three task-specific option widgets.

    The result always has three entries, one per output wired to this
    handler, in order: ``ocr_type_dropdown``, ``ocr_box_input``,
    ``ocr_color_dropdown``.

    Args:
        task: The task name selected in the task dropdown.

    Returns:
        A list of three ``gr.update(...)`` values.
    """
    if task == "Fine-grained OCR (Box)":
        return [
            gr.update(visible=True, choices=["ocr", "format"]),
            gr.update(visible=True),
            gr.update(visible=False),
        ]
    if task == "Fine-grained OCR (Color)":
        return [
            gr.update(visible=True, choices=["ocr", "format"]),
            gr.update(visible=False),
            gr.update(visible=True, choices=["red", "green", "blue"]),
        ]
    # Plain tasks need none of the extra options. Unrecognised or empty
    # selections are handled the same way so the handler never returns None
    # when three outputs are expected.
    return [gr.update(visible=False) for _ in range(3)]
|
93 |
|
94 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
@@ -96,7 +127,9 @@ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
96 |
|
97 |
if res.startswith("Error:"):
|
98 |
return res, None
|
99 |
-
|
|
|
|
|
100 |
res = f"$$ {res} $$"
|
101 |
|
102 |
if html_content:
|
@@ -118,18 +151,11 @@ with gr.Blocks() as demo:
|
|
118 |
with gr.Row():
|
119 |
gr.Markdown(title)
|
120 |
|
121 |
-
with gr.Row():
|
122 |
-
with gr.Column(scale=1):
|
123 |
-
gr.Markdown(description)
|
124 |
-
with gr.Column(scale=1):
|
125 |
-
with gr.Group():
|
126 |
-
gr.Markdown(modelinfor)
|
127 |
-
gr.Markdown(joinus)
|
128 |
-
|
129 |
with gr.Row():
|
130 |
with gr.Column(scale=1):
|
131 |
with gr.Group():
|
132 |
image_input = gr.Image(type="filepath", label="Input Image")
|
|
|
133 |
task_dropdown = gr.Dropdown(
|
134 |
choices=[
|
135 |
"Plain Text OCR",
|
@@ -158,6 +184,7 @@ with gr.Blocks() as demo:
|
|
158 |
visible=False
|
159 |
)
|
160 |
submit_button = gr.Button("Process")
|
|
|
161 |
|
162 |
with gr.Column(scale=1):
|
163 |
with gr.Group():
|
@@ -167,7 +194,13 @@ with gr.Blocks() as demo:
|
|
167 |
task_dropdown.change(
|
168 |
update_inputs,
|
169 |
inputs=[task_dropdown],
|
170 |
-
outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
)
|
172 |
|
173 |
submit_button.click(
|
@@ -176,6 +209,12 @@ with gr.Blocks() as demo:
|
|
176 |
outputs=[output_markdown, output_html]
|
177 |
)
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
if __name__ == "__main__":
|
180 |
cleanup_old_files()
|
181 |
demo.launch()
|
|
|
24 |
model = model.eval().cuda()
|
25 |
model.config.pad_token_id = tokenizer.eos_token_id
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
UPLOAD_FOLDER = "./uploads"
|
28 |
RESULTS_FOLDER = "./results"
|
29 |
|
|
|
31 |
if not os.path.exists(folder):
|
32 |
os.makedirs(folder)
|
33 |
|
34 |
+
def image_to_base64(image):
    """Encode an image as a base64 string of its PNG serialisation.

    Args:
        image: Object exposing ``save(fp, format=...)``. Presumably a PIL
            image -- confirm against the callers.

    Returns:
        The PNG bytes written by ``image.save``, base64-encoded and decoded
        to ``str``.
    """
    # The context manager releases the in-memory buffer as soon as the bytes
    # have been copied out.
    with io.BytesIO() as buffer:
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    # Base64 output is pure ASCII, so the codec is named explicitly.
    return base64.b64encode(png_bytes).decode("ascii")
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
@spaces.GPU()
|
42 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
43 |
if image is None:
|
|
|
47 |
image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
|
48 |
result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
|
49 |
|
|
|
|
|
50 |
try:
|
51 |
+
if isinstance(image, dict): # If image is from ImageEditor
|
52 |
+
composite_image = image.get("composite")
|
53 |
+
if composite_image is not None:
|
54 |
+
if isinstance(composite_image, np.ndarray):
|
55 |
+
Image.fromarray(composite_image).save(image_path)
|
56 |
+
elif isinstance(composite_image, str):
|
57 |
+
shutil.copy(composite_image, image_path)
|
58 |
+
else:
|
59 |
+
return "Error: Unsupported image format from ImageEditor", None, None
|
60 |
+
else:
|
61 |
+
return "Error: No composite image found in ImageEditor output", None, None
|
62 |
+
elif isinstance(image, np.ndarray):
|
63 |
+
Image.fromarray(image).save(image_path)
|
64 |
+
elif isinstance(image, str):
|
65 |
+
shutil.copy(image, image_path)
|
66 |
+
else:
|
67 |
+
return "Error: Unsupported image format", None, None
|
68 |
+
|
69 |
if task == "Plain Text OCR":
|
70 |
res = model.chat(tokenizer, image_path, ocr_type='ocr')
|
71 |
return res, None, unique_id
|
|
|
92 |
finally:
|
93 |
if os.path.exists(image_path):
|
94 |
os.remove(image_path)
|
95 |
+
def update_image_input(task):
    """Switch between the plain image input and the image editor.

    Returns three ``gr.update`` values, in order: plain image input, image
    editor, editor submit button. The editor and its button are shown only
    for the "Fine-grained OCR (Color)" task; otherwise the plain input is
    shown instead.
    """
    use_editor = task == "Fine-grained OCR (Color)"
    return (
        gr.update(visible=not use_editor),
        gr.update(visible=use_editor),
        gr.update(visible=use_editor),
    )
|
100 |
+
|
101 |
def update_inputs(task):
    """Return visibility updates for every task-dependent widget.

    The result always has seven entries, one per output wired to the
    ``task_dropdown.change`` handler, in order: ``ocr_type_dropdown``,
    ``ocr_box_input``, ``ocr_color_dropdown``, ``image_input``,
    ``image_editor``, ``submit_button``, ``editor_submit_button``.

    Args:
        task: The task name selected in the task dropdown.

    Returns:
        A list of seven ``gr.update(...)`` values.
    """
    is_box = task == "Fine-grained OCR (Box)"
    is_color = task == "Fine-grained OCR (Color)"

    # Both fine-grained tasks expose the OCR type selector; every other task
    # (including an unrecognised or empty selection) hides it.
    if is_box or is_color:
        ocr_type = gr.update(visible=True, choices=["ocr", "format"])
    else:
        ocr_type = gr.update(visible=False)

    if is_color:
        ocr_color = gr.update(visible=True, choices=["red", "green", "blue"])
    else:
        ocr_color = gr.update(visible=False)

    # Only the colour task uses the image editor and its own submit button.
    # The plain image input stays visible for every other task, so this
    # handler agrees with update_image_input (bound to the same change event)
    # and the final UI state does not depend on which handler runs last.
    return [
        ocr_type,
        gr.update(visible=is_box),
        ocr_color,
        gr.update(visible=not is_color),
        gr.update(visible=is_color),
        gr.update(visible=not is_color),
        gr.update(visible=is_color),
    ]
|
124 |
|
125 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
|
127 |
|
128 |
if res.startswith("Error:"):
|
129 |
return res, None
|
130 |
+
|
131 |
+
res = res.replace("\\title", "\\title ")
|
132 |
+
|
133 |
res = f"$$ {res} $$"
|
134 |
|
135 |
if html_content:
|
|
|
151 |
with gr.Row():
|
152 |
gr.Markdown(title)
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
with gr.Row():
|
155 |
with gr.Column(scale=1):
|
156 |
with gr.Group():
|
157 |
image_input = gr.Image(type="filepath", label="Input Image")
|
158 |
+
image_editor = gr.ImageEditor(label="Image Editor", type="pil", visible=False)
|
159 |
task_dropdown = gr.Dropdown(
|
160 |
choices=[
|
161 |
"Plain Text OCR",
|
|
|
184 |
visible=False
|
185 |
)
|
186 |
submit_button = gr.Button("Process")
|
187 |
+
editor_submit_button = gr.Button("Process Edited Image", visible=False)
|
188 |
|
189 |
with gr.Column(scale=1):
|
190 |
with gr.Group():
|
|
|
194 |
task_dropdown.change(
|
195 |
update_inputs,
|
196 |
inputs=[task_dropdown],
|
197 |
+
outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, image_input, image_editor, submit_button, editor_submit_button]
|
198 |
+
)
|
199 |
+
|
200 |
+
task_dropdown.change(
|
201 |
+
update_image_input,
|
202 |
+
inputs=[task_dropdown],
|
203 |
+
outputs=[image_input, image_editor, editor_submit_button]
|
204 |
)
|
205 |
|
206 |
submit_button.click(
|
|
|
209 |
outputs=[output_markdown, output_html]
|
210 |
)
|
211 |
|
212 |
+
editor_submit_button.click(
|
213 |
+
ocr_demo,
|
214 |
+
inputs=[image_editor, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
|
215 |
+
outputs=[output_markdown, output_html]
|
216 |
+
)
|
217 |
+
|
218 |
if __name__ == "__main__":
|
219 |
cleanup_old_files()
|
220 |
demo.launch()
|