Spaces:

Tonic
/

GOT-OCR

Running on Zero

App Files Community

Tonic committed on Sep 14, 2024

Commit

fa528ee

unverified ·

1 Parent(s): f63ea6a

improve interface logic with image editor and parse res

Browse files

Files changed (1) hide show

app.py +58 -19

app.py CHANGED Viewed

@@ -24,11 +24,6 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
 model = model.eval().cuda()
 model.config.pad_token_id = tokenizer.eos_token_id
-def image_to_base64(image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    return base64.b64encode(buffered.getvalue()).decode()
 UPLOAD_FOLDER = "./uploads"
 RESULTS_FOLDER = "./results"
@@ -36,6 +31,13 @@ for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
     if not os.path.exists(folder):
         os.makedirs(folder)
 @spaces.GPU()
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     if image is None:
@@ -45,9 +47,25 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
     result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
-    shutil.copy(image, image_path)
     try:
         if task == "Plain Text OCR":
             res = model.chat(tokenizer, image_path, ocr_type='ocr')
             return res, None, unique_id
@@ -74,21 +92,34 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     finally:
         if os.path.exists(image_path):
             os.remove(image_path)
 def update_inputs(task):
     if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
-        return [gr.update(visible=False)] * 3
     elif task == "Fine-grained OCR (Box)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=True),
             gr.update(visible=False),
         ]
     elif task == "Fine-grained OCR (Color)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=False),
             gr.update(visible=True, choices=["red", "green", "blue"]),
         ]
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
@@ -96,7 +127,9 @@ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     if res.startswith("Error:"):
         return res, None
     res = f"$$ {res} $$"
     if html_content:
@@ -118,18 +151,11 @@ with gr.Blocks() as demo:
     with gr.Row():
         gr.Markdown(title)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown(description)
-        with gr.Column(scale=1):
-            with gr.Group():
-                gr.Markdown(modelinfor)
-                gr.Markdown(joinus)
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group():
                 image_input = gr.Image(type="filepath", label="Input Image")
                 task_dropdown = gr.Dropdown(
                     choices=[
                         "Plain Text OCR",
@@ -158,6 +184,7 @@ with gr.Blocks() as demo:
                     visible=False
                 )
                 submit_button = gr.Button("Process")
         with gr.Column(scale=1):
             with gr.Group():
@@ -167,7 +194,13 @@ with gr.Blocks() as demo:
     task_dropdown.change(
         update_inputs,
         inputs=[task_dropdown],
-        outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown]
     )
     submit_button.click(
@@ -176,6 +209,12 @@ with gr.Blocks() as demo:
         outputs=[output_markdown, output_html]
     )
 if __name__ == "__main__":
     cleanup_old_files()
     demo.launch()

 model = model.eval().cuda()
 model.config.pad_token_id = tokenizer.eos_token_id
 UPLOAD_FOLDER = "./uploads"
 RESULTS_FOLDER = "./results"
     if not os.path.exists(folder):
         os.makedirs(folder)
+def image_to_base64(image):
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode()
 @spaces.GPU()
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     if image is None:
     image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
     result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
     try:
+        if isinstance(image, dict):  # If image is from ImageEditor
+            composite_image = image.get("composite")
+            if composite_image is not None:
+                if isinstance(composite_image, np.ndarray):
+                    Image.fromarray(composite_image).save(image_path)
+                elif isinstance(composite_image, str):
+                    shutil.copy(composite_image, image_path)
+                else:
+                    return "Error: Unsupported image format from ImageEditor", None, None
+            else:
+                return "Error: No composite image found in ImageEditor output", None, None
+        elif isinstance(image, np.ndarray):
+            Image.fromarray(image).save(image_path)
+        elif isinstance(image, str):
+            shutil.copy(image, image_path)
+        else:
+            return "Error: Unsupported image format", None, None
         if task == "Plain Text OCR":
             res = model.chat(tokenizer, image_path, ocr_type='ocr')
             return res, None, unique_id
     finally:
         if os.path.exists(image_path):
             os.remove(image_path)
+def update_image_input(task):
+    if task == "Fine-grained OCR (Color)":
+        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
+    else:
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
 def update_inputs(task):
     if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
+        return [gr.update(visible=False)] * 5 + [gr.update(visible=True), gr.update(visible=False)]
     elif task == "Fine-grained OCR (Box)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=True),
             gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=True),
+            gr.update(visible=False)
         ]
     elif task == "Fine-grained OCR (Color)":
         return [
             gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=False),
             gr.update(visible=True, choices=["red", "green", "blue"]),
+            gr.update(visible=False),
+            gr.update(visible=True),
+            gr.update(visible=False),
+            gr.update(visible=True)
         ]
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     if res.startswith("Error:"):
         return res, None
+    res = res.replace("\\title", "\\title ")
     res = f"$$ {res} $$"
     if html_content:
     with gr.Row():
         gr.Markdown(title)
     with gr.Row():
         with gr.Column(scale=1):
             with gr.Group():
                 image_input = gr.Image(type="filepath", label="Input Image")
+                image_editor = gr.ImageEditor(label="Image Editor", type="pil", visible=False)
                 task_dropdown = gr.Dropdown(
                     choices=[
                         "Plain Text OCR",
                     visible=False
                 )
                 submit_button = gr.Button("Process")
+                editor_submit_button = gr.Button("Process Edited Image", visible=False)
         with gr.Column(scale=1):
             with gr.Group():
     task_dropdown.change(
         update_inputs,
         inputs=[task_dropdown],
+        outputs=[ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, image_input, image_editor, submit_button, editor_submit_button]
+    )
+    task_dropdown.change(
+        update_image_input,
+        inputs=[task_dropdown],
+        outputs=[image_input, image_editor, editor_submit_button]
     )
     submit_button.click(
         outputs=[output_markdown, output_html]
     )
+    editor_submit_button.click(
+        ocr_demo,
+        inputs=[image_editor, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown],
+        outputs=[output_markdown, output_html]
+    )
 if __name__ == "__main__":
     cleanup_old_files()
     demo.launch()