Spaces:

Tonic
/

GOT-OCR

Running

App Files Community

Tonic committed on Sep 14, 2024

Commit

7dcbad8

verified ·

1 Parent(s): ee4b3d0

improve file handling, display html

Browse files

Files changed (2) hide show

.gitignore +2 -1
app.py +40 -23

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 .DS_Store
-.venv/*

 .DS_Store
+.venv/*
+notes.py

app.py CHANGED Viewed

@@ -10,6 +10,9 @@ import numpy as np
 import yaml
 from pathlib import Path
 from globe import title, description, modelinfor, joinus
 model_name = 'ucaslcl/GOT-OCR2_0'
@@ -24,28 +27,36 @@ def image_to_base64(image):
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode()
-html_file = './demo.html'
 @spaces.GPU
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
     if task == "Plain Text OCR":
         res = model.chat(tokenizer, image, ocr_type='ocr')
-        return res, None
-    elif task == "Format Text OCR":
-        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
-    elif task == "Fine-grained OCR (Box)":
-        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=html_file)
-    elif task == "Fine-grained OCR (Color)":
-        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=html_file)
-    elif task == "Multi-crop OCR":
-        res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
-    elif task == "Render Formatted OCR":
-        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=html_file)
-    with open(html_file, 'r') as f:
-        html_content = f.read()
-    return res, html_content
 def update_inputs(task):
     if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
         return [gr.update(visible=False)] * 3
@@ -64,17 +75,22 @@ def update_inputs(task):
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
-    res = f"${res}$"
-    res = res.replace("$\\begin{tabular}", "\\begin{tabular}")
-    res = res.replace("\\end{tabular}$", "\\end{tabular}")
-    res = res.replace("\\(", "")
-    res = res.replace("\\)", "")
     if html_content:
         html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
         return res, html_string
     return res, None
-import gradio as gr
 with gr.Blocks() as demo:
     gr.Markdown(title)
@@ -102,7 +118,7 @@ with gr.Blocks() as demo:
         )
         ocr_box_input = gr.Textbox(
             label="OCR Box (x1,y1,x2,y2)",
-            placeholder="e.g., 100,100,200,200",
             visible=False
         )
         ocr_color_dropdown = gr.Dropdown(
@@ -130,4 +146,5 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
     demo.launch()

 import yaml
 from pathlib import Path
 from globe import title, description, modelinfor, joinus
+import uuid
+import tempfile
+import time
 model_name = 'ucaslcl/GOT-OCR2_0'
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode()
+results_folder = Path('./results')
+results_folder.mkdir(parents=True, exist_ok=True)
 @spaces.GPU
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
+    unique_id = str(uuid.uuid4())
+    temp_html_path = results_folder / f"{unique_id}.html"
     if task == "Plain Text OCR":
         res = model.chat(tokenizer, image, ocr_type='ocr')
+        return res, None, unique_id
+    else:
+        if task == "Format Text OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
+        elif task == "Fine-grained OCR (Box)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=str(temp_html_path))
+        elif task == "Fine-grained OCR (Color)":
+            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=str(temp_html_path))
+        elif task == "Multi-crop OCR":
+            res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
+        elif task == "Render Formatted OCR":
+            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=str(temp_html_path))
+        if temp_html_path.exists():
+            with open(temp_html_path, 'r') as f:
+                html_content = f.read()
+            return res, html_content, unique_id
+        else:
+            return res, None, unique_id
 def update_inputs(task):
     if task in ["Plain Text OCR", "Format Text OCR", "Multi-crop OCR", "Render Formatted OCR"]:
         return [gr.update(visible=False)] * 3
 def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
     res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)
+    res = f"$$ {res} $$"
+    # res = res.replace("$$ \\begin{tabular}", "\\begin{tabular}")
+    # res = res.replace("\\end{tabular} $$", "\\end{tabular}")
+    # res = res.replace("\\(", "")
+    # res = res.replace("\\)", "")
     if html_content:
         html_string = f'<iframe srcdoc="{html_content}" width="100%" height="600px"></iframe>'
         return res, html_string
     return res, None
+def cleanup_old_files():
+    current_time = time.time()
+    for file_path in results_folder.glob('*.html'):
+        if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
+            file_path.unlink()
 with gr.Blocks() as demo:
     gr.Markdown(title)
         )
         ocr_box_input = gr.Textbox(
             label="OCR Box (x1,y1,x2,y2)",
+            placeholder="[100,100,200,200]",
             visible=False
         )
         ocr_color_dropdown = gr.Dropdown(
     )
 if __name__ == "__main__":
+    cleanup_old_files()
     demo.launch()