Spaces:

Tonic
/

GOT-OCR

Running on Zero

App Files Files Community

Tonic committed on Sep 16, 2024

Commit

a197908

verified ·

1 Parent(s): 73597ae

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def image_to_base64(image):
 @spaces.GPU()
-def process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type=None, ocr_box=None, ocr_color=None):
     if image is None:
         return "Error: No image provided", None, None
@@ -70,19 +70,19 @@ def process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type=No
             return "Error: Unsupported image format", None, None
         if task == "Plain Text OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='ocr', max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             return res, None, unique_id
         else:
             if task == "Format Text OCR":
-                res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Fine-grained OCR (Box)":
-                res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Fine-grained OCR (Color)":
-                res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Multi-crop OCR":
-                res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Render Formatted OCR":
-                res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, max_new_tokens=max_new_tokens, no_repeat_ngram_size=no_repeat_ngram_size)
             if os.path.exists(result_path):
                 with open(result_path, 'r') as f:
@@ -173,8 +173,9 @@ def parse_latex_output(res):
     return '$$\\$$\n'.join(parsed_lines)
-def ocr_demo(image, task, ocr_type, ocr_box, ocr_color, max_new_tokens, no_repeat_ngram_size):
-    res, html_content, unique_id = process_image(image, task, max_new_tokens, no_repeat_ngram_size, ocr_type, ocr_box, ocr_color)
     if isinstance(res, str) and res.startswith("Error:"):
         return res, None
@@ -197,7 +198,6 @@ def cleanup_old_files():
             if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
                 file_path.unlink()
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     with gr.Row():
         gr.Markdown(title)
@@ -253,7 +253,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
                     visible=False
                 )
                 with gr.Row():
-                    max_new_tokens_slider = gr.Slider(50, 500, step=10, value=150, label="Max New Tokens")
                     no_repeat_ngram_size_slider = gr.Slider(1, 10, step=1, value=2, label="No Repeat N-gram Size")
                 submit_button = gr.Button("Process")
@@ -278,7 +278,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
     submit_button.click(
         ocr_demo,
-        inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, max_new_tokens_slider, no_repeat_ngram_size_slider],
         outputs=[output_markdown, output_html]
     )
     editor_submit_button.click(

 @spaces.GPU()
+def process_image(image, task, no_repeat_ngram_size, ocr_type=None, ocr_box=None, ocr_color=None):
     if image is None:
         return "Error: No image provided", None, None
             return "Error: Unsupported image format", None, None
         if task == "Plain Text OCR":
+            res = model.chat(tokenizer, image_path, ocr_type='ocr',  no_repeat_ngram_size=no_repeat_ngram_size)
             return res, None, unique_id
         else:
             if task == "Format Text OCR":
+                res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Fine-grained OCR (Box)":
+                res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Fine-grained OCR (Color)":
+                res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path, no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Multi-crop OCR":
+                res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path,  no_repeat_ngram_size=no_repeat_ngram_size)
             elif task == "Render Formatted OCR":
+                res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path,  no_repeat_ngram_size=no_repeat_ngram_size)
             if os.path.exists(result_path):
                 with open(result_path, 'r') as f:
     return '$$\\$$\n'.join(parsed_lines)
+def ocr_demo(image, task, ocr_type, ocr_box, ocr_color, no_repeat_ngram_size):
+    res, html_content, unique_id = process_image(image, task,  no_repeat_ngram_size, ocr_type, ocr_box, ocr_color)
     if isinstance(res, str) and res.startswith("Error:"):
         return res, None
             if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
                 file_path.unlink()
 with gr.Blocks(theme=gr.themes.Base()) as demo:
     with gr.Row():
         gr.Markdown(title)
                     visible=False
                 )
                 with gr.Row():
+                    # max_new_tokens_slider = gr.Slider(50, 500, step=10, value=150, label="Max New Tokens")
                     no_repeat_ngram_size_slider = gr.Slider(1, 10, step=1, value=2, label="No Repeat N-gram Size")
                 submit_button = gr.Button("Process")
     submit_button.click(
         ocr_demo,
+        inputs=[image_input, task_dropdown, ocr_type_dropdown, ocr_box_input, ocr_color_dropdown, no_repeat_ngram_size_slider],
         outputs=[output_markdown, output_html]
     )
     editor_submit_button.click(