Spaces:

Mageia
/

GOT-OCR-Optimize

Sleeping

App Files Files Community

Mageia committed on Oct 15, 2024

Commit

ac592f2

unverified ·

1 Parent(s): 79746f6

fix: process pdf once

Browse files

Files changed (1) hide show

app.py +37 -2

app.py CHANGED Viewed

@@ -3,11 +3,10 @@ import os
 import uuid
 import gradio as gr
 import torch
 from transformers import AutoConfig, AutoModel, AutoTokenizer
-from got_ocr import got_ocr
 # 初始化模型和分词器
 model_name = "ucaslcl/GOT-OCR2_0"
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -24,6 +23,42 @@ UPLOAD_FOLDER = "./uploads"
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 def perform_ocr(image):
     if image is None:
         return "请上传图片"

 import uuid
 import gradio as gr
+import spaces
 import torch
 from transformers import AutoConfig, AutoModel, AutoTokenizer
 # 初始化模型和分词器
 model_name = "ucaslcl/GOT-OCR2_0"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+@spaces.GPU()
+def got_ocr(model, tokenizer, image_path, got_mode="format texts OCR", fine_grained_mode="", ocr_color="", ocr_box=""):
+    """Run one GOT-OCR pass over ``image_path`` and return ``(text, encoded_html)``.
+
+    Args:
+        model: Object exposing ``chat`` and ``chat_crop``; both are called with
+            the tokenizer and the image path.
+        tokenizer: Passed through unchanged as the first argument of the model call.
+        image_path: Path of the image to read. Its stem is also used to build the
+            ``<stem>_result.html`` path handed to the model as ``save_render_file``.
+        got_mode: One of "plain texts OCR", "format texts OCR",
+            "plain multi-crop OCR", "format multi-crop OCR",
+            "plain fine-grained OCR" or "format fine-grained OCR".
+        fine_grained_mode: Accepted for interface compatibility; this function
+            does not read it.
+        ocr_color: Forwarded as ``ocr_color`` in the two fine-grained modes.
+        ocr_box: Forwarded as ``ocr_box`` in the two fine-grained modes.
+
+    Returns:
+        ``(result, None)`` for the plain modes, and for format modes when the
+        render file does not exist after the call;
+        ``(result, base64_html)`` for format modes when the render file exists;
+        ``("错误: <message>", None)`` when anything raises, including an
+        unsupported ``got_mode``.
+    """
+    # Function-scope import keeps this block self-contained; the module-level
+    # import list is only partly visible in this view.
+    import base64
+
+    format_modes = ("format texts OCR", "format multi-crop OCR", "format fine-grained OCR")
+    try:
+        # Only format modes render an HTML file next to the input image.
+        result_path = None
+        if got_mode in format_modes:
+            result_path = f"{os.path.splitext(image_path)[0]}_result.html"
+
+        if got_mode == "plain texts OCR":
+            res = model.chat(tokenizer, image_path, ocr_type="ocr")
+        elif got_mode == "format texts OCR":
+            res = model.chat(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
+        elif got_mode == "plain multi-crop OCR":
+            res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
+        elif got_mode == "format multi-crop OCR":
+            res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
+        elif got_mode == "plain fine-grained OCR":
+            res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
+        elif got_mode == "format fine-grained OCR":
+            res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
+        else:
+            # Previously an unknown mode fell through with ``res`` unbound and
+            # surfaced as an UnboundLocalError message; report it explicitly.
+            raise ValueError(f"unsupported got_mode: {got_mode!r}")
+
+        if result_path is None or not os.path.exists(result_path):
+            return res, None
+
+        # Decode explicitly as UTF-8 so the result does not depend on the
+        # platform's default locale encoding.
+        with open(result_path, "r", encoding="utf-8") as f:
+            html_content = f.read()
+        encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
+        return res, encoded_html
+    except Exception as e:
+        # Deliberate catch-all: every failure is returned as a message string
+        # in the first tuple slot rather than raised to the caller.
+        return f"错误: {str(e)}", None
 def perform_ocr(image):
     if image is None:
         return "请上传图片"