Mageia committed on
Commit
d0f2987
·
unverified ·
1 Parent(s): 2a61207

fix: process pdf once

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import base64
2
  import os
 
3
 
4
  import gradio as gr
5
  import spaces
@@ -15,7 +16,7 @@ model = model.eval().to(device)
15
 
16
 
17
  @spaces.GPU()
18
- def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
19
  if image is None:
20
  return "错误:未提供图片"
21
 
@@ -23,18 +24,24 @@ def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
23
  image_path = image
24
  result_path = f"{os.path.splitext(image_path)[0]}_result.html"
25
 
 
 
26
  if "plain" in got_mode:
 
27
  if "multi-crop" in got_mode:
28
  res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
29
  else:
30
  res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
 
31
  return res
32
  elif "format" in got_mode:
 
33
  if "multi-crop" in got_mode:
34
  res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
35
  else:
36
  res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
37
 
 
38
  if os.path.exists(result_path):
39
  with open(result_path, "r", encoding="utf-8") as f:
40
  html_content = f.read()
@@ -42,7 +49,8 @@ def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
42
  data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
43
  preview = f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
44
  download_link = f'<a href="{data_uri}" download="result.html">下载完整结果</a>'
45
- return f"{download_link}\n\n{preview}\n\n识别结果:\n"
 
46
 
47
  return "错误: 未知的OCR模式"
48
  except Exception as e:
 
1
  import base64
2
  import os
3
+ import time
4
 
5
  import gradio as gr
6
  import spaces
 
16
 
17
 
18
  @spaces.GPU()
19
+ def ocr_process(image, got_mode, ocr_color="", ocr_box="", progress=gr.Progress()):
20
  if image is None:
21
  return "错误:未提供图片"
22
 
 
24
  image_path = image
25
  result_path = f"{os.path.splitext(image_path)[0]}_result.html"
26
 
27
+ progress(0, desc="开始处理...")
28
+
29
  if "plain" in got_mode:
30
+ progress(0.3, desc="执行OCR识别...")
31
  if "multi-crop" in got_mode:
32
  res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
33
  else:
34
  res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
35
+ progress(1, desc="处理完成")
36
  return res
37
  elif "format" in got_mode:
38
+ progress(0.3, desc="执行OCR识别...")
39
  if "multi-crop" in got_mode:
40
  res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
41
  else:
42
  res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
43
 
44
+ progress(0.7, desc="生成结果...")
45
  if os.path.exists(result_path):
46
  with open(result_path, "r", encoding="utf-8") as f:
47
  html_content = f.read()
 
49
  data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
50
  preview = f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
51
  download_link = f'<a href="{data_uri}" download="result.html">下载完整结果</a>'
52
+ progress(1, desc="处理完成")
53
+ return f"{download_link}\n\n{preview}"
54
 
55
  return "错误: 未知的OCR模式"
56
  except Exception as e: