SWHL committed
Commit 8d6e841
1 parent: fcea47c

Update models

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.TTF filter=lfs diff=lfs merge=lfs -text
 images/car_plate.jpeg filter=lfs diff=lfs merge=lfs -text
+*.ttc filter=lfs diff=lfs merge=lfs -text
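Note: the added rule is what `git lfs track "*.ttc"` would append; it presumably accompanies the Japanese TrueType Collection font (`japan.ttc`, referenced by `font_dict` in `app.py` below), so that font binary is stored via Git LFS like the existing `*.TTF` files.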
app.py CHANGED
@@ -1,87 +1,19 @@
 # -*- encoding: utf-8 -*-
-import math
-import random
 import time
 from pathlib import Path
 
 import cv2
 import gradio as gr
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
 from rapidocr_onnxruntime import RapidOCR
 
+from utils import visualize
+
+font_dict = {
+    'ch': 'FZYTK.TTF',
+    'japan': 'japan.ttc',
+    'korean': 'korean.ttf',
+    'en': 'FZYTK.TTF'
+}
 
-
-def draw_ocr_box_txt(image, boxes, txts, font_path,
-                     scores=None, text_score=0.5):
-    h, w = image.height, image.width
-    img_left = image.copy()
-    img_right = Image.new('RGB', (w, h), (255, 255, 255))
-
-    random.seed(0)
-    draw_left = ImageDraw.Draw(img_left)
-    draw_right = ImageDraw.Draw(img_right)
-    for idx, (box, txt) in enumerate(zip(boxes, txts)):
-        if scores is not None and float(scores[idx]) < text_score:
-            continue
-
-        color = (random.randint(0, 255),
-                 random.randint(0, 255),
-                 random.randint(0, 255))
-
-        box = [tuple(v) for v in box]
-        draw_left.polygon(box, fill=color)
-        draw_right.polygon([box[0][0], box[0][1],
-                            box[1][0], box[1][1],
-                            box[2][0], box[2][1],
-                            box[3][0], box[3][1]],
-                           outline=color)
-
-        box_height = math.sqrt((box[0][0] - box[3][0])**2
-                               + (box[0][1] - box[3][1])**2)
-
-        box_width = math.sqrt((box[0][0] - box[1][0])**2
-                              + (box[0][1] - box[1][1])**2)
-
-        if box_height > 2 * box_width:
-            font_size = max(int(box_width * 0.9), 10)
-            font = ImageFont.truetype(font_path, font_size,
-                                      encoding="utf-8")
-            cur_y = box[0][1]
-            for c in txt:
-                char_size = font.getsize(c)
-                draw_right.text((box[0][0] + 3, cur_y), c,
-                                fill=(0, 0, 0), font=font)
-                cur_y += char_size[1]
-        else:
-            font_size = max(int(box_height * 0.8), 10)
-            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-            draw_right.text([box[0][0], box[0][1]], txt,
-                            fill=(0, 0, 0), font=font)
-
-    img_left = Image.blend(image, img_left, 0.5)
-    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
-    img_show.paste(img_left, (0, 0, w, h))
-    img_show.paste(img_right, (w, 0, w * 2, h))
-    return np.array(img_show)
-
-
-def visualize(image_path, boxes, txts, scores,
-              font_path="./FZYTK.TTF"):
-    image = Image.open(image_path)
-
-    draw_img = draw_ocr_box_txt(image, boxes,
-                                txts, font_path,
-                                scores,
-                                text_score=0.5)
-
-    draw_img_save = Path("./inference_results/")
-    if not draw_img_save.exists():
-        draw_img_save.mkdir(parents=True, exist_ok=True)
-
-    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
-    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
-    cv2.imwrite(image_save, draw_img[:, :, ::-1])
-    return image_save
 
 
 def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5,
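Note: the visualization helpers (`draw_ocr_box_txt` and `visualize`) deleted here are not dropped; the new `from utils import visualize` line indicates they moved into a `utils.py` module that is not shown in this diff (see the sketch after the app.py diff). The new `font_dict` maps each recognition language to a display font, with both Chinese and English using FZYTK.TTF.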
@@ -102,8 +34,17 @@ def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5,
                          rec_img_shape=rec_image_shape)
     elapse = time.time() - s
 
+    if 'ch' in rec_model_path or 'en' in rec_model_path:
+        lan_name = 'ch'
+    elif 'japan' in rec_model_path:
+        lan_name = 'japan'
+    elif 'korean' in rec_model_path:
+        lan_name = 'korean'
+    else:
+        lan_name = 'ch'
+
     out_log_list.append(f'Init Model cost: {elapse:.5f}')
-    out_log_list.extend([f'det_model:{det_model_path}',
+    out_log_list.extend([f'det_model: {det_model_path}',
                          f'rec_model: {rec_model_path}',
                          f'rec_image_shape: {rec_image_shape}'])
 
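Note: `lan_name` is derived by substring matching on the recognition model filename (e.g. `korean_mobile_v2.0_rec_infer.onnx` → `'korean'`) and falls back to `'ch'` for anything unmatched. Since `font_dict` maps `'en'` and `'ch'` to the same font, the `'en'` test in the first branch selects the same font the fallback would. The other change in this hunk just adds a missing space after `det_model:` in the log line.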
@@ -120,73 +61,78 @@
         return img_path, '未识别到有效文本', out_log
 
     dt_boxes, rec_res, scores = list(zip(*ocr_result))
-    img_save_path = visualize(img_path, dt_boxes, rec_res, scores)
+    font_path = Path('fonts') / font_dict.get(lan_name)
+    img_save_path = visualize(img_path, dt_boxes, rec_res, scores,
+                              font_path=str(font_path))
     output_text = [f'{one_rec} {float(score):.4f}'
                    for one_rec, score in zip(rec_res, scores)]
     return img_save_path, output_text, out_log
 
 
-examples = [['images/1.jpg'],
-            ['images/ch_en_num.jpg'],
-            ['images/air_ticket.jpg'],
-            ['images/car_plate.jpeg'],
-            ['images/idcard.jpg'],
-            ['images/train_ticket.jpeg']]
-
-with gr.Blocks(title='RapidOCR') as demo:
-    gr.Markdown("""
-    <h1><center><a href="https://github.com/RapidAI/RapidOCR" target="_blank">Rapid⚡OCR</a></center></h1>
-
-    ### Docs: [Docs](https://rapidocr.rtfd.io/)
-    ### Parameters docs: [link](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)
-    - **box_thresh**: 检测到的框是文本的概率,值越大,框中是文本的概率就越大。存在漏检时,调低该值。取值范围:[0, 1.0]
-    - **unclip_ratio**: 控制文本检测框的大小,值越大,检测框整体越大。在出现框截断文字的情况,调大该值。取值范围:[1.5, 2.0]
-    - **text_score**: 文本识别结果是正确的置信度,值越大,显示出的识别结果更准确。存在漏检时,调低该值。取值范围:[0, 1.0]
-    ### 运行环境:
-    Python: 3.8 | onnxruntime: 1.14.1 | rapidocr_onnxruntime: 1.2.5""")
-    gr.Markdown('**超参数调节**')
-    with gr.Row():
-        box_thresh = gr.Slider(minimum=0, maximum=1.0, value=0.5,
-                               label='box_thresh', step=0.1,
-                               interactive=True,
-                               info='[0, 1.0]')
-        unclip_ratio = gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
-                                 label='unclip_ratio', step=0.1,
-                                 interactive=True,
-                                 info='[1.5, 2.0]')
-        text_score = gr.Slider(minimum=0, maximum=1.0, value=0.5,
-                               label='text_score', step=0.1,
-                               interactive=True,
-                               info='[0, 1.0]')
-
-    gr.Markdown('**模型选择**')
-    with gr.Row():
-        text_det = gr.Dropdown(['ch_PP-OCRv3_det_infer.onnx',
-                                'ch_PP-OCRv2_det_infer.onnx',
-                                'ch_ppocr_server_v2.0_det_infer.onnx'],
-                               label='选择文本检测模型',
-                               value='ch_PP-OCRv3_det_infer.onnx',
-                               interactive=True)
-
-        text_rec = gr.Dropdown(['ch_PP-OCRv3_rec_infer.onnx',
-                                'ch_PP-OCRv2_rec_infer.onnx',
-                                'ch_ppocr_server_v2.0_rec_infer.onnx'],
-                               label='选择文本识别模型',
-                               value='ch_PP-OCRv3_rec_infer.onnx',
-                               interactive=True)
-
-    with gr.Row():
-        input_img = gr.Image(type='filepath', label='Input')
-        out_img = gr.Image(type='filepath', label='Output')
-    out_log = gr.outputs.Textbox(type='text', label='Run Log')
-    out_txt = gr.outputs.Textbox(type='text', label='RecText')
-    button = gr.Button('Submit')
-    button.click(fn=inference,
-                 inputs=[input_img, box_thresh, unclip_ratio, text_score,
-                         text_det, text_rec],
-                 outputs=[out_img, out_txt, out_log])
-    gr.Examples(examples=examples,
-                inputs=[input_img, box_thresh, unclip_ratio, text_score,
-                        text_det, text_rec],
-                outputs=[out_img, out_txt, out_log], fn=inference)
-demo.launch(debug=True, enable_queue=True)
+if __name__ == '__main__':
+
+    examples = [['images/1.jpg'],
+                ['images/ch_en_num.jpg'],
+                ['images/air_ticket.jpg'],
+                ['images/car_plate.jpeg'],
+                ['images/idcard.jpg'],
+                ['images/train_ticket.jpeg'],
+                ['images/japan_2.jpg'],
+                ['images/korean_1.jpg']]
+
+    with gr.Blocks(title='RapidOCR') as demo:
+        gr.Markdown("""
+        <h1><center><a href="https://github.com/RapidAI/RapidOCR" target="_blank">Rapid⚡OCR</a></center></h1>
+
+        ### Docs: [Docs](https://rapidocr.rtfd.io/)
+        ### 运行环境:
+        Python: 3.8 | onnxruntime: 1.14.1 | rapidocr_onnxruntime: 1.2.5""")
+        gr.Markdown(
+            '''**[超参数调节](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)**
+            - **box_thresh**: 检测到的框是文本的概率,值越大,框中是文本的概率就越大。存在漏检时,调低该值。取值范围:[0, 1.0]
+            - **unclip_ratio**: 控制文本检测框的大小,值越大,检测框整体越大。在出现框截断文字的情况,调大该值。取值范围:[1.5, 2.0]
+            - **text_score**: 文本识别结果是正确的置信度,值越大,显示出的识别结果更准确。存在漏检时,调低该值。取值范围:[0, 1.0]
+            ''')
+        with gr.Row():
+            box_thresh = gr.Slider(minimum=0, maximum=1.0, value=0.5,
+                                   label='box_thresh', step=0.1,
+                                   interactive=True,
+                                   info='[0, 1.0]')
+            unclip_ratio = gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
+                                     label='unclip_ratio', step=0.1,
+                                     interactive=True,
+                                     info='[1.5, 2.0]')
+            text_score = gr.Slider(minimum=0, maximum=1.0, value=0.5,
+                                   label='text_score', step=0.1,
+                                   interactive=True,
+                                   info='[0, 1.0]')
+
+        gr.Markdown('**[模型选择](https://github.com/RapidAI/RapidOCR/blob/main/docs/models.md)**')
+        with gr.Row():
+            text_det = gr.Dropdown(['ch_PP-OCRv3_det_infer.onnx',
+                                    'ch_PP-OCRv2_det_infer.onnx',
+                                    'ch_ppocr_server_v2.0_det_infer.onnx'],
+                                   label='选择文本检测模型',
+                                   value='ch_PP-OCRv3_det_infer.onnx',
+                                   interactive=True)
+            rec_model_list = [v.name for v in Path('models/text_rec').iterdir()]
+            text_rec = gr.Dropdown(rec_model_list,
+                                   label='选择文本识别模型(包括中英文和多语言)',
+                                   value='ch_PP-OCRv3_rec_infer.onnx',
+                                   interactive=True)
+
+        with gr.Row():
+            input_img = gr.Image(type='filepath', label='Input')
+            out_img = gr.Image(type='filepath', label='Output')
+        out_log = gr.outputs.Textbox(type='text', label='Run Log')
+        out_txt = gr.outputs.Textbox(type='text', label='RecText')
+        button = gr.Button('Submit')
+        button.click(fn=inference,
+                     inputs=[input_img, box_thresh, unclip_ratio, text_score,
+                             text_det, text_rec],
+                     outputs=[out_img, out_txt, out_log])
+        gr.Examples(examples=examples,
+                    inputs=[input_img, box_thresh, unclip_ratio, text_score,
+                            text_det, text_rec],
+                    outputs=[out_img, out_txt, out_log], fn=inference)
+    demo.launch(debug=True, enable_queue=True)
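(For non-Chinese readers, the UI strings above: '未识别到有效文本' = "no valid text recognized", '超参数调节' = "hyperparameter tuning", '模型选择' = "model selection", '选择文本检测模型' / '选择文本识别模型(包括中英文和多语言)' = "select text detection / recognition model (incl. Chinese, English, and multilingual)", '运行环境' = "runtime environment".)

For reference, the commit now does `from utils import visualize`, but `utils.py` itself is not part of this diff. Below is a minimal sketch of what it presumably contains, assuming the `draw_ocr_box_txt`/`visualize` code deleted from app.py moved over largely unchanged; it is condensed (the vertical-text drawing branch is omitted) and `font_path` is a required argument so the per-language font from `font_dict` can be passed in:

# utils.py -- hypothetical reconstruction, NOT shown in this commit
import math
import random
import time
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont


def draw_ocr_box_txt(image, boxes, txts, font_path, scores=None, text_score=0.5):
    # Left half: the input image blended with filled box overlays.
    # Right half: a white canvas with the recognized text at each box position.
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))
    random.seed(0)  # same box index -> same color across runs
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue  # hide low-confidence results, mirroring text_score in the UI
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.polygon([coord for point in box for coord in point],
                           outline=color)
        # Scale the font to the box height, with a readable minimum.
        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
        font_size = max(int(box_height * 0.8), 10)
        font = ImageFont.truetype(font_path, font_size)
        draw_right.text(box[0], txt, fill=(0, 0, 0), font=font)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0))
    img_show.paste(img_right, (w, 0))
    return np.array(img_show)


def visualize(image_path, boxes, txts, scores, font_path):
    image = Image.open(image_path)
    draw_img = draw_ocr_box_txt(image, boxes, txts, font_path,
                                scores, text_score=0.5)
    save_dir = Path('./inference_results/')
    save_dir.mkdir(parents=True, exist_ok=True)
    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    image_save = str(save_dir / f'{time_stamp}_{Path(image_path).name}')
    cv2.imwrite(image_save, draw_img[:, :, ::-1])  # PIL is RGB, OpenCV wants BGR
    return image_save

The returned file path is what `inference` hands to the `out_img` component, which is why the output image widget uses `type='filepath'`.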
FZYTK.TTF → models/text_rec/en_PP-OCRv3_rec_infer.onnx RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4065a23df6823c8e2b69a0e76d02f02a6470b8774a5e91086609701ad95cc33f
-size 3241748
+oid sha256:ef7abd8bd3629ae57ea2c28b425c1bd258a871b93fd2fe7c433946ade9b5d9ea
+size 8967018
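Note: this "RENAMED" pairing is likely an artifact of rename detection, not a font turning into a model. The removed FZYTK.TTF pointer and the added en_PP-OCRv3_rec_infer.onnx pointer are near-identical three-line LFS stubs, so they were matched as a rename even though both the oid and size change. In effect, FZYTK.TTF leaves the repository root (app.py now loads fonts from a fonts/ directory) and a new English recognition model is added.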
models/text_rec/en_number_mobile_v2.0_rec_infer.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e679ba625c544444be78292a50d9e1af9caa1569239a88bb8b864cb688b11c01
+size 1882607
models/text_rec/japan_rec_crnn_v2.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b0495059f5738166e606d864b04ff00093f67a807efb02cddf472839cae970c
+size 3571807
models/text_rec/korean_mobile_v2.0_rec_infer.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6558500138b43b46a4941957fb8c918546dae5fb0e71718536f1883acc80faf
+size 3290650
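Each of the added files above is a three-line Git LFS pointer stub rather than the model itself: `oid` names the sha256 of the actual ONNX blob and `size` its byte count, and LFS substitutes the blob at checkout. Because app.py now populates its recognition dropdown by listing `models/text_rec`, these new English, Japanese, and Korean models appear in the UI with no further code change. A small sketch (not part of the commit; the helper name `read_lfs_pointer` is hypothetical) of reading such a stub:

from pathlib import Path

def read_lfs_pointer(path):
    # Parse the 'key value' lines of a Git LFS pointer file.
    fields = dict(line.split(' ', 1)
                  for line in Path(path).read_text().splitlines() if line)
    return {'version': fields['version'],
            'oid': fields['oid'].split(':', 1)[1],  # strip the 'sha256:' prefix
            'size': int(fields['size'])}

# e.g. read_lfs_pointer('models/text_rec/japan_rec_crnn_v2.onnx')
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': '2b0495059f5738166e606d864b04ff00093f67a807efb02cddf472839cae970c',
#     'size': 3571807}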