SWHL committed on
Commit 00e3b6c
1 Parent(s): 461c5b6

Update rapidocr_onnxruntime

resources/fonts/FZYTK.TTF → FZYTK.TTF RENAMED
File without changes
app.py CHANGED
@@ -5,24 +5,18 @@ os.system('pip install -r requirements.txt')
 
 import math
 import random
-from pathlib import Path
 import time
+from pathlib import Path
 
 import cv2
 import gradio as gr
-from rapidocr_onnxruntime import TextSystem
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
-
-text_sys = TextSystem('config.yaml')
+from rapidocr_onnxruntime import RapidOCR
 
 
 def draw_ocr_box_txt(image, boxes, txts, font_path,
                      scores=None, text_score=0.5):
-    if not Path(font_path).exists():
-        raise FileNotFoundError(f'The {font_path} does not exists! \n'
-                                f'Please download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing')
-
     h, w = image.height, image.width
     img_left = image.copy()
     img_right = Image.new('RGB', (w, h), (255, 255, 255))
@@ -31,12 +25,14 @@ def draw_ocr_box_txt(image, boxes, txts, font_path,
     draw_left = ImageDraw.Draw(img_left)
     draw_right = ImageDraw.Draw(img_right)
     for idx, (box, txt) in enumerate(zip(boxes, txts)):
-        if scores is not None and scores[idx] < text_score:
+        if scores is not None and float(scores[idx]) < text_score:
             continue
 
         color = (random.randint(0, 255),
                  random.randint(0, 255),
                  random.randint(0, 255))
+
+        box = [tuple(v) for v in box]
         draw_left.polygon(box, fill=color)
         draw_right.polygon([box[0][0], box[0][1],
                             box[1][0], box[1][1],
@@ -73,10 +69,9 @@ def draw_ocr_box_txt(image, boxes, txts, font_path,
     return np.array(img_show)
 
 
-def visualize(image_path, boxes, rec_res, font_path="resources/fonts/FZYTK.TTF"):
+def visualize(image_path, boxes, txts, scores,
+              font_path="./FZYTK.TTF"):
     image = Image.open(image_path)
-    txts = [rec_res[i][0] for i in range(len(rec_res))]
-    scores = [rec_res[i][1] for i in range(len(rec_res))]
 
     draw_img = draw_ocr_box_txt(image, boxes,
                                 txts, font_path,
@@ -96,18 +91,23 @@ def visualize(image_path, boxes, rec_res, font_path="resources/fonts/FZYTK.TTF")
 def inference(img, box_thresh, unclip_ratio, text_score):
     img_path = img.name
     img = cv2.imread(img_path)
-    dt_boxes, rec_res = text_sys(img,
-                                 box_thresh=box_thresh,
-                                 unclip_ratio=unclip_ratio,
-                                 text_score=text_score)
-    img_save_path = visualize(img_path, dt_boxes, rec_res)
-    return img_save_path, rec_res
+    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
+                              unclip_ratio=unclip_ratio,
+                              text_score=text_score)
+    dt_boxes, rec_res, scores = list(zip(*ocr_result))
+    img_save_path = visualize(img_path, dt_boxes, rec_res, scores)
+    output_text = [f'{one_rec} {float(score):.4f}'
+                   for one_rec, score in zip(rec_res, scores)]
+    return img_save_path, output_text
 
 
-title = 'Rapid🗲OCR Demo (捷智OCR)'
+title = 'RapidOCR Demo (捷智OCR)'
 description = 'Gradio demo for RapidOCR. Github Repo: https://github.com/RapidAI/RapidOCR'
 article = "<p style='text-align: center'> Completely open source, free and support offline deployment of multi-platform and multi-language OCR SDK <a href='https://github.com/RapidAI/RapidOCR'>Github Repo</a></p>"
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
+
+rapid_ocr = RapidOCR()
+
 gr.Interface(
     inference,
     inputs=[
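
For orientation, here is a minimal sketch of the new call pattern introduced above, assuming only what the diff itself shows: RapidOCR() is constructed without arguments, the call accepts box_thresh / unclip_ratio / text_score keyword overrides, the second return value (ignored as _ in inference()) is timing information, and each result row unpacks to (box, text, score), which is what the zip(*ocr_result) line relies on. The image path below is a hypothetical placeholder.

import cv2
from rapidocr_onnxruntime import RapidOCR

engine = RapidOCR()
img = cv2.imread('test.jpg')  # hypothetical sample image

# Same keyword overrides that inference() forwards above.
ocr_result, _ = engine(img, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5)

# Each row unpacks to (detection box, recognized text, confidence score).
for box, text, score in ocr_result:
    print(f'{text} {float(score):.4f}')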
config.yaml DELETED
@@ -1,72 +0,0 @@
-Global:
-    text_score: 0.5
-    use_angle_cls: true
-    print_verbose: true
-    min_height: 30
-    width_height_ratio: 8
-
-Det:
-    module_name: ch_ppocr_v3_det
-    class_name: TextDetector
-    model_path: resources/models/ch_PP-OCRv3_det_infer.onnx
-
-    use_cuda: false
-    # Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
-    CUDAExecutionProvider:
-        device_id: 0
-        arena_extend_strategy: kNextPowerOfTwo
-        cudnn_conv_algo_search: EXHAUSTIVE
-        do_copy_in_default_stream: true
-
-    pre_process:
-        DetResizeForTest:
-            limit_side_len: 736
-            limit_type: min
-        NormalizeImage:
-            std: [0.229, 0.224, 0.225]
-            mean: [0.485, 0.456, 0.406]
-            scale: 1./255.
-            order: hwc
-        ToCHWImage:
-        KeepKeys:
-            keep_keys: ['image', 'shape']
-
-    post_process:
-        thresh: 0.3
-        box_thresh: 0.5
-        max_candidates: 1000
-        unclip_ratio: 1.6
-        use_dilation: true
-        score_mode: fast
-
-Cls:
-    module_name: ch_ppocr_v2_cls
-    class_name: TextClassifier
-    model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx
-
-    use_cuda: false
-    CUDAExecutionProvider:
-        device_id: 0
-        arena_extend_strategy: kNextPowerOfTwo
-        cudnn_conv_algo_search: EXHAUSTIVE
-        do_copy_in_default_stream: true
-
-    cls_image_shape: [3, 48, 192]
-    cls_batch_num: 6
-    cls_thresh: 0.9
-    label_list: ['0', '180']
-
-Rec:
-    module_name: ch_ppocr_v3_rec
-    class_name: TextRecognizer
-    model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx
-
-    use_cuda: false
-    CUDAExecutionProvider:
-        device_id: 0
-        arena_extend_strategy: kNextPowerOfTwo
-        cudnn_conv_algo_search: EXHAUSTIVE
-        do_copy_in_default_stream: true
-
-    rec_img_shape: [3, 48, 320]
-    rec_batch_num: 6
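
Note: the knobs this file carried (detection thresholds, angle classification, batch sizes) now come from the default config bundled inside the rapidocr_onnxruntime package; the demo overrides the three it exposes per call, e.g. rapid_ocr(img, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5) as in the inference() hunk above. Whether the package accepts further overrides is not shown by this commit.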
rapidocr_onnxruntime/__init__.py DELETED
@@ -1,4 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-from .rapid_ocr_api import TextSystem
rapidocr_onnxruntime/ch_ppocr_v2_cls/__init__.py DELETED
@@ -1,4 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-from .text_cls import TextClassifier
rapidocr_onnxruntime/ch_ppocr_v2_cls/config.yaml DELETED
@@ -1,14 +0,0 @@
-model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx
-
-use_cuda: false
-# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
-CUDAExecutionProvider:
-    device_id: 0
-    arena_extend_strategy: kNextPowerOfTwo
-    cudnn_conv_algo_search: EXHAUSTIVE
-    do_copy_in_default_stream: true
-
-cls_image_shape: [3, 48, 192]
-cls_batch_num: 6
-cls_thresh: 0.9
-label_list: ['0', '180']
rapidocr_onnxruntime/ch_ppocr_v2_cls/text_cls.py DELETED
@@ -1,117 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import argparse
-import copy
-import math
-import time
-from typing import List
-
-import cv2
-import numpy as np
-
-try:
-    from .utils import ClsPostProcess, read_yaml, OrtInferSession
-except:
-    from utils import ClsPostProcess, read_yaml, OrtInferSession
-
-
-class TextClassifier(object):
-    def __init__(self, config):
-        self.cls_image_shape = config['cls_image_shape']
-        self.cls_batch_num = config['cls_batch_num']
-        self.cls_thresh = config['cls_thresh']
-        self.postprocess_op = ClsPostProcess(config['label_list'])
-
-        session_instance = OrtInferSession(config)
-        self.session = session_instance.session
-        self.input_name = session_instance.get_input_name()
-
-    def __call__(self, img_list: List[np.ndarray]):
-        if isinstance(img_list, np.ndarray):
-            img_list = [img_list]
-
-        img_list = copy.deepcopy(img_list)
-
-        # Calculate the aspect ratio of all text bars
-        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
-
-        # Sorting can speed up the cls process
-        indices = np.argsort(np.array(width_list))
-
-        img_num = len(img_list)
-        cls_res = [['', 0.0]] * img_num
-        batch_num = self.cls_batch_num
-        elapse = 0
-        for beg_img_no in range(0, img_num, batch_num):
-            end_img_no = min(img_num, beg_img_no + batch_num)
-
-            norm_img_batch = []
-            for ino in range(beg_img_no, end_img_no):
-                norm_img = self.resize_norm_img(img_list[indices[ino]])
-                norm_img = norm_img[np.newaxis, :]
-                norm_img_batch.append(norm_img)
-            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)
-
-            starttime = time.time()
-            onnx_inputs = {self.input_name: norm_img_batch}
-            prob_out = self.session.run(None, onnx_inputs)[0]
-            cls_result = self.postprocess_op(prob_out)
-            elapse += time.time() - starttime
-
-            for rno in range(len(cls_result)):
-                label, score = cls_result[rno]
-                cls_res[indices[beg_img_no + rno]] = [label, score]
-                if '180' in label and score > self.cls_thresh:
-                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
-                        img_list[indices[beg_img_no + rno]], 1)
-        return img_list, cls_res, elapse
-
-    def resize_norm_img(self, img):
-        img_c, img_h, img_w = self.cls_image_shape
-        h, w = img.shape[:2]
-        ratio = w / float(h)
-        if math.ceil(img_h * ratio) > img_w:
-            resized_w = img_w
-        else:
-            resized_w = int(math.ceil(img_h * ratio))
-
-        resized_image = cv2.resize(img, (resized_w, img_h))
-        resized_image = resized_image.astype('float32')
-        if img_c == 1:
-            resized_image = resized_image / 255
-            resized_image = resized_image[np.newaxis, :]
-        else:
-            resized_image = resized_image.transpose((2, 0, 1)) / 255
-
-        resized_image -= 0.5
-        resized_image /= 0.5
-        padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
-        padding_im[:, :, :resized_w] = resized_image
-        return padding_im
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
-    parser.add_argument('--config_path', type=str, default='config.yaml')
-    args = parser.parse_args()
-
-    config = read_yaml(args.config_path)
-
-    text_classifier = TextClassifier(config)
-
-    img = cv2.imread(args.image_path)
-    img_list, cls_res, predict_time = text_classifier(img)
-    for ino in range(len(img_list)):
-        print(f"cls result:{cls_res[ino]}")
rapidocr_onnxruntime/ch_ppocr_v2_cls/utils.py DELETED
@@ -1,80 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import warnings
-
-import yaml
-from onnxruntime import (get_available_providers, get_device,
-                         SessionOptions, InferenceSession,
-                         GraphOptimizationLevel)
-
-
-class OrtInferSession(object):
-    def __init__(self, config):
-        sess_opt = SessionOptions()
-        sess_opt.log_severity_level = 4
-        sess_opt.enable_cpu_mem_arena = False
-        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
-
-        cuda_ep = 'CUDAExecutionProvider'
-        cpu_ep = 'CPUExecutionProvider'
-        cpu_provider_options = {
-            "arena_extend_strategy": "kSameAsRequested",
-        }
-
-        EP_list = []
-        if config['use_cuda'] and get_device() == 'GPU' \
-                and cuda_ep in get_available_providers():
-            EP_list = [(cuda_ep, config[cuda_ep])]
-        EP_list.append((cpu_ep, cpu_provider_options))
-
-        self.session = InferenceSession(config['model_path'],
-                                        sess_options=sess_opt,
-                                        providers=EP_list)
-
-        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
-            warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
-                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
-                          'you can check their relations from the offical web site: '
-                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
-                          RuntimeWarning)
-
-    def get_input_name(self, input_idx=0):
-        return self.session.get_inputs()[input_idx].name
-
-    def get_output_name(self, output_idx=0):
-        return self.session.get_outputs()[output_idx].name
-
-
-def read_yaml(yaml_path):
-    with open(yaml_path, 'rb') as f:
-        data = yaml.load(f, Loader=yaml.Loader)
-    return data
-
-
-class ClsPostProcess(object):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, label_list):
-        super(ClsPostProcess, self).__init__()
-        self.label_list = label_list
-
-    def __call__(self, preds, label=None):
-        pred_idxs = preds.argmax(axis=1)
-        decode_out = [(self.label_list[idx], preds[i, idx])
-                      for i, idx in enumerate(pred_idxs)]
-        if label is None:
-            return decode_out
-
-        label = [(self.label_list[idx], 1.0) for idx in label]
-        return decode_out, label
rapidocr_onnxruntime/ch_ppocr_v3_det/__init__.py DELETED
@@ -1,4 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-from .text_detect import TextDetector
rapidocr_onnxruntime/ch_ppocr_v3_det/config.yaml DELETED
@@ -1,29 +0,0 @@
-model_path: resources/models/ch_PP-OCRv3_det_infer.onnx
-
-use_cuda: false
-CUDAExecutionProvider:
-    device_id: 0
-    arena_extend_strategy: kNextPowerOfTwo
-    cudnn_conv_algo_search: EXHAUSTIVE
-    do_copy_in_default_stream: true
-
-pre_process:
-    DetResizeForTest:
-        limit_side_len: 736
-        limit_type: min
-    NormalizeImage:
-        std: [0.229, 0.224, 0.225]
-        mean: [0.485, 0.456, 0.406]
-        scale: 1./255.
-        order: hwc
-    ToCHWImage:
-    KeepKeys:
-        keep_keys: ['image', 'shape']
-
-post_process:
-    thresh: 0.3
-    box_thresh: 0.5
-    max_candidates: 1000
-    unclip_ratio: 1.6
-    use_dilation: true
-    score_mode: "fast"
rapidocr_onnxruntime/ch_ppocr_v3_det/text_detect.py DELETED
@@ -1,127 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import argparse
-import time
-
-import cv2
-import numpy as np
-
-try:
-    from .utils import (DBPostProcess, create_operators,
-                        transform, read_yaml, OrtInferSession)
-except:
-    from utils import (DBPostProcess, create_operators,
-                       transform, read_yaml, OrtInferSession)
-
-
-class TextDetector(object):
-    def __init__(self, config):
-        self.preprocess_op = create_operators(config['pre_process'])
-        self.postprocess_op = DBPostProcess(**config['post_process'])
-
-        session_instance = OrtInferSession(config)
-        self.session = session_instance.session
-        self.input_name = session_instance.get_input_name()
-
-    def __call__(self, img):
-        if img is None:
-            raise ValueError('img is None')
-
-        ori_im_shape = img.shape[:2]
-
-        data = {'image': img}
-        data = transform(data, self.preprocess_op)
-        img, shape_list = data
-        if img is None:
-            return None, 0
-
-        img = np.expand_dims(img, axis=0).astype(np.float32)
-        shape_list = np.expand_dims(shape_list, axis=0)
-
-        starttime = time.time()
-        preds = self.session.run(None, {self.input_name: img})
-
-        post_result = self.postprocess_op(preds[0], shape_list)
-
-        dt_boxes = post_result[0]['points']
-        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape)
-        elapse = time.time() - starttime
-        return dt_boxes, elapse
-
-    def order_points_clockwise(self, pts):
-        """
-        reference from:
-        https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
-        sort the points based on their x-coordinates
-        """
-        xSorted = pts[np.argsort(pts[:, 0]), :]
-
-        # grab the left-most and right-most points from the sorted
-        # x-roodinate points
-        leftMost = xSorted[:2, :]
-        rightMost = xSorted[2:, :]
-
-        # now, sort the left-most coordinates according to their
-        # y-coordinates so we can grab the top-left and bottom-left
-        # points, respectively
-        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
-        (tl, bl) = leftMost
-
-        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
-        (tr, br) = rightMost
-
-        rect = np.array([tl, tr, br, bl], dtype="float32")
-        return rect
-
-    def clip_det_res(self, points, img_height, img_width):
-        for pno in range(points.shape[0]):
-            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
-            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
-        return points
-
-    def filter_tag_det_res(self, dt_boxes, image_shape):
-        img_height, img_width = image_shape[:2]
-        dt_boxes_new = []
-        for box in dt_boxes:
-            box = self.order_points_clockwise(box)
-            box = self.clip_det_res(box, img_height, img_width)
-            rect_width = int(np.linalg.norm(box[0] - box[1]))
-            rect_height = int(np.linalg.norm(box[0] - box[3]))
-            if rect_width <= 3 or rect_height <= 3:
-                continue
-            dt_boxes_new.append(box)
-        dt_boxes = np.array(dt_boxes_new)
-        return dt_boxes
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--config_path', type=str, default='config.yaml')
-    parser.add_argument('--image_path', type=str, default=None)
-    args = parser.parse_args()
-
-    config = read_yaml(args.config_path)
-
-    text_detector = TextDetector(config)
-
-    img = cv2.imread(args.image_path)
-    dt_boxes, elapse = text_detector(img)
-
-    from utils import draw_text_det_res
-    src_im = draw_text_det_res(dt_boxes, args.image_path)
-    cv2.imwrite('det_results.jpg', src_im)
-    print('The det_results.jpg has been saved in the current directory.')
rapidocr_onnxruntime/ch_ppocr_v3_det/utils.py DELETED
@@ -1,452 +0,0 @@
-"""
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import sys
-import warnings
-
-import cv2
-import numpy as np
-import pyclipper
-import six
-import yaml
-from shapely.geometry import Polygon
-from onnxruntime import (get_available_providers, get_device,
-                         SessionOptions, InferenceSession,
-                         GraphOptimizationLevel)
-
-
-class OrtInferSession(object):
-    def __init__(self, config):
-        sess_opt = SessionOptions()
-        sess_opt.log_severity_level = 4
-        sess_opt.enable_cpu_mem_arena = False
-        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
-
-        cuda_ep = 'CUDAExecutionProvider'
-        cpu_ep = 'CPUExecutionProvider'
-        cpu_provider_options = {
-            "arena_extend_strategy": "kSameAsRequested",
-        }
-
-        EP_list = []
-        if config['use_cuda'] and get_device() == 'GPU' \
-                and cuda_ep in get_available_providers():
-            EP_list = [(cuda_ep, config[cuda_ep])]
-        EP_list.append((cpu_ep, cpu_provider_options))
-
-        self.session = InferenceSession(config['model_path'],
-                                        sess_options=sess_opt,
-                                        providers=EP_list)
-
-        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
-            warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
-                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
-                          'you can check their relations from the offical web site: '
-                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
-                          RuntimeWarning)
-
-    def get_input_name(self, input_idx=0):
-        return self.session.get_inputs()[input_idx].name
-
-    def get_output_name(self, output_idx=0):
-        return self.session.get_outputs()[output_idx].name
-
-
-def read_yaml(yaml_path):
-    with open(yaml_path, 'rb') as f:
-        data = yaml.load(f, Loader=yaml.Loader)
-    return data
-
-
-class DecodeImage(object):
-    """ decode image """
-
-    def __init__(self, img_mode='RGB', channel_first=False):
-        self.img_mode = img_mode
-        self.channel_first = channel_first
-
-    def __call__(self, data):
-        img = data['image']
-        if six.PY2:
-            assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage"
-        else:
-            assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"
-
-        img = np.frombuffer(img, dtype='uint8')
-        img = cv2.imdecode(img, 1)
-        if img is None:
-            return None
-
-        if self.img_mode == 'GRAY':
-            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-        elif self.img_mode == 'RGB':
-            assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]'
-            img = img[:, :, ::-1]
-
-        if self.channel_first:
-            img = img.transpose((2, 0, 1))
-        data['image'] = img
-        return data
-
-
-class NormalizeImage(object):
-    """ normalize image such as substract mean, divide std"""
-
-    def __init__(self, scale=None, mean=None, std=None, order='chw'):
-        if isinstance(scale, str):
-            scale = eval(scale)
-        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
-        mean = mean if mean is not None else [0.485, 0.456, 0.406]
-        std = std if std is not None else [0.229, 0.224, 0.225]
-
-        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
-        self.mean = np.array(mean).reshape(shape).astype('float32')
-        self.std = np.array(std).reshape(shape).astype('float32')
-
-    def __call__(self, data):
-        img = np.array(data['image']).astype(np.float32)
-        data['image'] = (img * self.scale - self.mean) / self.std
-        return data
-
-
-class ToCHWImage(object):
-    """ convert hwc image to chw image"""
-    def __init__(self):
-        pass
-
-    def __call__(self, data):
-        img = data['image']
-        from PIL import Image
-        if isinstance(img, Image.Image):
-            img = np.array(img)
-        data['image'] = img.transpose((2, 0, 1))
-        return data
-
-
-class KeepKeys(object):
-    def __init__(self, keep_keys):
-        self.keep_keys = keep_keys
-
-    def __call__(self, data):
-        data_list = []
-        for key in self.keep_keys:
-            data_list.append(data[key])
-        return data_list
-
-
-class DetResizeForTest(object):
-    def __init__(self, **kwargs):
-        super(DetResizeForTest, self).__init__()
-        self.resize_type = 0
-        if 'image_shape' in kwargs:
-            self.image_shape = kwargs['image_shape']
-            self.resize_type = 1
-        elif 'limit_side_len' in kwargs:
-            self.limit_side_len = kwargs.get('limit_side_len', 736)
-            self.limit_type = kwargs.get('limit_type', 'min')
-
-        if 'resize_long' in kwargs:
-            self.resize_type = 2
-            self.resize_long = kwargs.get('resize_long', 960)
-        else:
-            self.limit_side_len = kwargs.get('limit_side_len', 736)
-            self.limit_type = kwargs.get('limit_type', 'min')
-
-    def __call__(self, data):
-        img = data['image']
-        src_h, src_w = img.shape[:2]
-
-        if self.resize_type == 0:
-            # img, shape = self.resize_image_type0(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
-        elif self.resize_type == 2:
-            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
-        else:
-            # img, shape = self.resize_image_type1(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
-        data['image'] = img
-        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
-        return data
-
-    def resize_image_type1(self, img):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = img.shape[:2]  # (h, w, c)
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        # return img, np.array([ori_h, ori_w])
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type0(self, img):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        limit_side_len = self.limit_side_len
-        h, w = img.shape[:2]
-
-        # limit the max side
-        if self.limit_type == 'max':
-            if max(h, w) > limit_side_len:
-                if h > w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.
-        else:
-            if min(h, w) < limit_side_len:
-                if h < w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.
-        resize_h = int(h * ratio)
-        resize_w = int(w * ratio)
-
-        resize_h = int(round(resize_h / 32) * 32)
-        resize_w = int(round(resize_w / 32) * 32)
-
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        except:
-            print(img.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type2(self, img):
-        h, w = img.shape[:2]
-
-        resize_w = w
-        resize_h = h
-
-        # Fix the longer side
-        if resize_h > resize_w:
-            ratio = float(self.resize_long) / resize_h
-        else:
-            ratio = float(self.resize_long) / resize_w
-
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-
-        max_stride = 128
-        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
-        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-
-        return img, [ratio_h, ratio_w]
-
-
-def transform(data, ops=None):
-    """ transform """
-    if ops is None:
-        ops = []
-
-    for op in ops:
-        data = op(data)
-        if data is None:
-            return None
-    return data
-
-
-def create_operators(op_param_dict):
-    """
-    create operators based on the config
-    """
-    ops = []
-    for op_name, param in op_param_dict.items():
-        if param is None:
-            param = {}
-        op = eval(op_name)(**param)
-        ops.append(op)
-    return ops
-
-
-def draw_text_det_res(dt_boxes, img_path):
-    src_im = cv2.imread(img_path)
-    for box in dt_boxes:
-        box = np.array(box).astype(np.int32).reshape(-1, 2)
-        cv2.polylines(src_im, [box], True,
-                      color=(255, 255, 0), thickness=2)
-    return src_im
-
-
-class DBPostProcess(object):
-    """The post process for Differentiable Binarization (DB)."""
-
-    def __init__(self,
-                 thresh=0.3,
-                 box_thresh=0.7,
-                 max_candidates=1000,
-                 unclip_ratio=2.0,
-                 score_mode="fast",
-                 use_dilation=False):
-        self.thresh = thresh
-        self.box_thresh = box_thresh
-        self.max_candidates = max_candidates
-        self.unclip_ratio = unclip_ratio
-        self.min_size = 3
-        self.score_mode = score_mode
-
-        if use_dilation:
-            self.dilation_kernel = np.array([[1, 1], [1, 1]])
-        else:
-            self.dilation_kernel = None
-
-    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        '''
-        _bitmap: single map with shape (1, H, W),
-            whose values are binarized as {0, 1}
-        '''
-
-        bitmap = _bitmap
-        height, width = bitmap.shape
-
-        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
-                                cv2.CHAIN_APPROX_SIMPLE)
-        if len(outs) == 3:
-            img, contours, _ = outs[0], outs[1], outs[2]
-        elif len(outs) == 2:
-            contours, _ = outs[0], outs[1]
-
-        num_contours = min(len(contours), self.max_candidates)
-
-        boxes = []
-        scores = []
-        for index in range(num_contours):
-            contour = contours[index]
-            points, sside = self.get_mini_boxes(contour)
-            if sside < self.min_size:
-                continue
-            points = np.array(points)
-            if self.score_mode == "fast":
-                score = self.box_score_fast(pred, points.reshape(-1, 2))
-            else:
-                score = self.box_score_slow(pred, contour)
-            if self.box_thresh > score:
-                continue
-
-            box = self.unclip(points).reshape(-1, 1, 2)
-            box, sside = self.get_mini_boxes(box)
-            if sside < self.min_size + 2:
-                continue
-            box = np.array(box)
-
-            box[:, 0] = np.clip(
-                np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height)
-            boxes.append(box.astype(np.int16))
-            scores.append(score)
-        return np.array(boxes, dtype=np.int16), scores
-
-    def unclip(self, box):
-        unclip_ratio = self.unclip_ratio
-        poly = Polygon(box)
-        distance = poly.area * unclip_ratio / poly.length
-        offset = pyclipper.PyclipperOffset()
-        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-        expanded = np.array(offset.Execute(distance))
-        return expanded
-
-    def get_mini_boxes(self, contour):
-        bounding_box = cv2.minAreaRect(contour)
-        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
-        if points[1][1] > points[0][1]:
-            index_1 = 0
-            index_4 = 1
-        else:
-            index_1 = 1
-            index_4 = 0
-        if points[3][1] > points[2][1]:
-            index_2 = 2
-            index_3 = 3
-        else:
-            index_2 = 3
-            index_3 = 2
-
-        box = [
-            points[index_1], points[index_2], points[index_3], points[index_4]
-        ]
-        return box, min(bounding_box[1])
-
-    def box_score_fast(self, bitmap, _box):
-        h, w = bitmap.shape[:2]
-        box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-        box[:, 0] = box[:, 0] - xmin
-        box[:, 1] = box[:, 1] - ymin
-        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
-        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
-
-    def box_score_slow(self, bitmap, contour):
-        '''
-        box_score_slow: use polyon mean score as the mean score
-        '''
-        h, w = bitmap.shape[:2]
-        contour = contour.copy()
-        contour = np.reshape(contour, (-1, 2))
-
-        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
-        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
-        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
-        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-
-        contour[:, 0] = contour[:, 0] - xmin
-        contour[:, 1] = contour[:, 1] - ymin
-
-        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
-        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
-
-    def __call__(self, pred, shape_list):
-        pred = pred[:, 0, :, :]
-        segmentation = pred > self.thresh
-
-        boxes_batch = []
-        for batch_index in range(pred.shape[0]):
-            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
-            if self.dilation_kernel is not None:
-                mask = cv2.dilate(
-                    np.array(segmentation[batch_index]).astype(np.uint8),
-                    self.dilation_kernel)
-            else:
-                mask = segmentation[batch_index]
-            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
-                                                   src_w, src_h)
-
-            boxes_batch.append({'points': boxes})
-        return boxes_batch
rapidocr_onnxruntime/ch_ppocr_v3_rec/__init__.py DELETED
@@ -1,4 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-from .text_recognize import TextRecognizer
rapidocr_onnxruntime/ch_ppocr_v3_rec/config.yaml DELETED
@@ -1,12 +0,0 @@
-model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx
-
-use_cuda: false
-# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
-CUDAExecutionProvider:
-    device_id: 0
-    arena_extend_strategy: kNextPowerOfTwo
-    cudnn_conv_algo_search: EXHAUSTIVE
-    do_copy_in_default_stream: true
-
-rec_img_shape: [3, 48, 320]
-rec_batch_num: 6
rapidocr_onnxruntime/ch_ppocr_v3_rec/text_recognize.py DELETED
@@ -1,120 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import argparse
-import math
-import time
-from typing import List
-
-import cv2
-import numpy as np
-
-try:
-    from .utils import CTCLabelDecode, read_yaml, OrtInferSession
-except:
-    from utils import CTCLabelDecode, read_yaml, OrtInferSession
-
-
-class TextRecognizer(object):
-    def __init__(self, config):
-        session_instance = OrtInferSession(config)
-        self.session = session_instance.session
-        self.input_name = session_instance.get_input_name()
-        meta_dict = session_instance.get_metadata()
-
-        if 'character' in meta_dict.keys():
-            self.character_dict_path = meta_dict['character'].splitlines()
-        else:
-            self.character_dict_path = config.get('keys_path', None)
-        self.postprocess_op = CTCLabelDecode(self.character_dict_path)
-
-        self.rec_batch_num = config['rec_batch_num']
-        self.rec_image_shape = config['rec_img_shape']
-
-    def __call__(self, img_list: List[np.ndarray]):
-        if isinstance(img_list, np.ndarray):
-            img_list = [img_list]
-
-        # Calculate the aspect ratio of all text bars
-        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
-
-        # Sorting can speed up the recognition process
-        indices = np.argsort(np.array(width_list))
-
-        img_num = len(img_list)
-        rec_res = [['', 0.0]] * img_num
-
-        batch_num = self.rec_batch_num
-        elapse = 0
-        for beg_img_no in range(0, img_num, batch_num):
-            end_img_no = min(img_num, beg_img_no + batch_num)
-            max_wh_ratio = 0
-            for ino in range(beg_img_no, end_img_no):
-                h, w = img_list[indices[ino]].shape[0:2]
-                wh_ratio = w * 1.0 / h
-                max_wh_ratio = max(max_wh_ratio, wh_ratio)
-
-            norm_img_batch = []
-            for ino in range(beg_img_no, end_img_no):
-                norm_img = self.resize_norm_img(img_list[indices[ino]],
-                                                max_wh_ratio)
-                norm_img_batch.append(norm_img[np.newaxis, :])
-            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)
-
-            starttime = time.time()
-            onnx_inputs = {self.input_name: norm_img_batch}
-            preds = self.session.run(None, onnx_inputs)[0]
-            rec_result = self.postprocess_op(preds)
-
-            for rno in range(len(rec_result)):
-                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
-            elapse += time.time() - starttime
-        return rec_res, elapse
-
-    def resize_norm_img(self, img, max_wh_ratio):
-        img_channel, img_height, img_width = self.rec_image_shape
-        assert img_channel == img.shape[2]
-
-        img_width = int(img_height * max_wh_ratio)
-
-        h, w = img.shape[:2]
-        ratio = w / float(h)
-        if math.ceil(img_height * ratio) > img_width:
-            resized_w = img_width
-        else:
-            resized_w = int(math.ceil(img_height * ratio))
-
-        resized_image = cv2.resize(img, (resized_w, img_height))
-        resized_image = resized_image.astype('float32')
-        resized_image = resized_image.transpose((2, 0, 1)) / 255
-        resized_image -= 0.5
-        resized_image /= 0.5
-
-        padding_im = np.zeros((img_channel, img_height, img_width),
-                              dtype=np.float32)
-        padding_im[:, :, 0:resized_w] = resized_image
-        return padding_im
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
-    parser.add_argument('--config_path', type=str, default='config.yaml')
-    args = parser.parse_args()
-
-    config = read_yaml(args.config_path)
-    text_recognizer = TextRecognizer(config)
-
-    img = cv2.imread(args.image_path)
-    rec_res, predict_time = text_recognizer(img)
-    print(f'rec result: {rec_res}\t cost: {predict_time}s')
rapidocr_onnxruntime/ch_ppocr_v3_rec/utils.py DELETED
@@ -1,128 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import warnings
-
-import numpy as np
-import yaml
-from onnxruntime import (get_available_providers, get_device,
-                         SessionOptions, InferenceSession,
-                         GraphOptimizationLevel)
-
-
-class OrtInferSession(object):
-    def __init__(self, config):
-        sess_opt = SessionOptions()
-        sess_opt.log_severity_level = 4
-        sess_opt.enable_cpu_mem_arena = False
-        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
-
-        cuda_ep = 'CUDAExecutionProvider'
-        cpu_ep = 'CPUExecutionProvider'
-        cpu_provider_options = {
-            "arena_extend_strategy": "kSameAsRequested",
-        }
-
-        EP_list = []
-        if config['use_cuda'] and get_device() == 'GPU' \
-                and cuda_ep in get_available_providers():
-            EP_list = [(cuda_ep, config[cuda_ep])]
-        EP_list.append((cpu_ep, cpu_provider_options))
-
-        self.session = InferenceSession(config['model_path'],
-                                        sess_options=sess_opt,
-                                        providers=EP_list)
-
-        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
-            warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
-                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
-                          'you can check their relations from the offical web site: '
-                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
-                          RuntimeWarning)
-
-    def get_input_name(self, input_idx=0):
-        return self.session.get_inputs()[input_idx].name
-
-    def get_output_name(self, output_idx=0):
-        return self.session.get_outputs()[output_idx].name
-
-    def get_metadata(self):
-        meta_dict = self.session.get_modelmeta().custom_metadata_map
-        return meta_dict
-
-
-def read_yaml(yaml_path):
-    with open(yaml_path, 'rb') as f:
-        data = yaml.load(f, Loader=yaml.Loader)
-    return data
-
-
-class CTCLabelDecode(object):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, character_dict_path):
-        super(CTCLabelDecode, self).__init__()
-
-        self.character_str = []
-        assert character_dict_path is not None, "character_dict_path should not be None"
-
-        if isinstance(character_dict_path, str):
-            with open(character_dict_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str.append(line)
-        else:
-            self.character_str = character_dict_path
-        self.character_str.append(' ')
-
-        dict_character = self.add_special_char(self.character_str)
-        self.character = dict_character
-
-        self.dict = {}
-        for i, char in enumerate(dict_character):
-            self.dict[char] = i
-
-    def __call__(self, preds, label=None):
-        preds_idx = preds.argmax(axis=2)
-        preds_prob = preds.max(axis=2)
-        text = self.decode(preds_idx, preds_prob,
-                           is_remove_duplicate=True)
-        if label is None:
-            return text
-        label = self.decode(label)
-        return text, label
-
-    def add_special_char(self, dict_character):
-        dict_character = ['blank'] + dict_character
-        return dict_character
-
-    def get_ignored_tokens(self):
-        return [0]  # for ctc blank
-
-    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
-        """ convert text-index into text-label. """
-        result_list = []
-        ignored_tokens = self.get_ignored_tokens()
-        batch_size = len(text_index)
-        for batch_idx in range(batch_size):
-            char_list = []
-            conf_list = []
-            for idx in range(len(text_index[batch_idx])):
-                if text_index[batch_idx][idx] in ignored_tokens:
-                    continue
-                if is_remove_duplicate:
-                    # only for predict
-                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
-                            batch_idx][idx]:
-                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
-                if text_prob is not None:
-                    conf_list.append(text_prob[batch_idx][idx])
-                else:
-                    conf_list.append(1)
-            text = ''.join(char_list)
-            result_list.append((text, np.mean(conf_list + [1e-50])))
-        return result_list
rapidocr_onnxruntime/rapid_ocr_api.py DELETED
@@ -1,176 +0,0 @@
-# -*- encoding: utf-8 -*-
-# @Author: SWHL
-# @Contact: [email protected]
-import copy
-import importlib
-import sys
-from pathlib import Path
-
-import cv2
-import numpy as np
-import yaml
-
-root_dir = Path(__file__).resolve().parent
-sys.path.append(str(root_dir))
-
-
-class TextSystem(object):
-    def __init__(self, config_path):
-        super(TextSystem).__init__()
-        if not Path(config_path).exists():
-            raise FileExistsError(f'{config_path} does not exist!')
-
-        config = self.read_yaml(config_path)
-
-        global_config = config['Global']
-        self.print_verbose = global_config['print_verbose']
-        self.text_score = global_config['text_score']
-        self.min_height = global_config['min_height']
-        self.width_height_ratio = global_config['width_height_ratio']
-
-        TextDetector = self.init_module(config['Det']['module_name'],
-                                        config['Det']['class_name'])
-        self.text_detector = TextDetector(config['Det'])
-
-        TextRecognizer = self.init_module(config['Rec']['module_name'],
-                                          config['Rec']['class_name'])
-        self.text_recognizer = TextRecognizer(config['Rec'])
-
-        self.use_angle_cls = config['Global']['use_angle_cls']
-        if self.use_angle_cls:
-            TextClassifier = self.init_module(config['Cls']['module_name'],
-                                              config['Cls']['class_name'])
-            self.text_cls = TextClassifier(config['Cls'])
-
-    def __call__(self, img: np.ndarray, **kwargs):
-        # Override a few hyperparameter values here
-        if kwargs:
-            # Fetch the hyperparameters
-            box_thresh = kwargs.get('box_thresh', 0.5)
-            unclip_ratio = kwargs.get('unclip_ratio', 1.6)
-            text_score = kwargs.get('text_score', 0.5)
-
-            # Update the hyperparameters
-            self.text_detector.postprocess_op.box_thresh = box_thresh
-            self.text_detector.postprocess_op.unclip_ratio = unclip_ratio
-            self.text_score = text_score
-
-        h, w = img.shape[:2]
-        if self.width_height_ratio == -1:
-            use_limit_ratio = False
-        else:
-            use_limit_ratio = w / h > self.width_height_ratio
-
-        if h <= self.min_height or use_limit_ratio:
-            dt_boxes, img_crop_list = self.get_boxes_img_without_det(img, h, w)
-        else:
-            dt_boxes, elapse = self.text_detector(img)
-            if dt_boxes is None or len(dt_boxes) < 1:
-                return None, None
-            if self.print_verbose:
-                print(f'dt_boxes num: {len(dt_boxes)}, elapse: {elapse}')
-
-            dt_boxes = self.sorted_boxes(dt_boxes)
-            img_crop_list = self.get_crop_img_list(img, dt_boxes)
-
-        if self.use_angle_cls:
-            img_crop_list, _, elapse = self.text_cls(img_crop_list)
-            if self.print_verbose:
-                print(f'cls num: {len(img_crop_list)}, elapse: {elapse}')
-
-        rec_res, elapse = self.text_recognizer(img_crop_list)
-        if self.print_verbose:
-            print(f'rec_res num: {len(rec_res)}, elapse: {elapse}')
-
-        filter_boxes, filter_rec_res = self.filter_boxes_rec_by_score(dt_boxes,
-                                                                      rec_res)
-        return filter_boxes, filter_rec_res
-
-    @staticmethod
-    def read_yaml(yaml_path):
-        with open(yaml_path, 'rb') as f:
-            data = yaml.load(f, Loader=yaml.Loader)
-        return data
-
-    @staticmethod
-    def init_module(module_name, class_name):
-        module_part = importlib.import_module(module_name)
-        return getattr(module_part, class_name)
-
-    def get_boxes_img_without_det(self, img, h, w):
-        x0, y0, x1, y1 = 0, 0, w, h
-        dt_boxes = np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
-        dt_boxes = dt_boxes[np.newaxis, ...]
-        img_crop_list = [img]
-        return dt_boxes, img_crop_list
-
-    def get_crop_img_list(self, img, dt_boxes):
-        def get_rotate_crop_image(img, points):
-            img_crop_width = int(
-                max(
-                    np.linalg.norm(points[0] - points[1]),
-                    np.linalg.norm(points[2] - points[3])))
-            img_crop_height = int(
-                max(
-                    np.linalg.norm(points[0] - points[3]),
-                    np.linalg.norm(points[1] - points[2])))
-            pts_std = np.float32([[0, 0], [img_crop_width, 0],
-                                  [img_crop_width, img_crop_height],
-                                  [0, img_crop_height]])
-            M = cv2.getPerspectiveTransform(points, pts_std)
-            dst_img = cv2.warpPerspective(
-                img,
-                M, (img_crop_width, img_crop_height),
-                borderMode=cv2.BORDER_REPLICATE,
-                flags=cv2.INTER_CUBIC)
-            dst_img_height, dst_img_width = dst_img.shape[0:2]
-            if dst_img_height * 1.0 / dst_img_width >= 1.5:
-                dst_img = np.rot90(dst_img)
-            return dst_img
-
-        img_crop_list = []
-        for box in dt_boxes:
-            tmp_box = copy.deepcopy(box)
-            img_crop = get_rotate_crop_image(img, tmp_box)
-            img_crop_list.append(img_crop)
-        return img_crop_list
-
-    @staticmethod
-    def sorted_boxes(dt_boxes):
-        """
-        Sort text boxes in order from top to bottom, left to right
-        args:
-            dt_boxes(array): detected text boxes with shape [4, 2]
-        return:
-            sorted boxes(array) with shape [4, 2]
-        """
-        num_boxes = dt_boxes.shape[0]
-        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
-        _boxes = list(sorted_boxes)
-
-        for i in range(num_boxes - 1):
-            if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
-                    (_boxes[i + 1][0][0] < _boxes[i][0][0]):
-                tmp = _boxes[i]
-                _boxes[i] = _boxes[i + 1]
-                _boxes[i + 1] = tmp
-        return _boxes
-
-    def filter_boxes_rec_by_score(self, dt_boxes, rec_res):
-        filter_boxes, filter_rec_res = [], []
-        for box, rec_reuslt in zip(dt_boxes, rec_res):
-            text, score = rec_reuslt
-            if score >= self.text_score:
-                filter_boxes.append(box)
-                filter_rec_res.append(rec_reuslt)
-        return filter_boxes, filter_rec_res
-
-
-if __name__ == '__main__':
-    text_sys = TextSystem('config.yaml')
-
-    import cv2
-    img = cv2.imread('resources/test_images/det_images/ch_en_num.jpg')
-
-    result = text_sys(img)
-    print(result)
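
The orchestration this class implemented — det → cls → rec chaining, box sorting, the no-detection shortcut for short or very wide images, and score-based filtering — is what the packaged RapidOCR class is now relied on to provide. In effect the entry-point swap is (both lines taken from the diffs above):

text_sys = TextSystem('config.yaml')   # old: local pipeline, local config file
rapid_ocr = RapidOCR()                 # new: pip package with bundled models and config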
requirements.txt CHANGED
@@ -1,10 +1,3 @@
 Gradio
-pyclipper>=1.2.0
-Shapely>=1.7.1
-opencv_python>=4.5.1.48
-six>=1.15.0
-numpy>=1.19.5
 Pillow
-PyYAML
-pytest
-onnxruntime
+rapidocr_onnxruntime
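
The dropped pins (pyclipper, Shapely, opencv_python, six, numpy, PyYAML, onnxruntime) are exactly the packages the deleted modules above imported, so the assumption here is that rapidocr_onnxruntime pulls them in as transitive dependencies; pytest was simply unused by the demo.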
resources/fonts/.gitkeep DELETED
File without changes
resources/models/.gitkeep DELETED
File without changes
resources/models/ch_PP-OCRv3_det_infer.onnx DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3439588c030faea393a54515f51e983d8e155b19a2e8aba7891934c1cf0de526
-size 2432880
resources/models/ch_PP-OCRv3_rec_infer.onnx DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:897a3ededb38fee0dae2c1ccee38241f37df202c9509e3abca02e9217c5ee615
-size 10690752
resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e47acedf663230f8863ff1ab0e64dd2d82b838fceb5957146dab185a89d6215c
-size 585532