# -*- encoding: utf-8 -*-
"""Gradio demo for RapidOCR.

Runs text detection/recognition on an uploaded image and shows a side-by-side
visualisation (highlighted boxes on the left, recognised text on the right)
together with the recognised strings and their confidence scores.
"""
import os

# Dependencies are installed at start-up, before the third-party imports
# below are attempted. Kept exactly as in the original script.
os.system('pip install -r requirements.txt')

import math
import random
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from rapidocr_onnxruntime import RapidOCR


def _char_height(font, char):
    """Return the rendered height of ``char``, as the old ``getsize()[1]`` did.

    ``FreeTypeFont.getsize`` was removed in Pillow 10. The bottom coordinate
    of ``getbbox`` is the same quantity (glyph height plus vertical offset),
    so it is preferred whenever it is available.
    """
    if hasattr(font, "getbbox"):
        return font.getbbox(char)[3]
    return font.getsize(char)[1]


def draw_ocr_box_txt(image, boxes, txts, font_path,
                     scores=None, text_score=0.5):
    """Render OCR results next to the source image.

    Args:
        image: PIL image the boxes refer to.
        boxes: Iterable of quadrilaterals, each given as four ``(x, y)``
            points; the code treats point 0 -> 1 as the width edge and
            point 0 -> 3 as the height edge.
        txts: Recognised text for each box, aligned with ``boxes``.
        font_path: Path of the TrueType font used to draw the text.
        scores: Optional per-box confidences aligned with ``boxes``.
        text_score: Boxes whose score is below this value are not drawn
            (only applied when ``scores`` is given).

    Returns:
        ``numpy.ndarray`` of an RGB image twice as wide as ``image``: the
        input blended with filled boxes on the left, box outlines with their
        text on a white canvas on the right.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator yields the same colour sequence as seeding the
    # module-level RNG with 0, without resetting global random state.
    rng = random.Random(0)

    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (rng.randint(0, 255),
                 rng.randint(0, 255),
                 rng.randint(0, 255))

        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.polygon([coord for point in box for coord in point],
                           outline=color)

        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
        box_width = math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])

        if box_height > 2 * box_width:
            # Tall, narrow box: draw the characters one below another.
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                draw_right.text((box[0][0] + 3, cur_y), c,
                                fill=(0, 0, 0), font=font)
                cur_y += _char_height(font, c)
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text((box[0][0], box[0][1]), txt,
                            fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image_path, boxes, txts, scores,
              font_path="./FZYTK.TTF", text_score=0.5):
    """Draw the OCR results for ``image_path`` and save them to disk.

    Args:
        image_path: Path of the image the OCR results belong to.
        boxes: Detected boxes, see ``draw_ocr_box_txt``.
        txts: Recognised texts aligned with ``boxes``.
        scores: Confidences aligned with ``boxes``.
        font_path: TrueType font used for the rendered text.
        text_score: Minimum confidence for a box to be drawn.

    Returns:
        Path (``str``) of the saved visualisation inside
        ``./inference_results/``.
    """
    # Normalise to RGB so that the RGB fill colours and the final channel
    # flip also work for grayscale, palette and RGBA inputs.
    with Image.open(image_path) as opened:
        image = opened.convert('RGB')

    draw_img = draw_ocr_box_txt(image, boxes, txts, font_path,
                                scores, text_score=text_score)

    draw_img_save = Path("./inference_results/")
    draw_img_save.mkdir(parents=True, exist_ok=True)

    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
    # OpenCV expects BGR channel order, hence the flip of the last axis.
    cv2.imwrite(image_save, draw_img[:, :, ::-1])
    return image_save


def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5):
    """Run RapidOCR on ``img_path`` and build the outputs shown in the UI.

    Args:
        img_path: Path of the uploaded image.
        box_thresh: Detection threshold forwarded to RapidOCR.
        unclip_ratio: Box expansion ratio forwarded to RapidOCR.
        text_score: Recognition confidence threshold; forwarded to RapidOCR
            and also used when drawing the boxes.

    Returns:
        Tuple ``(image_path, texts)``: the path of the visualisation and a
        list of ``"<text> <score>"`` strings. When nothing is recognised the
        input image path and an empty list are returned.
    """
    img = cv2.imread(img_path)
    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
                              unclip_ratio=unclip_ratio,
                              text_score=text_score)
    # RapidOCR yields an empty/None result when no text is found; unpacking
    # it with zip(*...) would raise instead of giving an empty answer.
    if not ocr_result:
        return img_path, []

    dt_boxes, rec_res, scores = list(zip(*ocr_result))
    img_save_path = visualize(img_path, dt_boxes, rec_res, scores,
                              text_score=text_score)
    output_text = [f'{one_rec} {float(score):.4f}'
                   for one_rec, score in zip(rec_res, scores)]
    return img_save_path, output_text


title = 'RapidOCR Demo (捷智OCR)'
description = """

Docs: [Docs](https://rapidocr.rtfd.io/)

Parameters docs: [link](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)

box_thresh: 检测到的框是文本的概率，值越大，框中是文本的概率就越大。存在漏检时，调低该值。取值范围：[0, 1.0]

unclip_ratio: 控制文本检测框的大小，值越大，检测框整体越大。在出现框截断文字的情况，调大该值。取值范围：[1.5, 2.0]

text_score: 文本识别结果是正确的置信度，值越大，显示出的识别结果更准确。存在漏检时，调低该值。取值范围：[0, 1.0]
"""
article = """

Completely open source, free and support offline deployment of multi-platform and multi-language OCR SDK Github Repo

"""
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
examples = [['images/1.jpg']]

# The OCR engine is created once at import time and shared by all requests.
rapid_ocr = RapidOCR()

# Components are taken from the top-level gradio namespace (like gr.Slider
# below) instead of the deprecated gr.inputs / gr.outputs modules.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(type='filepath', label='Input'),
        gr.Slider(minimum=0, maximum=1.0, value=0.5,
                  label='box_thresh', step=0.1,
                  info='检测到的框是文本的概率，值越大，框中是文本的概率就越大。存在漏检时，调低该值。取值范围：[0, 1.0]'),
        gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
                  label='unclip_ratio', step=0.1,
                  info='控制文本检测框的大小，值越大，检测框整体越大。在出现框截断文字的情况，调大该值。取值范围：[1.5, 2.0]'),
        gr.Slider(minimum=0, maximum=1.0, value=0.5,
                  label='text_score', step=0.1,
                  info='文本识别结果是正确的置信度，值越大，显示出的识别结果更准确。存在漏检时，调低该值。取值范围：[0, 1.0]'),
    ],
    outputs=[
        gr.Image(type='filepath', label='Output_image'),
        gr.Textbox(type='text', label='Output_text'),
    ],
    title=title,
    description=description,
    examples=examples,
    article=article,
    css=css,
    allow_flagging='never',
)

# .queue() replaces the deprecated launch(enable_queue=True) argument.
demo.queue().launch(debug=True)