|
import gradio as gr |
|
import cv2 |
|
import numpy as np |
|
from rapidocr_onnxruntime import RapidOCR |
|
|
|
# Shared OCR engine, instantiated once at import time and reused by all
# functions below (model loading is expensive).
engine = RapidOCR()


# Regions of interest on the ALIGNED image, keyed by field name.
# Each value is a pixel box [x1, y1, x2, y2] = (left, top, right, bottom),
# expressed in the template image's coordinate frame.
info_points = {
    "customer_name": [156, 109, 928, 168],
    "amount": [157, 397, 606, 461],
    "price": [155, 341, 607, 399],
    "plateNumber": [740, 173, 928, 227]
}
|
|
|
def find_reference_points(template_image, target_image):
    """Find matching text anchor points between template and target images.

    Runs OCR on both images and pairs words whose recognized text is
    identical, taking one bounding-box corner of each word as the anchor
    coordinate for homography estimation.

    Args:
        template_image: BGR ndarray of the reference (template) image.
        target_image: BGR ndarray of the image to be aligned.

    Returns:
        Tuple ``(template_points, target_points)`` of float32 ndarrays with
        shape (N, 2). Both are empty (shape (0, 2)) when OCR detects no
        text in either image or no texts match.
    """
    template_result, _ = engine(template_image)
    target_result, _ = engine(target_image)

    # RapidOCR returns None (not an empty list) when it detects no text;
    # guard so callers get empty arrays instead of a TypeError.
    if not template_result or not target_result:
        empty = np.empty((0, 2), dtype=np.float32)
        return empty, empty

    reference_points_template = []
    reference_points_target = []
    used_target_indices = set()  # never pair one target word twice

    for template_word in template_result:
        # Each OCR item is [box, text, score]; box holds 4 corner points,
        # and box[1] is the corner used as the anchor coordinate.
        template_text = template_word[1]
        template_x, template_y = template_word[0][1]

        for idx, target_word in enumerate(target_result):
            if idx in used_target_indices:
                continue
            if template_text == target_word[1]:
                target_x, target_y = target_word[0][1]
                reference_points_template.append((template_x, template_y))
                reference_points_target.append((target_x, target_y))
                used_target_indices.add(idx)
                break

    return (np.array(reference_points_template, dtype=np.float32),
            np.array(reference_points_target, dtype=np.float32))
|
|
|
def align_images(template_image, target_image):
    """Warp ``target_image`` into ``template_image``'s coordinate frame.

    Uses OCR-matched text positions as point correspondences and estimates
    a homography with RANSAC. Falls back to returning the target image
    unchanged whenever alignment is impossible.

    Args:
        template_image: BGR ndarray of the reference image.
        target_image: BGR ndarray to be warped.

    Returns:
        BGR ndarray with the template's width/height, or the original
        ``target_image`` if too few correspondences were found or the
        homography could not be estimated.
    """
    src_pts, dst_pts = find_reference_points(template_image, target_image)

    # Homography estimation requires at least 4 point correspondences.
    if len(src_pts) < 4 or len(dst_pts) < 4:
        return target_image

    M, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)

    # findHomography can still fail and return None (e.g. degenerate or
    # collinear points); warpPerspective would crash on a None matrix.
    if M is None:
        return target_image

    return cv2.warpPerspective(
        target_image, M, (template_image.shape[1], template_image.shape[0])
    )
|
|
|
def process_images(template_image, target_image):
    """Align the target to the template and OCR each configured region.

    Args:
        template_image: RGB ndarray as delivered by Gradio.
        target_image: RGB ndarray as delivered by Gradio.

    Returns:
        Tuple of (annotated aligned RGB image with green boxes around the
        extracted regions, dict mapping each ``info_points`` key to the
        first recognized text line; unreadable fields map to "").
    """
    # Gradio supplies RGB; the OpenCV pipeline below works in BGR.
    template_image = cv2.cvtColor(template_image, cv2.COLOR_RGB2BGR)
    target_image = cv2.cvtColor(target_image, cv2.COLOR_RGB2BGR)

    aligned_image = align_images(template_image, target_image)

    # Convert back to RGB for display and for the per-field OCR crops.
    aligned_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)

    info_dict = {}
    for key, (x1, y1, x2, y2) in info_points.items():
        # OCR the crop BEFORE drawing the rectangle, so the 2-px green
        # border does not bleed into the region handed to the engine.
        ocr_result, _ = engine(aligned_image[y1:y2, x1:x2])

        # RapidOCR returns None when it finds no text in the crop;
        # report an empty string instead of crashing on ocr_result[0].
        info_dict[key] = ocr_result[0][1] if ocr_result else ""

        cv2.rectangle(aligned_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    return aligned_image, info_dict
|
|
|
|
|
# Gradio UI: two image inputs (template + target), two outputs (the
# aligned/annotated image and the extracted field dict rendered as text).
demo = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Image(label="模板图像"),
        gr.Image(label="目标图像")
    ],
    outputs=[
        gr.Image(label="对齐后的图像"),
        gr.Textbox(label="识别信息")
    ],
    title="磅单提取工具",
    description="上传一张模板图像和一张目标图像,提取关键信息。"
)


# Start the local Gradio server (blocking call).
demo.launch()
|
|