bangdan / app.py
domenicCarter's picture
feat: first commit
7013115
raw
history blame
3.06 kB
import gradio as gr
import cv2
import numpy as np
from rapidocr_onnxruntime import RapidOCR
# Shared OCR engine, instantiated once at import time and reused for every
# OCR call made in this file.
engine = RapidOCR()
# Fixed regions of interest on the aligned image, keyed by field name.
# Each value is used downstream as [x1, y1, x2, y2]: (x1, y1) and (x2, y2)
# are the opposite rectangle corners, and the crop is rows y1:y2, cols x1:x2.
info_points = {
    "customer_name": [156, 109, 928, 168],
    "amount": [157, 397, 606, 461],
    "price": [155, 341, 607, 399],
    "plateNumber": [740, 173, 928, 227],
}
def find_reference_points(template_image, target_image):
    """Collect paired anchor points from text that OCR finds in both images.

    Runs the module-level OCR ``engine`` on both images and pairs every
    template text block with the first target text block whose recognized
    text is exactly equal. The anchor coordinate of a block is the point at
    index 1 of its detected box (presumably the top-right corner in
    RapidOCR's box ordering -- confirm against the RapidOCR docs).

    Args:
        template_image: Image handed to the OCR engine as the reference layout.
        target_image: Image handed to the OCR engine to be matched against it.

    Returns:
        A tuple ``(template_points, target_points)`` of NumPy arrays holding
        the paired coordinates in the same order. Both arrays are empty when
        no text is shared, or when OCR detects nothing in either image.
    """
    # Run OCR on the template image and the target image
    template_result, _ = engine(template_image)
    target_result, _ = engine(target_image)

    # The engine may yield None (not an empty list) when nothing is detected;
    # iterating it would raise TypeError, so bail out with empty point sets.
    if not template_result or not target_result:
        return np.array([]), np.array([])

    # Index target blocks by text, keeping only the first occurrence so that
    # the pairing is the same as a "first match wins" linear scan.
    first_target_point = {}
    for target_word in target_result:
        target_x, target_y = target_word[0][1]
        first_target_point.setdefault(target_word[1], (target_x, target_y))

    reference_points_template = []
    reference_points_target = []

    # Find text blocks whose text appears in both images
    for template_word in template_result:
        template_x, template_y = template_word[0][1]
        matched_point = first_target_point.get(template_word[1])
        if matched_point is None:
            continue
        reference_points_template.append((template_x, template_y))
        reference_points_target.append(matched_point)

    return np.array(reference_points_template), np.array(reference_points_target)
def align_images(template_image, target_image):
    """Warp the target image into the template image's coordinate frame.

    Matching reference points are obtained from ``find_reference_points`` and
    used to estimate a homography (RANSAC, reprojection threshold 5.0) that
    maps target coordinates onto template coordinates.

    Args:
        template_image: Reference image; its height and width define the
            size of the warped output.
        target_image: Image to be aligned to the template.

    Returns:
        The warped target image sized like the template, or ``target_image``
        unchanged when fewer than four point pairs are available or when no
        homography could be estimated.
    """
    # Find the reference points
    src_pts, dst_pts = find_reference_points(template_image, target_image)

    # A homography needs at least four correspondences; fall back to the
    # original image when there are not enough reference points.
    if len(src_pts) < 4 or len(dst_pts) < 4:
        return target_image

    # Compute the perspective transform matrix (target -> template)
    homography, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)

    # findHomography yields no matrix when estimation fails (e.g. degenerate
    # point sets); warping with it would crash, so use the same fallback.
    if homography is None:
        return target_image

    # Apply the perspective transform, producing a template-sized image
    template_height, template_width = template_image.shape[:2]
    return cv2.warpPerspective(
        target_image, homography, (template_width, template_height)
    )
def _read_region_text(image, box):
    """Return the first OCR text line found inside ``box`` of ``image``.

    ``box`` is ``[x1, y1, x2, y2]``. An empty string is returned when the
    region lies outside the image or when OCR detects no text in it.
    """
    left, top, right, bottom = box
    region = image[top:bottom, left:right]

    # The box can fall outside the image when alignment fell back to a target
    # that is smaller than the template; there is nothing to recognize then.
    if region.size == 0:
        return ""

    ocr_result, _ = engine(region)

    # The engine may yield None when nothing is detected in the region.
    if not ocr_result:
        return ""
    return ocr_result[0][1]


def process_images(template_image, target_image):
    """Align the target image to the template and read the configured fields.

    Args:
        template_image: Template image as delivered by Gradio (RGB array).
        target_image: Target image as delivered by Gradio (RGB array).

    Returns:
        A tuple ``(aligned_image, info_dict)``: the aligned RGB image with a
        green rectangle drawn around every region in ``info_points``, and a
        dict mapping each ``info_points`` key to the first text line
        recognized in its region ("" when nothing was recognized).

    Raises:
        gr.Error: If either image is missing.
    """
    # Gradio passes None for an image slot that was left empty.
    if template_image is None or target_image is None:
        raise gr.Error("请同时上传模板图像和目标图像。")

    # Convert Gradio's RGB images to OpenCV's BGR layout
    template_bgr = cv2.cvtColor(template_image, cv2.COLOR_RGB2BGR)
    target_bgr = cv2.cvtColor(target_image, cv2.COLOR_RGB2BGR)

    # Align the images
    aligned_image = align_images(template_bgr, target_bgr)

    # Convert the result back to RGB so Gradio displays it correctly
    aligned_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)

    # Recognize every field BEFORE drawing anything: the rectangle outlines
    # overlap the crop borders (and neighbouring regions), so drawing first
    # would feed the green strokes into OCR.
    info_dict = {
        key: _read_region_text(aligned_image, box)
        for key, box in info_points.items()
    }

    # Draw the rectangle for each region in info_points
    for box in info_points.values():
        cv2.rectangle(
            aligned_image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2
        )

    return aligned_image, info_dict
# Build the Gradio interface: two image inputs (template and target), and
# two outputs (the aligned image and the recognized information).
demo = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Image(label="模板图像"),
        gr.Image(label="目标图像"),
    ],
    outputs=[
        gr.Image(label="对齐后的图像"),
        gr.Textbox(label="识别信息"),
    ],
    title="磅单提取工具",
    description="上传一张模板图像和一张目标图像,提取关键信息。",
)

# Start the Gradio app
demo.launch()