import gradio as gr
import cv2
import numpy as np
from rapidocr_onnxruntime import RapidOCR

# Shared OCR engine, created once at import time and reused by every call.
engine = RapidOCR()

# Regions of interest as [x1, y1, x2, y2] pixel boxes, applied to the aligned
# image (i.e. expressed in the template image's coordinate frame).
info_points = {
    "customer_name": [156, 109, 928, 168],
    "amount": [157, 397, 606, 461],
    "price": [155, 341, 607, 399],
    "plateNumber": [740, 173, 928, 227],
}


def find_reference_points(template_image, target_image):
    """Find corresponding points between two images via identical OCR text.

    Both images are run through the OCR engine. Each template text block is
    paired with the first not-yet-used target block carrying exactly the same
    text, and the second corner of each block's bounding box is recorded.

    Args:
        template_image: Template image as accepted by the OCR engine.
        target_image: Image to be aligned onto the template.

    Returns:
        A pair ``(template_points, target_points)`` of ``float32`` arrays of
        shape ``(N, 2)``; row ``i`` of each array belongs to the same text.
        Both arrays are empty when no text matches.
    """
    template_result, _ = engine(template_image)
    target_result, _ = engine(target_image)
    # The engine yields None (not an empty list) when no text is detected.
    template_words = template_result or []
    target_words = target_result or []

    reference_points_template = []
    reference_points_target = []
    # Each target block may be used once, so repeated texts do not collapse
    # several template points onto a single target point.
    used_targets = set()

    for template_word in template_words:
        template_text = template_word[1]
        template_x, template_y = template_word[0][1]
        for index, target_word in enumerate(target_words):
            if index in used_targets or target_word[1] != template_text:
                continue
            target_x, target_y = target_word[0][1]
            reference_points_template.append((template_x, template_y))
            reference_points_target.append((target_x, target_y))
            used_targets.add(index)
            break

    # reshape keeps a well-formed (0, 2) array when nothing matched.
    return (
        np.array(reference_points_template, dtype=np.float32).reshape(-1, 2),
        np.array(reference_points_target, dtype=np.float32).reshape(-1, 2),
    )


def align_images(template_image, target_image):
    """Warp ``target_image`` into the template's frame using a homography.

    Args:
        template_image: Reference image; its width/height define the output.
        target_image: Image to warp.

    Returns:
        The warped image sized like ``template_image``, or ``target_image``
        unchanged when fewer than four point pairs are available or no
        homography could be estimated.
    """
    src_pts, dst_pts = find_reference_points(template_image, target_image)

    # A homography needs at least four correspondences.
    if len(src_pts) < 4 or len(dst_pts) < 4:
        return target_image

    # Map target coordinates onto template coordinates; RANSAC discards
    # mismatched pairs (reprojection threshold: 5 px).
    M, _ = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)
    if M is None:
        # Estimation can fail (e.g. degenerate point sets); fall back.
        return target_image

    height, width = template_image.shape[:2]
    return cv2.warpPerspective(target_image, M, (width, height))


def process_images(template_image, target_image):
    """Align the target to the template and OCR every configured region.

    Args:
        template_image: RGB template image as delivered by Gradio.
        target_image: RGB target image as delivered by Gradio.

    Returns:
        A pair ``(aligned_image, info_dict)``: the aligned RGB image with the
        regions from ``info_points`` outlined in green, and a dict mapping
        each region name to the first recognised text line (``""`` when
        nothing was recognised).

    Raises:
        gr.Error: If either image is missing.
    """
    if template_image is None or target_image is None:
        raise gr.Error("请先上传模板图像和目标图像。")

    # Gradio delivers RGB; the OpenCV-based pipeline works on BGR.
    template_image = cv2.cvtColor(template_image, cv2.COLOR_RGB2BGR)
    target_image = cv2.cvtColor(target_image, cv2.COLOR_RGB2BGR)

    aligned_bgr = align_images(template_image, target_image)

    # Recognise the fields before drawing anything, so the rectangle outlines
    # never end up inside the crops handed to the OCR engine.
    info_dict = {}
    for key, (x1, y1, x2, y2) in info_points.items():
        crop = aligned_bgr[y1:y2, x1:x2]
        ocr_result = None
        # The crop is empty when the region lies outside the image, which can
        # happen when alignment fell back to a smaller, unaligned target.
        if crop.size:
            ocr_result, _ = engine(crop)
        info_dict[key] = ocr_result[0][1] if ocr_result else ""

    # Convert back to RGB for display and outline the extracted regions.
    aligned_image = cv2.cvtColor(aligned_bgr, cv2.COLOR_BGR2RGB)
    for x1, y1, x2, y2 in info_points.values():
        cv2.rectangle(aligned_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    return aligned_image, info_dict


# Build the Gradio interface.
demo = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Image(label="模板图像"),
        gr.Image(label="目标图像"),
    ],
    outputs=[
        gr.Image(label="对齐后的图像"),
        gr.Textbox(label="识别信息"),
    ],
    title="磅单提取工具",
    description="上传一张模板图像和一张目标图像,提取关键信息。",
)

# Start the Gradio app.
demo.launch()