import gradio as gr
import numpy as np
from ultralytics import YOLO

from utils import perspective_transform

# Five (image, textbox) output slots per result group -> 10 Gradio components.
max_len = 10

def ocr(plate_image, ocr_model):
    """Recognize the characters on a cropped plate image and return the text."""
    try:
        if 'yolo' in ocr_model:
            # Character detector: one bounding box per character on the plate.
            model = YOLO(f'./weights/{ocr_model}.pt', task='detect')
            preds = model.predict(plate_image)
            # Raw rows are (x1, y1, x2, y2, conf, cls) per detected character.
            pred_data = [list(pred.boxes.data.cpu().numpy()[0]) for pred in preds[0]]
            # Sort left to right by x1 so the characters read in plate order.
            x_sorted_preds = sorted(pred_data, key=lambda x: x[0])
            pred_cls = np.array(x_sorted_preds, dtype=np.int32)[:, -1]
            return ''.join(model.names[cls] for cls in pred_cls)
        else:
            # Non-YOLO backends (e.g. the 'trocr' option) are not wired up yet.
            return ''
    except Exception:
        return 'error'

def process_image(image, detection_model, ocr_model, yolo_thresh, perspective_width, perspective_height):
    """Detect license plates in the full image, crop them, and OCR each crop."""
    task = 'obb' if 'obb' in detection_model else 'detect'
    model = YOLO(f'./weights/{detection_model}.pt', task=task)
    predict = model(image, conf=yolo_thresh)
    if 'obb' in detection_model:
        # Oriented boxes: warp each rotated plate into an axis-aligned rectangle
        # of the requested size, keeping the loose crops as well for comparison.
        obb_crops, transformed = perspective_transform(predict, dst_width=perspective_width, dst_height=perspective_height)
        crop_results = [(plate, ocr(plate, ocr_model)) for plate in obb_crops]
        transform_results = [(plate, ocr(plate, ocr_model)) for plate in transformed]
        return crop_results, transform_results
    else:
        # Axis-aligned boxes: slice the crops straight out of the numpy image.
        boxes = np.array(predict[0].boxes.xyxy.cpu().numpy(), dtype=np.int32)
        crops = [image[y1:y2, x1:x2, :] for x1, y1, x2, y2 in boxes]
        results = [(plate, ocr(plate, ocr_model)) for plate in crops]
        return results, []
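
# `perspective_transform` is imported from utils.py, which is not shown on this
# page. A minimal sketch of the assumed contract, inferred from the call above
# (hypothetical, not the Space's actual implementation): given the OBB results
# and a target size, return (loose_crops, rectified_plates). With OpenCV it
# might look like:
#
#   import cv2
#   def perspective_transform(predict, dst_width, dst_height):
#       img = predict[0].orig_img
#       dst = np.float32([[0, 0], [dst_width, 0],
#                         [dst_width, dst_height], [0, dst_height]])
#       crops, warped = [], []
#       for poly in predict[0].obb.xyxyxyxy.cpu().numpy():
#           src = poly.astype(np.float32)              # 4 corner points, (4, 2)
#           x, y, w, h = cv2.boundingRect(src)         # loose axis-aligned crop
#           crops.append(img[y:y + h, x:x + w])
#           M = cv2.getPerspectiveTransform(src, dst)  # map corners to rectangle
#           warped.append(cv2.warpPerspective(img, M, (int(dst_width), int(dst_height))))
#       return crops, warped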

def create_interface():
    with gr.Blocks(css="footer{display:none !important}") as demo:
        with gr.Row():
            with gr.Column(scale=1):
                detection_model = gr.Dropdown(label="Detection Model", choices=["yolov8-m", "yolov8-obb-m", "yolov8-s", "yolov8-obb-s"], value="yolov8-m")
                ocr_model = gr.Dropdown(label="OCR Model", choices=["yolov32c", "trocr"], value="yolov32c")
                yolo_thresh = gr.Slider(minimum=0.0, maximum=1.0, label='yolo_threshold')
                with gr.Row():
                    perspective_width = gr.Slider(minimum=100, maximum=500, label='perspective_width')
                    perspective_height = gr.Slider(minimum=50, maximum=200, label='perspective_height')
            with gr.Column(scale=3):
                input_image = gr.Image(label="Upload Image", type="numpy")
                # Up to five result rows, each holding an (image, text) pair for
                # the direct crop and another for the perspective-warped crop.
                crop_components, transform_components = [], []
                for _ in range(max_len // 2):
                    with gr.Row():
                        crop_components += [gr.Image(visible=False), gr.Textbox(visible=False)]
                        transform_components += [gr.Image(visible=False), gr.Textbox(visible=False)]

        def main_fn(image, detection_model, ocr_model, yolo_thresh, perspective_width, perspective_height):
            crop_results, transform_results = process_image(image, detection_model, ocr_model, yolo_thresh, perspective_width, perspective_height)
            # Fill slots with results, then pad with hidden components so the
            # function always returns exactly max_len updates per group.
            crop_output = []
            for i, (crop, c_text) in enumerate(crop_results):
                crop_output.append(gr.Image(label=f'clp{i+1}', value=crop, visible=True))
                crop_output.append(gr.Textbox(label=f'text{i+1}', value=c_text, visible=True))
            for _ in range((max_len - len(crop_output)) // 2):
                crop_output.append(gr.Image(visible=False))
                crop_output.append(gr.Textbox(visible=False))
            transform_output = []
            for i, (transform, t_text) in enumerate(transform_results):
                transform_output.append(gr.Image(label=f'tlp{i+1}', value=transform, visible=True))
                transform_output.append(gr.Textbox(label=f'text{i+1}', value=t_text, visible=True))
            for _ in range((max_len - len(transform_output)) // 2):
                transform_output.append(gr.Image(visible=False))
                transform_output.append(gr.Textbox(visible=False))
            return crop_output + transform_output
        submit_button = gr.Button("Process Image")
        submit_button.click(
            fn=main_fn,
            inputs=[input_image, detection_model, ocr_model, yolo_thresh, perspective_width, perspective_height],
            outputs=crop_components + transform_components,
        )
    return demo

# Run the Gradio app
demo = create_interface()
demo.launch()
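
# To run the app locally (a sketch; assumes the YOLO weight files exist under
# ./weights and utils.py is on the import path):
#   pip install gradio ultralytics numpy
#   python app.py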