"""Webcam object-detection demo: MobileNet-SSD (Caffe) served through Gradio WebRTC."""

from pathlib import Path

import cv2
import gradio as gr
import numpy as np
from gradio_webrtc import WebRTC

# Class names indexed by the class id that the network reports for a detection.
CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# One random colour per class; regenerated on every start of the program.
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# The model files are expected to sit next to this script.
directory = Path(__file__).parent

MODEL = str((directory / "MobileNetSSD_deploy.caffemodel").resolve())
PROTOTXT = str((directory / "MobileNetSSD_deploy.prototxt.txt").resolve())
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)


def detection(image, conf_threshold=0.3):
    """Run the detector on one frame and draw the detections on it.

    Args:
        image: Input frame; presumably an image array delivered by the
            WebRTC component (TODO confirm dtype and channel order).
        conf_threshold: Detections whose confidence is not strictly greater
            than this value are skipped.

    Returns:
        The frame resized to 500x500 with a rectangle and a
        ``"<class>: <confidence>%"`` caption drawn for every kept detection.
    """
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
    )
    net.setInput(blob)
    detections = net.forward()

    # Boxes are drawn on a fixed-size copy, so they are scaled to 500x500
    # rather than to the size of the incoming frame.
    image = cv2.resize(image, (500, 500))
    h, w = image.shape[:2]
    scale = np.array([w, h, w, h])

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= conf_threshold:
            continue

        # Class index first, then the box corners scaled by the frame
        # width/height to obtain pixel coordinates.
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * scale
        # Hand OpenCV plain Python numbers instead of NumPy scalars/arrays
        # for the point and colour arguments.
        start_x, start_y, end_x, end_y = box.astype("int").tolist()
        color = tuple(COLORS[idx].tolist())

        label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
        cv2.rectangle(image, (start_x, start_y), (end_x, end_y), color, 2)
        # Put the caption above the box unless that would run off the top.
        y = start_y - 15 if start_y - 15 > 15 else start_y + 15
        cv2.putText(
            image, label, (start_x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
        )
    return image


css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""


with gr.Blocks(css=css) as demo:
    # NOTE(review): the heading and links below refer to YOLOv10, but this
    # script loads a MobileNet-SSD Caffe model - confirm the intended text.
    gr.HTML(
        """
    <h1 style='text-align: center'>
    YOLOv10 Webcam Stream
    </h1>
    """
    )
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> |
        <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
        </h3>
        """
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = WebRTC(label="Stream")
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

        # NOTE(review): `conf_threshold` is not listed in `inputs`, so the
        # slider value presumably never reaches `detection` and its default
        # threshold is used. Confirm that the installed gradio_webrtc version
        # forwards extra inputs before adding the slider here.
        image.webrtc_stream(
            fn=detection,
            inputs=[image],
            stream_every=0.05,
            time_limit=30,
        )

if __name__ == '__main__':
    demo.launch()