Spaces:
Sleeping
Sleeping
File size: 3,755 Bytes
024a0ce a6398cf 024a0ce a6398cf 024a0ce 7fdc1ab 024a0ce 7fdc1ab 024a0ce 7fdc1ab c1cecd8 7fdc1ab 39caf4f 7fdc1ab 39caf4f 7fdc1ab 024a0ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import cv2
import numpy as np
# Pre-trained MobileNet-SSD (Caffe) weights and network definition.
# Both paths are relative, so they are resolved against the working directory.
model_path = 'MobileNetSSD_deploy.caffemodel'  # Path to the pre-trained SSD model
config_path = 'MobileNetSSD_deploy.prototxt.txt'  # Path to the deploy prototxt file

# Class labels, indexed by the class id the network emits.
# The MobileNetSSD_deploy Caffe model is trained on PASCAL VOC (background
# plus 20 object classes), not COCO. Indexing a COCO label list with its
# class ids yields wrong names (e.g. id 15, "person", was shown as "bench").
# NOTE(review): if a differently trained model is stored under these file
# names, replace this list with that model's own label map.
CLASSES = [
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Load the network once at import time so every processed frame reuses it.
net = cv2.dnn.readNetFromCaffe(config_path, model_path)
# Function to process the video frame and detect objects
def detect_objects_in_frame(frame, conf_threshold=0.5):
    """Run the SSD network on a frame and draw its detections onto the frame.

    Args:
        frame: Image as a NumPy array whose first two dimensions are
            (height, width). It is annotated in place.
        conf_threshold: A detection is drawn only when its confidence is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        The same ``frame`` object, with a green box and a
        ``"<label>: <confidence>"`` caption drawn for every kept detection.
    """
    height, width = frame.shape[:2]

    # blobFromImage subtracts the mean first and then multiplies by the scale
    # factor, so this maps 8-bit pixels to roughly [-1, 1], which is the input
    # range MobileNet-SSD was trained with. A scale factor of 1.0 would feed
    # the network values in [-127.5, 127.5] instead.
    # NOTE(review): swapRB=True assumes the incoming frame's channel order is
    # the reverse of what the model expects; confirm against the frame source.
    blob = cv2.dnn.blobFromImage(
        frame,
        1.0 / 127.5,
        (300, 300),
        (127.5, 127.5, 127.5),
        swapRB=True,
        crop=False,
    )
    net.setInput(blob)
    detections = net.forward()

    box_color = (0, 255, 0)
    # Each row is [_, class_id, confidence, left, top, right, bottom]; the box
    # coordinates are fractions of the frame width/height.
    for detection in detections[0, 0]:
        confidence = float(detection[2])
        if confidence <= conf_threshold:
            continue

        class_id = int(detection[1])
        left = int(detection[3] * width)
        top = int(detection[4] * height)
        right = int(detection[5] * width)
        bottom = int(detection[6] * height)

        # Fall back to the numeric id rather than raising IndexError when the
        # model emits an id that the label list does not cover.
        if 0 <= class_id < len(CLASSES):
            class_name = CLASSES[class_id]
        else:
            class_name = f"class {class_id}"
        label = f"{class_name}: {confidence:.2f}"

        # Keep the caption inside the image when the box touches the top edge.
        label_y = top - 10 if top - 10 > 10 else top + 15

        cv2.rectangle(frame, (left, top), (right, bottom), box_color, 2)
        cv2.putText(frame, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

    return frame
import gradio as gr
from gradio_webrtc import WebRTC
# Page styling: cap the stream widget at 600px in each direction and centre
# it inside its column using flexbox.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

# Centred page heading rendered above the stream widget.
page_heading_html = """
<h1 style='text-align: center'>
Mobilev2 ssd Webcam Stream (Powered by WebRTC ⚡️)
</h1>
"""

with gr.Blocks(css=css) as demo:
    gr.HTML(page_heading_html)
    # A single column holding one group, which in turn holds the WebRTC widget.
    with gr.Column(elem_classes=["my-column"]), gr.Group(elem_classes=["my-group"]):
        image = WebRTC(label="Stream", rtc_configuration=None)
    # NOTE: the detection wiring below is disabled, so as written here nothing
    # attaches detect_objects_in_frame to the stream. The disabled call passes
    # two inputs (frame and threshold), so the handler must accept both before
    # this is re-enabled.
    #
    # conf_threshold = gr.Slider(
    #     label="Confidence Threshold",
    #     minimum=0.0,
    #     maximum=1.0,
    #     step=0.05,
    #     value=0.30,
    # )
    # image.stream(
    #     fn=detect_objects_in_frame, inputs=[image, conf_threshold], outputs=[image], time_limit=10
    # )

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|