Spaces:

RakanAlsheraiwi
/

ObjectDetection

File size: 3,203 Bytes

894a0e9
 
 
 
 
 
eee0e91
894a0e9
54e2701
 
eee0e91
894a0e9
 
 
 
 
 
 
 
 
 
 
 
54e2701
 
eee0e91
54e2701
 
 
eee0e91
 
894a0e9
eee0e91
 
894a0e9
 
 
eee0e91
894a0e9
 
 
54e2701
eee0e91
894a0e9
 
 
eee0e91
894a0e9
eee0e91
894a0e9
 
 
eee0e91
54e2701
 
eee0e91
894a0e9
eee0e91
894a0e9
 
 
eee0e91
894a0e9
eee0e91
894a0e9
54e2701
 
eee0e91
54e2701
 
 
eee0e91
 
894a0e9
eee0e91
 
894a0e9
 
 
 
 
eee0e91
 
 
894a0e9
 
 
 
 
eee0e91
 
 
894a0e9

import cv2
import torch
from PIL import Image, ImageDraw
import gradio as gr
import pandas as pd
from transformers import pipeline

# Object detector: the 'yolov5s' entry point of the ultralytics/yolov5 repo,
# fetched through torch.hub when this module is loaded.
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')

# English -> Arabic translation pipeline; the detection functions below use
# it to translate the detected class labels for the results table.
translator = pipeline(
    task="translation_en_to_ar",
    model="Helsinki-NLP/opus-mt-en-ar",
)

def detect_and_draw_image(input_image):
    """Detect objects in an image, outline them, and tabulate the counts.

    Args:
        input_image: PIL image to analyse, or ``None`` when nothing was
            uploaded.

    Returns:
        An ``(annotated_image, table)`` tuple. ``annotated_image`` is a copy
        of the input with a red box and a ``label: confidence`` caption for
        every detection (``None`` if no image was supplied). ``table`` is a
        DataFrame with one row per detected label holding the English label,
        its Arabic translation and the number of detections.
    """
    counts = {}
    annotated = None

    if input_image is not None:
        results = model(input_image)
        # Copy to host memory first: Tensor.numpy() raises for tensors that
        # live on a CUDA device.
        detections = results.xyxy[0].cpu().numpy()

        # Draw on a copy so the caller's image object is left untouched.
        annotated = input_image.copy()
        draw = ImageDraw.Draw(annotated)
        for xmin, ymin, xmax, ymax, conf, class_id in detections:
            label = model.names[int(class_id)]
            counts[label] = counts.get(label, 0) + 1

            draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
            draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")

    labels = list(counts)
    # Skip the translation model entirely when nothing was detected.
    translated_labels = translator(labels) if labels else []
    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(counts.values())
    })

    return annotated, df

def detect_and_draw_video(video_path):
    """Run object detection on every frame of a video and save the result.

    Each frame is resized to 640x480, passed to the detector, annotated with
    a box and a ``label: confidence`` caption per detection, and written
    straight to ``output.mp4`` so the whole video is never held in memory.

    Args:
        video_path: Path of the video file to process, or ``None``/empty
            when nothing was uploaded.

    Returns:
        An ``(output_path, table)`` tuple. ``output_path`` is ``'output.mp4'``
        when at least one frame was written, otherwise ``None``. ``table`` is
        a DataFrame with one row per detected label holding the English
        label, its Arabic translation and the number of detections summed
        over all frames.
    """
    output_path = 'output.mp4'
    frame_size = (640, 480)
    overall_counts = {}
    frames_written = 0

    if video_path:
        cap = cv2.VideoCapture(video_path)
        out = None
        try:
            # Keep the source playback speed; fall back to 20 fps when the
            # container does not report a usable rate (0 or NaN).
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = 20.0
            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                  fps, frame_size)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame = cv2.resize(frame, frame_size)
                # OpenCV decodes frames as BGR, while the YOLOv5 hub model
                # expects RGB for numpy input, so convert for inference only.
                results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                # Copy to host memory first: Tensor.numpy() raises for
                # tensors that live on a CUDA device.
                detections = results.xyxy[0].cpu().numpy()

                for xmin, ymin, xmax, ymax, conf, class_id in detections:
                    label = model.names[int(class_id)]
                    overall_counts[label] = overall_counts.get(label, 0) + 1

                    x1, y1 = int(xmin), int(ymin)
                    x2, y2 = int(xmax), int(ymax)
                    # Colours are BGR here: (0, 0, 255) is red, matching the
                    # red boxes drawn on still images.
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    # Keep the caption inside the frame for boxes that touch
                    # the top edge.
                    cv2.putText(frame, f"{label}: {conf:.2f}", (x1, max(y1 - 10, 10)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                out.write(frame)
                frames_written += 1
        finally:
            # Release native handles even if detection or encoding fails.
            cap.release()
            if out is not None:
                out.release()

    labels = list(overall_counts)
    # Skip the translation model entirely when nothing was detected.
    translated_labels = translator(labels) if labels else []
    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(overall_counts.values())
    })

    # An output file with no frames is not a playable video; report None.
    return (output_path if frames_written else None), df

# Tab 1: single-image detection. The image is passed to the handler as a PIL
# object, and the annotated image is shown next to the label-count table.
image_interface = gr.Interface(
    title="Object Detection for Images",
    description="Upload an image to see the objects detected and their counts.",
    fn=detect_and_draw_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Image(type="pil"), gr.Dataframe(label="Object Counts")],
)

# Tab 2: video detection. The handler returns the processed video plus the
# label-count table.
video_interface = gr.Interface(
    title="Object Detection for Videos",
    description="Upload a video to see the objects detected and their counts.",
    fn=detect_and_draw_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[gr.Video(label="Processed Video"), gr.Dataframe(label="Object Counts")],
)

# Combine both interfaces into one tabbed app and start serving it.
app = gr.TabbedInterface(
    interface_list=[image_interface, video_interface],
    tab_names=["Image Detection", "Video Detection"],
)
app.launch(debug=True)