|
import cv2 |
|
import torch |
|
from PIL import Image, ImageDraw |
|
import gradio as gr |
|
import pandas as pd |
|
from transformers import pipeline |
|
|
|
# Object detector: the 'yolov5s' entry point of the ultralytics/yolov5
# repository, loaded through torch.hub. Shared by the image and video handlers.
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')

# English -> Arabic translation pipeline; it fills the "Label (Arabic)" column
# of the result tables.
translator = pipeline(task="translation_en_to_ar", model="Helsinki-NLP/opus-mt-en-ar")
|
|
|
def detect_and_draw_image(input_image):
    """Detect objects in an image, draw their boxes and tabulate the counts.

    Runs the module-level ``model`` on ``input_image``, draws a red rectangle
    and a ``label: confidence`` caption for every detection on a copy of the
    image, and builds a table of per-label counts whose Arabic column comes
    from the module-level ``translator`` pipeline.

    Args:
        input_image: PIL image to analyse, or ``None`` (e.g. nothing was
            uploaded).

    Returns:
        A ``(annotated_image, table)`` tuple. ``annotated_image`` is an
        annotated copy of the input, or ``None`` when the input was ``None``.
        ``table`` is a DataFrame with the columns ``Label (English)``,
        ``Label (Arabic)`` and ``Object Count``; it has no rows when nothing
        was detected.
    """
    counts = {}
    annotated = None

    if input_image is not None:
        results = model(input_image)
        # Tensor.numpy() only works on CPU tensors and the hub model may be
        # running on a GPU, so move the detections to the CPU first.
        detections = results.xyxy[0].cpu().numpy()

        # Draw on a copy so the caller's image is left untouched.
        annotated = input_image.copy()
        draw = ImageDraw.Draw(annotated)

        # Each detection row is (xmin, ymin, xmax, ymax, confidence, class id).
        for xmin, ymin, xmax, ymax, conf, class_id in detections:
            label = model.names[int(class_id)]
            counts[label] = counts.get(label, 0) + 1

            draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
            draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")

    labels = list(counts)
    # Skip the translation pipeline entirely when there is nothing to translate.
    translated_labels = translator(labels) if labels else []
    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(counts.values()),
    })

    return annotated, df
|
|
|
def detect_and_draw_video(video_path):
    """Annotate every frame of a video and tabulate the detections.

    Each frame is resized to 640x480, passed through the module-level
    ``model``, annotated with boxes and ``label: confidence`` captions and
    written to ``output.mp4``. Label counts are summed over all frames, so
    ``Object Count`` is the total number of detections per label, not the
    number of distinct objects.

    Args:
        video_path: Path of the video to process, or ``None`` (e.g. nothing
            was uploaded).

    Returns:
        A ``(output_path, table)`` tuple. ``output_path`` is the path of the
        annotated video, or ``None`` when ``video_path`` was ``None``.
        ``table`` is a DataFrame with the columns ``Label (English)``,
        ``Label (Arabic)`` and ``Object Count``.
    """
    output_path = None
    overall_counts = {}

    if video_path is not None:
        output_path = 'output.mp4'
        overall_counts = _annotate_video(video_path, output_path)

    labels = list(overall_counts)
    # Skip the translation pipeline entirely when there is nothing to translate.
    translated_labels = translator(labels) if labels else []
    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(overall_counts.values()),
    })

    return output_path, df


def _annotate_video(video_path, output_path, frame_size=(640, 480), fallback_fps=20.0):
    """Write an annotated copy of ``video_path`` and return per-label counts.

    Frames are written as soon as they are processed, so memory use does not
    grow with the length of the video.

    Args:
        video_path: Path of the source video.
        output_path: Path of the annotated video to create.
        frame_size: ``(width, height)`` every frame is resized to.
        fallback_fps: Frame rate used when the source does not report one.

    Returns:
        Dict mapping each detected label to its number of detections summed
        over all frames.
    """
    counts = {}
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Keep the playback speed of the source. The comparison is written so
        # that 0, negative values and NaN all select the fallback rate.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = fallback_fps

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, frame_size)

            # OpenCV decodes frames as BGR while the YOLOv5 hub wrapper expects
            # RGB arrays; convert for inference only and keep drawing on the
            # BGR frame that gets written out.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # Tensor.numpy() only works on CPU tensors and the hub model may be
            # running on a GPU, so move the detections to the CPU first.
            detections = results.xyxy[0].cpu().numpy()

            # Each detection row is (xmin, ymin, xmax, ymax, confidence, class id).
            for xmin, ymin, xmax, ymax, conf, class_id in detections:
                label = model.names[int(class_id)]
                counts[label] = counts.get(label, 0) + 1

                left, top = int(xmin), int(ymin)
                cv2.rectangle(frame, (left, top), (int(xmax), int(ymax)), (255, 0, 0), 2)
                # Clamp the caption baseline so labels of boxes touching the
                # top edge are still drawn inside the frame.
                cv2.putText(frame, f"{label}: {conf:.2f}", (left, max(top - 10, 12)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            out.write(frame)
    finally:
        # Release the capture and the writer even if processing fails midway.
        cap.release()
        if out is not None:
            out.release()

    return counts
|
|
|
# Image tab: takes a PIL image and shows the image and the count table
# returned by detect_and_draw_image.
image_interface = gr.Interface(
    title="Object Detection for Images",
    description="Upload an image to see the objects detected and their counts.",
    fn=detect_and_draw_image,
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=[
        gr.Image(type="pil"),
        gr.Dataframe(label="Object Counts"),
    ],
)
|
|
|
# Video tab: takes an uploaded video and shows the video and the count table
# returned by detect_and_draw_video.
video_interface = gr.Interface(
    title="Object Detection for Videos",
    description="Upload a video to see the objects detected and their counts.",
    fn=detect_and_draw_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Video(label="Processed Video"),
        gr.Dataframe(label="Object Counts"),
    ],
)
|
|
|
# Put both interfaces behind one tabbed UI and start serving it in debug mode.
app = gr.TabbedInterface(
    interface_list=[image_interface, video_interface],
    tab_names=["Image Detection", "Video Detection"],
)
app.launch(debug=True)
|
|