Spaces:

RakanAlsheraiwi
/

ObjectDetection

App Files Files Community

ObjectDetection / app.py

RakanAlsheraiwi's picture

RakanAlsheraiwi

Update app.py

894a0e9 verified 27 days ago

3.2 kB

	import cv2
	import torch
	from PIL import Image, ImageDraw
	import gradio as gr
	import pandas as pd
	from transformers import pipeline

	# Object detector: the `yolov5s` entry point of the ultralytics/yolov5 hub repository.
	model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')

	# English -> Arabic translation pipeline used for the detected class labels.
	translator = pipeline(
	    task="translation_en_to_ar",
	    model="Helsinki-NLP/opus-mt-en-ar",
	)

	def detect_and_draw_image(input_image):
	    """Detect objects in an image, draw labelled boxes, and tabulate the counts.

	    Args:
	        input_image: PIL image from the Gradio image input, or ``None`` when
	            the form is submitted without an upload.

	    Returns:
	        An ``(annotated_image, counts_table)`` tuple. ``annotated_image`` is a
	        copy of the input with a red box and a ``label: confidence`` caption
	        for every detection (``None`` when no image was supplied).
	        ``counts_table`` is a DataFrame with one row per detected class:
	        English label, Arabic translation, and number of detections.
	    """
	    if input_image is None:
	        # Nothing to analyse: return an empty table instead of crashing.
	        return None, pd.DataFrame(
	            columns=['Label (English)', 'Label (Arabic)', 'Object Count']
	        )

	    results = model(input_image)
	    # ``tolist()`` yields plain Python floats and works whether the model
	    # runs on CPU or GPU, whereas ``.numpy()`` raises for CUDA tensors.
	    detections = results.xyxy[0].tolist()

	    # Draw on a copy so the caller's image is left untouched.
	    annotated = input_image.copy()
	    draw = ImageDraw.Draw(annotated)
	    counts = {}
	    for xmin, ymin, xmax, ymax, conf, class_id in detections:
	        label = model.names[int(class_id)]
	        counts[label] = counts.get(label, 0) + 1

	        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
	        draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")

	    labels = list(counts)
	    # Skip the translation model entirely when nothing was detected.
	    translated_labels = translator(labels) if labels else []
	    df = pd.DataFrame({
	        'Label (English)': labels,
	        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
	        'Object Count': list(counts.values()),
	    })

	    return annotated, df

	def detect_and_draw_video(video_path):
	    """Run object detection on every frame of a video and write an annotated copy.

	    Each frame is resized to 640x480, passed to the detector, annotated with
	    a box and a ``label: confidence`` caption per detection, and written
	    immediately to an MP4 ('mp4v') file so that memory use does not grow
	    with the length of the video.

	    Counts are accumulated frame by frame: an object that is detected in
	    many frames contributes once for every frame it appears in.

	    Args:
	        video_path: Path of the uploaded video, or ``None`` when the form is
	            submitted without an upload.

	    Returns:
	        An ``(output_path, counts_table)`` tuple. ``output_path`` is the path
	        of the annotated video (``None`` when no video was supplied).
	        ``counts_table`` is a DataFrame with one row per detected class:
	        English label, Arabic translation, and number of detections.
	    """
	    import tempfile

	    if video_path is None:
	        # Nothing to analyse: return an empty table instead of crashing.
	        return None, pd.DataFrame(
	            columns=['Label (English)', 'Label (Arabic)', 'Object Count']
	        )

	    frame_size = (640, 480)
	    overall_counts = {}

	    # A unique file per call keeps concurrent requests from overwriting
	    # each other's output.
	    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
	        output_path = tmp.name

	    cap = cv2.VideoCapture(video_path)
	    out = None
	    try:
	        # Keep the source frame rate so playback speed is preserved; fall
	        # back to 20 fps when the container does not report a usable value.
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        if not fps > 0:
	            fps = 20.0
	        out = cv2.VideoWriter(
	            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size
	        )

	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            frame = cv2.resize(frame, frame_size)
	            # OpenCV decodes frames as BGR, while the detector expects RGB
	            # for numpy input; convert only the copy handed to the model.
	            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

	            # ``tolist()`` works for CPU and GPU tensors alike.
	            for xmin, ymin, xmax, ymax, conf, class_id in results.xyxy[0].tolist():
	                label = model.names[int(class_id)]
	                overall_counts[label] = overall_counts.get(label, 0) + 1

	                cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
	                              (255, 0, 0), 2)
	                # Clamp the caption so boxes touching the top edge still
	                # get a visible label.
	                text_y = max(int(ymin) - 10, 15)
	                cv2.putText(frame, f"{label}: {conf:.2f}", (int(xmin), text_y),
	                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

	            # Write straight away instead of buffering the whole video.
	            out.write(frame)
	    finally:
	        # Release the capture and the writer even if detection fails.
	        cap.release()
	        if out is not None:
	            out.release()

	    labels = list(overall_counts)
	    # Skip the translation model entirely when nothing was detected.
	    translated_labels = translator(labels) if labels else []
	    df = pd.DataFrame({
	        'Label (English)': labels,
	        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
	        'Object Count': list(overall_counts.values()),
	    })

	    return output_path, df

	# Image tab: one uploaded PIL image in, the annotated image plus the
	# per-class count table out.
	image_interface = gr.Interface(
	    detect_and_draw_image,
	    gr.Image(type="pil", label="Upload Image"),
	    [
	        gr.Image(type="pil"),
	        gr.Dataframe(label="Object Counts"),
	    ],
	    title="Object Detection for Images",
	    description="Upload an image to see the objects detected and their counts.",
	)

	# Video tab: one uploaded video in, the processed video plus the
	# per-class count table out.
	video_interface = gr.Interface(
	    detect_and_draw_video,
	    gr.Video(label="Upload Video"),
	    [
	        gr.Video(label="Processed Video"),
	        gr.Dataframe(label="Object Counts"),
	    ],
	    title="Object Detection for Videos",
	    description="Upload a video to see the objects detected and their counts.",
	)

	# Expose both interfaces as tabs of a single app and start the server.
	app = gr.TabbedInterface(
	    interface_list=[image_interface, video_interface],
	    tab_names=["Image Detection", "Video Detection"],
	)
	app.launch(debug=True)