import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms import functional as F
from yolov5.utils.general import non_max_suppression
from yolov5.models.yolo import Model

# Load YOLOv5 model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).to(device)
model.eval()

def preprocess_image(image):
    image = Image.fromarray(image)
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)
    return image_tensor

def draw_boxes(image, outputs, threshold=0.3):
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    h, w, _ = image.shape

    for box in outputs:
        score, label, x1, y1, x2, y2 = box[4].item(), int(box[5].item()), box[0].item(), box[1].item(), box[2].item(), box[3].item()
        if score > threshold:
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"{model.names[label]}: {score:.2f}"
            cv2.putText(image, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def detect_objects(image):
    image_tensor = preprocess_image(image)
    outputs = model(image_tensor)
    outputs = non_max_suppression(outputs)[0]
    result_image = draw_boxes(image, outputs.cpu().numpy())
    return result_image

iface = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(type="numpy"),
    outputs=gr.Image(type="numpy"),
    title="YOLOv5 Object Detection",
    description="Upload an image to detect objects using the YOLOv5 model."
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)