"""Gradio app: face detection on images and videos with a YOLOv8 face model.

Downloads the ``arnabdhar/YOLOv8-Face-Detection`` checkpoint from the
Hugging Face Hub at import time and exposes two tabs: single-image
detection and frame-by-frame video annotation.
"""

import os
import tempfile

import cv2
import gradio as gr
from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw
from supervision import Detections
from ultralytics import YOLO

# Download and load the YOLOv8 face detection model.
# hf_hub_download caches locally, so repeated launches don't re-download.
model_path = hf_hub_download(
    repo_id="arnabdhar/YOLOv8-Face-Detection", filename="model.pt"
)
model = YOLO(model_path)


# ---------- Image Detection Function ----------
def detect_faces(image: Image.Image):
    """Detect faces in a PIL image.

    Args:
        image: Input image (Gradio supplies a PIL image).

    Returns:
        Tuple of (annotated copy of the image with red boxes drawn,
        human-readable face-count string). The input image is not mutated.
    """
    output = model(image)
    results = Detections.from_ultralytics(output[0])
    boxes = results.xyxy  # (N, 4) array of x1, y1, x2, y2 corners

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)
    for x1, y1, x2, y2 in boxes:
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)

    face_count = len(boxes)
    return annotated_image, f"Number of faces detected: {face_count}"


# ---------- Video Detection Function ----------
def detect_faces_in_video(video_path):
    """Run face detection on every frame of a video and annotate it.

    Args:
        video_path: Filesystem path to the uploaded video.

    Returns:
        Tuple of (path to the annotated output video, summary string with
        the total number of detections summed over all frames — the same
        face appearing in many frames is counted once per frame).
    """
    cap = cv2.VideoCapture(video_path)
    # Some containers report 0 FPS; fall back to 25 so VideoWriter stays valid.
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Unique temp file per call (a fixed name would collide across
    # concurrent requests). mp4v/.mp4 instead of XVID/.avi so the browser
    # <video> element used by gr.Video can actually play the result.
    fd, temp_out_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(temp_out_path, fourcc, fps, (w, h))

    total_faces = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # YOLO expects RGB; OpenCV decodes frames as BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = model(rgb)
        dets = Detections.from_ultralytics(results[0])

        for box in dets.xyxy:
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

        total_faces += len(dets.xyxy)
        out.write(frame)

    cap.release()
    out.release()
    return temp_out_path, f"Total faces detected (across all frames): {total_faces}"


# ---------- Gradio Interface Setup ----------
image_interface = gr.Interface(
    fn=detect_faces,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Text(label="Face Count"),
    ],
    title="YOLOv8 Face Detector (Image)",
    description="Upload an image to detect faces using a YOLOv8 face detection model.",
)

video_interface = gr.Interface(
    fn=detect_faces_in_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Video(label="Annotated Video"),
        gr.Text(label="Face Count"),
    ],
    title="YOLOv8 Face Detector (Video)",
    description="Upload a video to detect and annotate faces frame by frame using YOLOv8.",
)

demo = gr.TabbedInterface(
    interface_list=[image_interface, video_interface],
    tab_names=["Image Face Detection", "Video Face Detection"],
)

if __name__ == "__main__":
    demo.launch()