from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import cv2
import tempfile


def load_model(repo_id):
    """Download a model snapshot from the Hugging Face Hub and load the
    exported INT8 OpenVINO detection model contained in it.

    Args:
        repo_id: Hub repository id, e.g. "user/repo".

    Returns:
        A YOLO model configured for the 'detect' task.
    """
    download_dir = snapshot_download(repo_id)
    print(download_dir)
    model_path = os.path.join(download_dir, "best_int8_openvino_model")
    print(model_path)
    return YOLO(model_path, task='detect')


def predict_image(pilimg):
    """Run object detection on a PIL image and return the annotated image.

    YOLO's plot() returns a BGR numpy array, so the channel order is
    reversed before converting back to a PIL (RGB) image.
    """
    result = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    img_bgr = result[0].plot()
    return Image.fromarray(img_bgr[..., ::-1])  # BGR -> RGB


def predict_video(video_path):
    """Run object detection frame-by-frame on a video file.

    Writes an annotated copy to a temporary .mp4 and returns its path.

    Raises:
        ValueError: if the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report 0 fps, which would produce an unplayable file.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Reserve the output path, then close the handle so VideoWriter can
    # open the file itself (keeping it open fails on Windows).
    temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_video.close()
    out = cv2.VideoWriter(temp_video.name,
                          cv2.VideoWriter_fourcc(*'mp4v'),
                          fps, (width, height))
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # plot() yields a BGR array, which is what VideoWriter expects.
            result = detection_model.predict(frame, conf=0.5, iou=0.6)
            out.write(result[0].plot())
    finally:
        # Release resources even if prediction raises mid-stream.
        cap.release()
        out.release()

    return temp_video.name


REPO_ID = "CharmainChua/windowsandcurtains"
detection_model = load_model(REPO_ID)

# NOTE: gr.Interface takes `title=` (there is no `label` parameter), and
# gr.Video accepts no `type` kwarg in current Gradio releases — both of the
# original calls would raise TypeError.
image_input = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="Object Detection on Image",
)

video_input = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="Object Detection on Video",
)

if __name__ == "__main__":
    gr.TabbedInterface(
        [image_input, video_input],
        ["Image Detection", "Video Detection"],
    ).launch(share=True)