"""Gradio app: wild animal (Lion/Tiger) detection on images and videos.

Loads an OpenVINO-exported YOLO model from the Hugging Face Hub and serves a
Blocks UI where the user uploads an image or a video and receives the
annotated result.
"""

import os
import tempfile

import cv2
import gradio as gr
from huggingface_hub import snapshot_download
from PIL import Image
from ultralytics import YOLO

# Public model path location
# MODEL_REPO_ID = "mintheinwin/3907578Y"
# Organization's (NYP) model path location
MODEL_REPO_ID = "ITI107-2024S2/3907578Y"


def load_model(repo_id):
    """Download the model snapshot from the Hub and load the OpenVINO YOLO detector.

    Args:
        repo_id: Hugging Face Hub repository id containing the exported model.

    Returns:
        A ``YOLO`` detection model ready for inference.
    """
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best_int8_openvino_model")
    return YOLO(path, task="detect")


# Initialize the model once at import time (downloads on first run).
detection_model = load_model(MODEL_REPO_ID)

# Student info
student_info = "Student Id: 3907578Y, Name: Min Thein Win"


def predict_image(pil_img):
    """Run detection on a PIL image and return the annotated image as RGB PIL."""
    result = detection_model.predict(pil_img, conf=0.5, iou=0.5)
    img_bgr = result[0].plot()  # Annotated frame in BGR (OpenCV convention)
    return Image.fromarray(img_bgr[..., ::-1])  # BGR -> RGB


def predict_video(video):
    """Run detection frame-by-frame on a video and return the annotated video path.

    Frames are streamed into the ``VideoWriter`` as they are produced instead of
    being buffered in a list (keeps memory flat for long videos), and the source
    frame rate is preserved when the container reports one.

    Args:
        video: Filesystem path of the uploaded video.

    Returns:
        Path to the annotated ``.mp4`` file in a fresh temp directory.
    """
    cap = cv2.VideoCapture(video)
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "annotated_video.mp4")

    # Fall back to 20 FPS (the previous hard-coded value) if the container
    # does not report a sane frame rate.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        fps = 20.0

    writer = None
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Perform detection on each frame.
            result = detection_model.predict(frame, conf=0.5, iou=0.5)
            annotated_frame = result[0].plot()  # BGR, matching VideoWriter's input
            if writer is None:
                # Lazily open the writer once the output frame size is known.
                height, width, _ = annotated_frame.shape
                writer = cv2.VideoWriter(
                    output_path,
                    cv2.VideoWriter_fourcc(*"mp4v"),
                    fps,
                    (width, height),
                )
            writer.write(annotated_frame)
    finally:
        # Release resources even if inference raises mid-video.
        cap.release()
        if writer is not None:
            writer.release()

    return output_path


def create_interface():
    """Build and return the Gradio Blocks UI for image/video detection."""
    with gr.Blocks() as interface:
        # Header
        gr.Markdown("# Wild Animal Detection (Lion/Tiger)")
        gr.Markdown(student_info)

        with gr.Row(equal_height=True):  # Input and output views aligned vertically
            # Input section
            with gr.Column(scale=1):
                gr.Markdown("### Detection Input Image/Video:")
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",  # Default selection is "Image"
                    label="Select Input Type",
                    interactive=True,
                )
                input_image = gr.Image(type="pil", label="Upload Image", visible=True, height=420, width=600)
                input_video = gr.Video(label="Upload Video", visible=False, height=420, width=600)

                def toggle_inputs(input_choice):
                    # Show only the upload widget matching the selected input type.
                    if input_choice == "Image":
                        return gr.update(visible=True), gr.update(visible=False)
                    return gr.update(visible=False), gr.update(visible=True)

                input_type.change(toggle_inputs, inputs=input_type, outputs=[input_image, input_video])

            # Output section
            with gr.Column(scale=1):
                gr.Markdown("### Detection Output Image/Video:")
                gr.Markdown(
                    "This section displays detected objects with bounding boxes, "
                    "animal types (Lion/Tiger), and confidence levels."
                )
                output_image = gr.Image(type="pil", label="Detection Result (Image)", visible=True, height=410, width=600)
                output_video = gr.Video(label="Detection Result (Video)", visible=False, height=410, width=600)

        def detect(input_choice, image=None, video=None):
            # Dispatch to the matching predictor; the unused output stays None.
            if input_choice == "Image" and image is not None:
                return predict_image(image), None
            if input_choice == "Video" and video is not None:
                return None, predict_video(video)
            return None, None

        # Buttons in a horizontal layout
        with gr.Row():
            detect_btn = gr.Button("START DETECT", variant="primary")
            reset_btn = gr.Button("RESET", variant="secondary")

        detect_btn.click(
            fn=detect,
            inputs=[input_type, input_image, input_video],
            outputs=[output_image, output_video],
        )

        def reset_view():
            # Clear both output panes.
            return None, None

        reset_btn.click(fn=reset_view, inputs=None, outputs=[output_image, output_video])

        def update_output_visibility(input_choice):
            # Keep the visible output pane in sync with the selected input type.
            if input_choice == "Image":
                return gr.update(visible=True), gr.update(visible=False)
            return gr.update(visible=False), gr.update(visible=True)

        input_type.change(update_output_visibility, inputs=input_type, outputs=[output_image, output_video])

        # Footer view
        gr.Markdown("---")
        gr.Markdown("Developed by NYP student @ Min Thein Win")

    return interface


# Built at module level so importers can reuse `interface`; the server only
# starts when the file is executed as a script.
interface = create_interface()

if __name__ == "__main__":
    interface.launch(share=True)