import gradio as gr
from PIL import Image
import onnxruntime as ort
import cv2
import numpy as np

# Load the ONNX model
model = ort.InferenceSession("Model_IV.onnx")


def predict(image):
    # Save the shape of the original image for later
    original_image_shape = image.shape
    print("Original image shape:", original_image_shape)

    # Preprocess the image
    # Get the name and shape of the model's input
    input_name = model.get_inputs()[0].name
    input_shape = model.get_inputs()[0].shape  # expected to be (1, 3, H, W)

    # Resize the image to the model's input size (cv2.resize expects (width, height))
    image = cv2.resize(image, (input_shape[3], input_shape[2]))

    # Reorder from HWC to CHW to match the model's input layout
    # (reshape would scramble the pixels; transpose preserves them)
    image = image.transpose(2, 0, 1)

    # Normalize the input using ImageNet-style normalization, as in the original code.
    # NOTE: if the model is an Ultralytics YOLO export, it typically expects inputs
    # scaled to [0, 1] only, without the mean/std step.
    mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
    image = (image / 255.0 - mean) / std

    # Add a batch dimension and cast to float32
    if len(input_shape) == 4 and input_shape[0] == 1:
        image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)

    # Make the prediction
    print("Input tensor shape:", image.shape)
    output = model.run(None, {input_name: image})
    print("Output tensor shape:", output[0].shape)

    # Postprocess the output
    annotated_img = output[0]
    print("Min value before normalization:", np.min(annotated_img))
    print("Max value before normalization:", np.max(annotated_img))

    # Normalize the output using min-max normalization
    min_val = np.min(annotated_img)
    max_val = np.max(annotated_img)
    annotated_img = (annotated_img - min_val) / (max_val - min_val)
    print("Min value after normalization:", np.min(annotated_img))
    print("Max value after normalization:", np.max(annotated_img))

    # Reshape to the original image shape.
    # NOTE: this only works if the model really outputs an image-sized tensor.
    # A YOLO detection export usually outputs a detection tensor instead,
    # in which case this reshape fails and the detections have to be decoded
    # and drawn onto the original frame (see the sketch at the bottom of the file).
    print("annotated_img shape before reshape:", annotated_img.shape)
    annotated_img = annotated_img.reshape(original_image_shape)
    print("annotated_img shape after reshape:", annotated_img.shape)

    # Convert to a PIL Image. Image.fromarray cannot build an RGB image from a
    # float array (the source of the original ValueError), so scale back to 8-bit first.
    annotated_img = Image.fromarray((annotated_img * 255).astype(np.uint8))
    print("PIL Image type:", type(annotated_img))

    return annotated_img


# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(sources=["webcam"], type="numpy"),  # webcam input as a numpy array
    outputs=gr.Image(type="pil"),  # model output as a PIL Image
)

# Launch the interface
if __name__ == "__main__":
    demo.launch()
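

# ---------------------------------------------------------------------------
# Hedged sketch, not wired into the app above: if Model_IV.onnx is a YOLO-style
# detection export, output[0] is a detection tensor (commonly shaped
# (1, 4 + num_classes, 8400)), not an image, so the reshape in predict() cannot
# produce an annotated frame. The sketch below shows one way to decode such a
# tensor and draw boxes on the original webcam frame. The output layout, the
# 0.25/0.45 thresholds, the plain-resize scaling (no letterbox), and the helper
# name draw_detections_sketch are assumptions, not taken from the model.
# ---------------------------------------------------------------------------
def draw_detections_sketch(original_image, output_tensor, input_size=640,
                           conf_threshold=0.25, iou_threshold=0.45):
    """Decode a YOLO-style detection tensor and draw boxes on the frame."""
    preds = np.squeeze(output_tensor).T          # (num_boxes, 4 + num_classes)
    boxes_cxcywh = preds[:, :4]                  # cx, cy, w, h at model-input scale
    class_scores = preds[:, 4:]                  # per-class confidences
    scores = class_scores.max(axis=1)
    class_ids = class_scores.argmax(axis=1)

    # Keep only reasonably confident detections
    keep = scores > conf_threshold
    boxes_cxcywh, scores, class_ids = boxes_cxcywh[keep], scores[keep], class_ids[keep]

    # Scale boxes from model-input coordinates back to the original frame,
    # assuming the frame was resized (not letterboxed) as in predict() above
    h, w = original_image.shape[:2]
    scale_x, scale_y = w / input_size, h / input_size
    x = (boxes_cxcywh[:, 0] - boxes_cxcywh[:, 2] / 2) * scale_x
    y = (boxes_cxcywh[:, 1] - boxes_cxcywh[:, 3] / 2) * scale_y
    bw = boxes_cxcywh[:, 2] * scale_x
    bh = boxes_cxcywh[:, 3] * scale_y
    rects = np.stack([x, y, bw, bh], axis=1)

    # Non-maximum suppression to drop overlapping boxes
    indices = cv2.dnn.NMSBoxes(rects.tolist(), scores.tolist(),
                               conf_threshold, iou_threshold)

    annotated = original_image.copy()
    for i in np.array(indices).flatten():
        x1, y1 = int(rects[i, 0]), int(rects[i, 1])
        x2, y2 = int(rects[i, 0] + rects[i, 2]), int(rects[i, 1] + rects[i, 3])
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(annotated, f"class {class_ids[i]}: {scores[i]:.2f}",
                    (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 1)
    return Image.fromarray(annotated)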