import gradio as gr
from PIL import Image
import onnxruntime as ort
import cv2
import numpy as np

# Load the ONNX model
model = ort.InferenceSession("Model_IV.onnx")
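
# Print the model's output signature once at startup. A detection model
# reports a detection tensor here (for example (1, 84, 8400) on a
# YOLOv8-style export), not an image shape; a quick sanity check when
# debugging shape errors.
for out in model.get_outputs():
    print("Model output:", out.name, out.shape)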

def predict(image):
    # Save shape of original image for later
    original_image_shape = image.shape
    print("Original image shape:", original_image_shape)

    # Preprocess the image

    # Get name and shape of the model's inputs
    input_name = model.get_inputs()[0].name
    input_shape = model.get_inputs()[0].shape

    # Keep the raw frame so the detections can be drawn on it later
    original_image = image

    # Resize the image to the model's input size; cv2.resize takes (width, height)
    image = cv2.resize(image, (input_shape[3], input_shape[2]))

    # Reorder the array from HWC to CHW to match the model's input layout.
    # reshape() would scramble the pixels; transpose() moves the channel
    # axis without touching the pixel data.
    image = image.transpose(2, 0, 1)

    # Normalize the input image (ImageNet mean/std). Note: many YOLO ONNX
    # exports expect plain /255 scaling only; keep or drop the mean/std
    # step to match how the model was trained.
    mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
    image = (image / 255.0 - mean) / std
    
    # Add a batch dimension and cast to the float32 the model expects
    if len(input_shape) == 4 and input_shape[0] == 1:
        image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)
    
    # Run inference
    print("Input tensor shape:", image.shape)
    output = model.run(None, {input_name: image})
    print("Output tensor shape:", output[0].shape)

    # Postprocess the model output. The output is a tensor of detections,
    # not an annotated image, so min-max normalizing it and reshaping it
    # back to the original image shape cannot work (that shape mismatch is
    # what surfaced as the ValueError in Image.fromarray). Decode the boxes
    # and draw them on the original frame instead.
    #
    # Assumption: a YOLOv8-style export with output layout
    # (1, 4 + num_classes, num_anchors) and xywh boxes in input pixels.
    preds = np.squeeze(output[0]).T  # -> (num_anchors, 4 + num_classes)

    # Scale factors from the model's input size back to the original frame
    scale_x = original_image_shape[1] / input_shape[3]
    scale_y = original_image_shape[0] / input_shape[2]

    boxes, scores = [], []
    for row in preds:
        score = float(row[4:].max())
        if score < 0.25:  # confidence threshold; tune as needed
            continue
        cx, cy, w, h = row[:4]
        boxes.append([int((cx - w / 2) * scale_x),
                      int((cy - h / 2) * scale_y),
                      int(w * scale_x), int(h * scale_y)])
        scores.append(score)

    # Non-maximum suppression to drop overlapping detections
    keep = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45) if boxes else []

    # Draw the surviving boxes and scores on a copy of the raw frame
    annotated_img = original_image.copy()
    for i in np.array(keep).flatten():
        x, y, w, h = boxes[i]
        cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(annotated_img, f"{scores[i]:.2f}", (x, max(y - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Convert to a PIL Image (Image.fromarray expects a uint8 HWC array)
    annotated_img = Image.fromarray(annotated_img)
    print("PIL Image type:", type(annotated_img))

    return annotated_img
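
# Note: a plain cv2.resize changes the aspect ratio, which can degrade box
# accuracy; Ultralytics' own preprocessing letterboxes the frame (scales
# and pads to a square) instead. Worth adopting if detections look off.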
    
# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(sources=["webcam"], type="numpy"),  # Image input from webcam, as a numpy array
    outputs=gr.Image(type="pil"),  # Image output from model, as a PIL Image
)

# Launch interface
if __name__ == "__main__":
    demo.launch()
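
# To try the demo locally (assuming Model_IV.onnx is in the working
# directory), install the dependencies and run this script:
#   pip install gradio onnxruntime opencv-python numpy pillow
# Gradio serves the interface at http://127.0.0.1:7860 by default.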