import gradio as gr
import cv2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import img_to_array

# Load the pre-trained MobileNetV2 classifier with ImageNet weights once, at
# import time; predict_objects() reuses this module-level instance on every call.
model = MobileNetV2(weights="imagenet")

def predict_objects(img):
    """Classify a frame with MobileNetV2 and return it with its top-1 label.

    Args:
        img: Frame as an ``(H, W, 3)`` RGB ``numpy`` array, which is what a
            Gradio image input delivers by default.

    Returns:
        A ``(frame, label)`` tuple: a copy of the input frame (channel order
        untouched) and the class name of the highest-scoring ImageNet
        prediction.
    """
    # The frame is already RGB, so no BGR<->RGB conversion is applied.
    # Swapping channels here would feed the network channel-reversed pixels
    # and tint the image that is handed back for display.
    img_resized = cv2.resize(img, (224, 224))  # MobileNetV2 input size
    img_array = img_to_array(img_resized)
    # Add the leading batch axis, then scale pixels the way the model expects.
    batch = preprocess_input(img_array.reshape((1, *img_array.shape)))

    # verbose=0: avoid printing a progress bar for every streamed frame.
    predictions = model.predict(batch, verbose=0)
    # decode_predictions yields, per sample, (class_id, class_name, score)
    # tuples; take the name of the single best match for the only sample.
    label = decode_predictions(predictions, top=1)[0][0][1]

    # Return a copy so later drawing never writes into the caller's buffer.
    return img.copy(), label

def draw_box(img, label):
    """Draw a green frame border and the label text onto ``img`` in place.

    Args:
        img: Image array of shape ``(H, W)`` or ``(H, W, C)``; it is modified
            directly by the OpenCV drawing calls.
        label: Text rendered near the top-left corner.

    Returns:
        The same ``img`` object, now annotated.
    """
    # Only the first two dimensions are needed, which also keeps 2-D
    # (single-channel) images from failing on a three-way unpack.
    height, width = img.shape[:2]
    # Rectangle corners are pixel coordinates, so the far corner is
    # (width - 1, height - 1); (width, height) lies one past the last pixel.
    cv2.rectangle(img, (0, 0), (width - 1, height - 1), (0, 255, 0), 2)
    cv2.putText(
        img,
        label,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 255, 0),
        2,
        cv2.LINE_AA,
    )
    return img

def object_detection(frame):
    """Classify one webcam frame and return it annotated with the label.

    Args:
        frame: Image array for the current frame, or ``None`` when the
            interface has no frame to offer (for example before the webcam
            has started or after the input is cleared).

    Returns:
        The annotated frame, or ``None`` if no frame was supplied.
    """
    # A live/streaming callback can fire without an image; skip the model and
    # the drawing code instead of crashing inside OpenCV on a None input.
    if frame is None:
        return None

    frame, label = predict_objects(frame)
    return draw_box(frame, label)

# Stream webcam frames through object_detection and show each annotated
# frame as the output image; live=True re-runs the function on every update.
webcam_input = gr.Image(source="webcam", streaming=True)
demo = gr.Interface(
    fn=object_detection,
    inputs=webcam_input,
    outputs="image",
    live=True,
)
demo.launch()