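"""Gradio app for snake and lizard detection.

Downloads a YOLO model (INT8 OpenVINO export) from the AI-Pagoda/4673483T repo
on the Hugging Face Hub and serves image and video object detection tabs.
"""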
from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import cv2
import numpy as np
model_path = "best_int8_openvino_model"


def load_model(repo_id):
    # Download the model repository snapshot and load the OpenVINO model subfolder.
    download_dir = snapshot_download(repo_id)
    print(download_dir)
    path = os.path.join(download_dir, model_path)
    print(path)
    detection_model = YOLO(path, task='detect')
    return detection_model

# Image prediction function
def predict_image(input_image):
    source = input_image
    result = detection_model.predict(source, conf=0.5, iou=0.6)
    img_bgr = result[0].plot()                           # Annotated frame in BGR
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)   # Convert BGR to RGB
    output_image = Image.fromarray(img_rgb)              # Use the RGB image for output
    return output_image

# Video prediction function
def predict_video(input_video):
    # Get the original filename (without path)
    original_filename = os.path.basename(input_video.name)
    # Get the name without the extension and append '_detected'
    base_filename, _ = os.path.splitext(original_filename)
    output_video = base_filename + "_detected.mp4"

    # Read the video file
    video_capture = cv2.VideoCapture(input_video.name)
    fps = video_capture.get(cv2.CAP_PROP_FPS) or 30  # Fall back to 30 fps if the source rate is unknown
    frames = []
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        # Run detection on each frame
        result = detection_model.predict(frame, conf=0.5, iou=0.6)
        img_bgr = result[0].plot()  # Annotated frame in BGR, as expected by VideoWriter
        frames.append(img_bgr)
    # Release video capture
    video_capture.release()

    if not frames:
        raise gr.Error("No frames could be read from the uploaded video.")

    # Write the annotated frames back out as an mp4 video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4
    out = cv2.VideoWriter(output_video, fourcc, fps, (frames[0].shape[1], frames[0].shape[0]))
    for frame in frames:
        out.write(frame)  # Write each frame to the video
    out.release()

    return output_video  # Return the path to the output video

REPO_ID = "AI-Pagoda/4673483T"
detection_model = load_model(REPO_ID)

# Create the Gradio interface with tabs
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.Tab("Image Detection"):
            gr.Interface(fn=predict_image,
                         inputs=gr.Image(type="pil", label="Upload Image"),
                         outputs=gr.Image(type="pil", label="Download Image"),
                         title="Image Object Detection",
                         description="Upload an image to detect Snake and Lizard.")
        with gr.Tab("Video Detection"):
            gr.Interface(fn=predict_video,
                         inputs=gr.File(label="Upload Video"),
                         outputs=gr.File(label="Download Video"),
                         title="Video Object Detection",
                         description="Upload a video to detect Snake and Lizard.")

app.launch()