from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import cv2
import tempfile


def load_model(repo_id):
    """Download a model snapshot from the Hugging Face Hub and load the
    exported INT8 OpenVINO detection model contained in it.

    Args:
        repo_id: Hub repository id, e.g. "user/repo".

    Returns:
        A YOLO model configured for the 'detect' task.
    """
    download_dir = snapshot_download(repo_id)
    print(download_dir)
    model_path = os.path.join(download_dir, "best_int8_openvino_model")
    print(model_path)
    return YOLO(model_path, task='detect')


def predict_image(pilimg):
    """Run object detection on a PIL image and return the annotated image.

    YOLO's plot() returns a BGR numpy array, so the channel order is
    reversed before converting back to a PIL (RGB) image.
    """
    result = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    img_bgr = result[0].plot()
    return Image.fromarray(img_bgr[..., ::-1])  # BGR -> RGB


def predict_video(video_path):
    """Run object detection frame-by-frame on a video file.

    Writes an annotated copy to a temporary .mp4 and returns its path.

    Raises:
        ValueError: if the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report 0 fps, which would produce an unplayable file.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    # Reserve the output path, then close the handle so VideoWriter can
    # open the file itself (keeping it open fails on Windows).
    temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_video.close()
    out = cv2.VideoWriter(temp_video.name,
                          cv2.VideoWriter_fourcc(*'mp4v'),
                          fps, (width, height))
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # plot() yields a BGR array, which is what VideoWriter expects.
            result = detection_model.predict(frame, conf=0.5, iou=0.6)
            out.write(result[0].plot())
    finally:
        # Release resources even if prediction raises mid-stream.
        cap.release()
        out.release()

    return temp_video.name


REPO_ID = "CharmainChua/windowsandcurtains"
detection_model = load_model(REPO_ID)

# NOTE: gr.Interface takes `title=` (there is no `label` parameter), and
# gr.Video accepts no `type` kwarg in current Gradio releases — both of the
# original calls would raise TypeError.
image_input = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="Object Detection on Image",
)

video_input = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="Object Detection on Video",
)

if __name__ == "__main__":
    gr.TabbedInterface(
        [image_input, video_input],
        ["Image Detection", "Video Detection"],
    ).launch(share=True)