"""Gradio demo: detect ASL (American Sign Language) letters with YOLO models.

The model weights are fetched on demand from a Hugging Face Hub repository and
the detections are drawn onto the uploaded image with ``supervision``.
"""

import spaces
import supervision as sv
import PIL.Image as Image
from ultralytics import YOLO
from huggingface_hub import hf_hub_download, HfApi
import gradio as gr
import torch

# Hub repository that hosts the ``.pt`` weight files offered in the UI.
repo_id = "atalaydenknalbant/asl-yolo-models"

# Text rendered at the top of the page (kept exactly as the app emits it).
_HEADER_HTML = (
    "\n\n"
    "YOLO Powered ASL(American Sign Language) Letter Detector "
    "PSA: It can't detect J or Z"
    "\n\n"
)


def get_model_filenames(repo_id):
    """Return the names of all ``.pt`` files stored in the Hub repo *repo_id*."""
    repo_files = HfApi().list_repo_files(repo_id)
    return [name for name in repo_files if name.endswith(".pt")]


# Queried once at import time; populates the "Model" dropdown.
model_filenames = get_model_filenames(repo_id)


def download_models(repo_id, model_id):
    """Download *model_id* from *repo_id* into the working directory.

    Returns:
        The relative path ``./<model_id>`` of the downloaded weights file.
    """
    hf_hub_download(repo_id, filename=model_id, local_dir="./")
    return f"./{model_id}"


box_annotator = sv.BoxAnnotator()

# Class index -> letter: 0 -> 'A', 1 -> 'B', ..., 25 -> 'Z'.
category_dict = {index: chr(ord("A") + index) for index in range(26)}


@spaces.GPU
def yolo_inference(image, model_id, conf_threshold, iou_threshold, max_detection):
    """Run the selected YOLO model on *image* and return the annotated image.

    Args:
        image: Input image (the UI supplies a PIL image).
        model_id: File name of the weights to download from ``repo_id``.
        conf_threshold: Confidence threshold forwarded to the model as ``conf``.
        iou_threshold: IoU threshold forwarded to the model as ``iou``.
        max_detection: Upper bound on detections, forwarded as ``max_det``.

    Returns:
        The input image with boxes and "<letter> <confidence>" labels drawn.

    Raises:
        gr.Error: If no image was provided or no model was selected.
    """
    # Fail early with a readable UI message instead of an opaque error from
    # the download / inference / annotation libraries.
    if image is None:
        raise gr.Error("Please provide an image before running detection.")
    if not model_id:
        raise gr.Error("Please select a model before running detection.")

    model_path = download_models(repo_id, model_id)
    model = YOLO(model_path)

    # The model call returns one result per source; a single image is passed.
    results = model(
        source=image,
        imgsz=640,
        iou=iou_threshold,
        conf=conf_threshold,
        verbose=False,
        max_det=max_detection,
    )[0]
    detections = sv.Detections.from_ultralytics(results)

    labels = [
        f"{category_dict[class_id]} {confidence:.2f}"
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]

    # NOTE(review): passing ``labels=`` relies on the installed supervision
    # version's BoxAnnotator accepting it - confirm against the pinned version.
    annotated_image = box_annotator.annotate(
        image, detections=detections, labels=labels
    )
    return annotated_image


def app():
    """Build the detector UI: input controls, output image, and examples.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="pil", label="Image", interactive=True)
                model_id = gr.Dropdown(
                    label="Model",
                    choices=model_filenames,
                    # Fall back to an empty selection when the repo lists no weights.
                    value=model_filenames[0] if model_filenames else "",
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.1,
                    maximum=1.0,
                    step=0.1,
                    value=0.45,
                )
                iou_threshold = gr.Slider(
                    label="IoU Threshold",
                    minimum=0.1,
                    maximum=1.0,
                    step=0.1,
                    value=0.7,
                )
                # No explicit maximum is set, so the slider's default applies.
                max_detection = gr.Slider(
                    label="Max Detection",
                    minimum=1,
                    step=1,
                    value=1,
                )
                yolov_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(
                    type="pil", label="Annotated Image", interactive=False
                )

        # The button and the examples share the same inputs and output.
        inference_inputs = [
            image,
            model_id,
            conf_threshold,
            iou_threshold,
            max_detection,
        ]

        yolov_infer.click(
            fn=yolo_inference,
            inputs=inference_inputs,
            outputs=[output_image],
        )

        gr.Examples(
            examples=[
                ["b.jpg", "yolo11x.pt", 0.45, 0.7, 1],
                ["a.jpg", "yolo11s.pt", 0.45, 0.7, 1],
                ["y.jpg", "yolo11m.pt", 0.45, 0.7, 1],
            ],
            fn=yolo_inference,
            inputs=inference_inputs,
            outputs=[output_image],
            cache_examples=True,
        )


gradio_app = gr.Blocks()
with gradio_app:
    gr.HTML(_HEADER_HTML)
    with gr.Row():
        with gr.Column():
            app()

gradio_app.launch()