"""Gradio demo that runs DETR object detection on an uploaded image."""

from typing import Optional, Tuple

import gradio as gr
import torch
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Hugging Face Hub repository providing the model weights and the
# matching image-processor configuration.
repo_id = "facebook/detr-resnet-101"

# Minimum confidence a detection needs in order to be reported.
DETECTION_THRESHOLD = 0.5

device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only used on the GPU. On the CPU the model stays in
# float32, since FP16 inference there is slow and some ops may be missing.
model_dtype = torch.float16 if device == "cuda" else torch.float32

model = AutoModelForObjectDetection.from_pretrained(repo_id).to(device)
if model_dtype is torch.float16:
    model = model.half()
image_processor = AutoImageProcessor.from_pretrained(repo_id)


def predict(img: Optional[Image.Image]) -> Tuple[Optional[Image.Image], str]:
    """Detect objects in *img* and draw them on a copy of the image.

    Args:
        img: The PIL image supplied by the Gradio input component, or
            ``None`` when the user submits without providing an image.

    Returns:
        A ``(annotated_image, summary)`` pair. ``annotated_image`` is an RGB
        copy of the input with one red rectangle and caption per detection;
        ``summary`` holds the same captions joined by newlines. When *img*
        is ``None`` the pair ``(None, "")`` is returned.
    """
    if img is None:
        return None, ""

    # convert() returns a new image, so drawing never touches the caller's
    # object, and the processor always receives three channels.
    img = img.convert("RGB")

    # The floating-point inputs must share the model's dtype; feeding
    # float32 pixel values into an FP16 model raises a dtype-mismatch error.
    inputs = image_processor(images=img, return_tensors="pt").to(
        device, dtype=model_dtype
    )

    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([img.size[::-1]], device=device)
    results = image_processor.post_process_object_detection(
        outputs, threshold=DETECTION_THRESHOLD, target_sizes=target_sizes
    )[0]

    draw = ImageDraw.Draw(img)
    detecciones = []
    for score, label, box in zip(
        results["scores"], results["labels"], results["boxes"]
    ):
        x, y, x2, y2 = (round(coord, 2) for coord in box.tolist())
        draw.rectangle([x, y, x2, y2], outline="red", width=3)
        class_name = (
            f"Clase {label.item()} - Confianza: {round(score.item(), 2)}"
        )
        draw.text((x, y), class_name, fill="red")
        detecciones.append(class_name)

    return img, "\n".join(detecciones)


# Build the interface and start serving it. The launch stays at module
# level so running (or importing) this file behaves as it did before.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(), gr.Text()],
    examples=['raccoon-133.jpg', 'raccoon-108.jpg'],
    concurrency_limit=2,
)
demo.launch(share=False)