import gradio as gr from transformers import CLIPProcessor, CLIPModel from PIL import Image import torch # Load the model and processor model_id = "DGurgurov/clip-vit-base-patch32-oxford-pets" model = CLIPModel.from_pretrained(model_id) processor = CLIPProcessor.from_pretrained(model_id) # Define the inference function def predict(image): inputs = processor(images=image, return_tensors="pt") outputs = model.get_image_features(**inputs) logits_per_image = outputs.logits_per_image probs = torch.nn.functional.softmax(logits_per_image, dim=1) return {f"Class {i}": prob.item() for i, prob in enumerate(probs[0])} # Define Gradio interface image = gr.inputs.Image(type="pil") label = gr.outputs.Label(num_top_classes=5) interface = gr.Interface( fn=predict, inputs=image, outputs=label, title="CLIP Model - Oxford Pets", description="Upload an image and get the top 5 class predictions." ) # Launch the Gradio app interface.launch()