import gradio as gr
import torch
from datasets import load_dataset
from PIL import Image
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
# Load the fine-tuned model and its processor from a single checkpoint name so
# the two can never drift apart.
MODEL_ID = "DGurgurov/clip-vit-base-patch32-oxford-pets"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForZeroShotImageClassification.from_pretrained(MODEL_ID)

# Load the dataset only to collect the candidate label names.
dataset = load_dataset("pcuenq/oxford-pets")  # Adjust dataset loading as per your setup

# Sort the unique labels: iterating a bare set() of strings can yield a
# different order on every interpreter run (hash randomization), which would
# make the label <-> id mapping irreproducible between runs.
labels = sorted(set(dataset['train']['label']))
label2id = {label: i for i, label in enumerate(labels)}
id2label = dict(enumerate(labels))
def classify_image(image):
    """Classify an image against the candidate labels using the CLIP model.

    Args:
        image: Array-like image data accepted by ``PIL.Image.fromarray``, or
            ``None`` when no image was provided.

    Returns:
        The entry of ``labels`` whose text scores highest against the image.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Image.fromarray(None) would fail with an opaque internal error;
        # gr.Error carries a readable message instead.
        raise gr.Error("Please upload an image before submitting.")

    # Preprocess the image together with every candidate label text.
    pil_image = Image.fromarray(image)
    inputs = processor(text=labels, images=pil_image, return_tensors="pt", padding=True)

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Row 0 of logits_per_image holds this image's score for each label;
    # softmax over that row turns the scores into probabilities.
    probs = outputs.logits_per_image[0].softmax(dim=0)

    predicted_label_id = probs.argmax().item()
    return id2label[predicted_label_id]
# Gradio interface
iface = gr.Interface(
inputs=gr.Image(label="Upload a picture of an animal"),
outputs=gr.Textbox(label="Predicted Animal"),
title="Animal Classifier",
description="CLIP-based model fine-tuned on Oxford Pets dataset to classify animals.",
# Launch the Gradio interface