DGurgurov committed
Commit: b1f6a67
1 Parent(s): c901e48

Update app.py

Files changed (1):
  1. app.py +41 -32
app.py CHANGED
@@ -1,51 +1,60 @@
import gradio as gr
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
- from torchvision.transforms import functional as F
from PIL import Image

- # Load tokenizer and model
processor = AutoProcessor.from_pretrained("DGurgurov/clip-vit-base-patch32-oxford-pets")
model = AutoModelForZeroShotImageClassification.from_pretrained("DGurgurov/clip-vit-base-patch32-oxford-pets")

- # Define label mappings
- labels = set(dataset['train']['label'])
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}

- # Function to preprocess image
def preprocess_image(image):
-     image = Image.fromarray(image)  # Convert numpy array to PIL Image
-     image = image.convert("RGB")  # Ensure image is RGB (some images might be grayscale)
-     image = image.resize((224, 224))  # Resize image to match CLIP model input size
-     image = F.to_tensor(image)  # Convert PIL Image to PyTorch tensor
-     image = F.normalize(image, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize image
-     return image
-
- # Function to predict using CLIP model
- def predict(image):
-     # Preprocess image
    image = preprocess_image(image)
-
-     # Prepare input for the model
-     inputs = processor(images=image.unsqueeze(0), labels=labels, return_tensors="pt")
-
-     # Perform inference
-     outputs = model(**inputs)
-
    # Get predicted label
-     logits_per_image = outputs.logits_per_image
-     predicted_class = labels[torch.argmax(logits_per_image, dim=-1)]
-
-     return predicted_class

- # Define Gradio interface
iface = gr.Interface(
-     fn=predict,
-     inputs=gr.Image(shape=(224, 224)),
-     outputs=gr.Textbox(),
    title="Animal Classifier",
-     description="CLIP-ViT model fine-tuned on Oxford Pets dataset to classify animals."
)

- # Launch the Gradio app
iface.launch()
 
import gradio as gr
+ import torch
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
+ from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image
+ from datasets import load_dataset

+ # Load your fine-tuned model and dataset
processor = AutoProcessor.from_pretrained("DGurgurov/clip-vit-base-patch32-oxford-pets")
model = AutoModelForZeroShotImageClassification.from_pretrained("DGurgurov/clip-vit-base-patch32-oxford-pets")

+ # Load dataset to get labels
+ dataset = load_dataset("pcuenq/oxford-pets")  # Adjust dataset loading as per your setup
+
+ labels = list(set(dataset['train']['label']))
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}

+ # Define transformations for input images (using CLIP's normalization statistics)
+ transform = Compose([
+     Resize((224, 224)),
+     CenterCrop(224),
+     ToTensor(),
+     Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
+               std=[0.26862954, 0.26130258, 0.27577711])
+ ])
+
+ # Function to preprocess the input image (Gradio passes a numpy array by default)
def preprocess_image(image):
+     image = Image.fromarray(image).convert("RGB")  # numpy array -> RGB PIL Image
+     image = transform(image)
+     return image.unsqueeze(0)  # add a batch dimension
+
+ # Function to classify image using CLIP model
+ def classify_image(image):
+     # Preprocess the image
    image = preprocess_image(image)
+
+     # Tokenize the candidate labels for CLIP's text encoder
+     text_inputs = processor(text=labels, return_tensors="pt", padding=True)
+
+     # Run inference
+     with torch.no_grad():
+         outputs = model(pixel_values=image, **text_inputs)
+
    # Get predicted label
+     predicted_label_id = outputs.logits_per_image.argmax(dim=-1).item()
+     predicted_label = id2label[predicted_label_id]
+
+     return predicted_label

+ # Gradio interface
iface = gr.Interface(
+     fn=classify_image,
+     inputs=gr.Image(label="Upload a picture of an animal"),
+     outputs=gr.Textbox(label="Predicted Animal"),
    title="Animal Classifier",
+     description="CLIP-based model fine-tuned on the Oxford Pets dataset to classify animals.",
)

+ # Launch the Gradio interface
iface.launch()
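
A quick way to sanity-check the handler before pushing the Space is to call classify_image directly, bypassing the UI. A minimal sketch, assuming a local test image ("cat.jpg" is a placeholder, not a file in this repo):

# Local smoke test for the classify_image handler.
import numpy as np

test_image = np.array(Image.open("cat.jpg").convert("RGB"))  # mimic gr.Image's default numpy input
print(classify_image(test_image))  # prints one of the dataset's label strings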
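Since the checkpoint loads through AutoModelForZeroShotImageClassification, the same prediction can likely also be made with the transformers zero-shot-image-classification pipeline, which bundles image preprocessing and label tokenization; a sketch under that assumption, reusing the placeholder path:

from transformers import pipeline

# The pipeline handles preprocessing, label tokenization, and softmax scoring.
classifier = pipeline("zero-shot-image-classification",
                      model="DGurgurov/clip-vit-base-patch32-oxford-pets")
scores = classifier("cat.jpg", candidate_labels=labels)  # results sorted by score
print(scores[0]["label"], scores[0]["score"])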