# Source: app.py — commit 4cbf483 ("Update app.py") by GV05.
import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel
# The model and its preprocessor are loaded from the same pretrained checkpoint,
# so the checkpoint name is spelled out once.
_CHECKPOINT = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(_CHECKPOINT)
model = CLIPModel.from_pretrained(_CHECKPOINT)
# Candidate labels for zero-shot classification, one character per entry.
classes = [
    "Iron Man",
    "Captain America",
    "Thor",
    "Spider-Man",
    "Black Widow",
    "Black Panther",
    "Hulk",
    "Ant-Man",
    "Peggy Carter",
    "Daredevil",
    "Star-Lord",
    "Wong",
    "Doctor Strange",
    "Nick Fury",
    "Gamora",
    "Jessica Jones",
    "Nebula",
    "Falcon",
    "Winter Soldier",
    "Rocket",
    "Hawkeye",
]

# One text prompt per label, in the same order as `classes`.
text = [f"a photo of {label}" for label in classes]
def predict(img):
    """Score an image against every label in ``classes`` using CLIP.

    Args:
        img: The image to classify. It is forwarded unchanged to the CLIP
            processor as its ``images`` argument.

    Returns:
        A dict mapping each name in ``classes`` to the softmax probability
        (a Python float) of its "a photo of ..." prompt for this image.
    """
    inputs = processor(text=text, images=img, return_tensors="pt", padding=True)
    # Inference only: skip autograd bookkeeping so no graph is built or kept.
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image holds the image-text similarity scores; softmax over the
    # prompt axis turns them into label probabilities. A single image is
    # passed in, so take its row explicitly instead of relying on squeeze().
    probs = outputs.logits_per_image.softmax(dim=1)[0]
    return dict(zip(classes, probs.tolist()))
# Text and sample image shown on the demo page.
examples = ["black_panter.jpg"]
description = "Using clip for zero-shot classification"
title = "Marvel Heroes Classification"

# Input/output components: an image in, a label-with-confidences widget out.
image_input = gr.inputs.Image(shape=(512, 512))
label_output = gr.outputs.Label()

# Wire `predict` into a Gradio interface and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=label_output,
    title=title,
    description=description,
    examples=examples,
)
demo.launch(inline=False)