ryanramos committed
Commit b66e6ca · 1 Parent(s): 36c4b4b

Update app.py

Files changed (1)
  1. app.py +120 -14
app.py CHANGED
@@ -1,14 +1,3 @@
-import torch
-from torch.utils.data import DataLoader
-from datasets import load_dataset, IterableDataset
-import evaluate
-# from sklearn.linear_model import LogisticRegression
-import webdataset as wds
-from tqdm.auto import tqdm
-import lovely_tensors as lt
-
-lt.monkey_patch()
-
 from transformers import CLIPModel, CLIPProcessor
 
 MODEL_ID = 'openai/clip-vit-base-patch32' #@param {'type': 'string'}
@@ -112,16 +101,133 @@ processor = CLIPProcessor.from_pretrained(MODEL_ID)
 
 pipe = pipeline("zero-shot-image-classification", model=model, feature_extractor=processor.image_processor, tokenizer=processor.tokenizer)
 
+cifar_templates = [
+    'a photo of a {}.',
+    'a blurry photo of a {}.',
+    'a black and white photo of a {}.',
+    'a low contrast photo of a {}.',
+    'a high contrast photo of a {}.',
+    'a bad photo of a {}.',
+    'a good photo of a {}.',
+    'a photo of a small {}.',
+    'a photo of a big {}.',
+    'a photo of the {}.',
+    'a blurry photo of the {}.',
+    'a black and white photo of the {}.',
+    'a low contrast photo of the {}.',
+    'a high contrast photo of the {}.',
+    'a bad photo of the {}.',
+    'a good photo of the {}.',
+    'a photo of the small {}.',
+    'a photo of the big {}.',
+]
+
+imagenet_templates = [
+    'a bad photo of a {}.',
+    'a photo of many {}.',
+    'a sculpture of a {}.',
+    'a photo of the hard to see {}.',
+    'a low resolution photo of the {}.',
+    'a rendering of a {}.',
+    'graffiti of a {}.',
+    'a bad photo of the {}.',
+    'a cropped photo of the {}.',
+    'a tattoo of a {}.',
+    'the embroidered {}.',
+    'a photo of a hard to see {}.',
+    'a bright photo of a {}.',
+    'a photo of a clean {}.',
+    'a photo of a dirty {}.',
+    'a dark photo of the {}.',
+    'a drawing of a {}.',
+    'a photo of my {}.',
+    'the plastic {}.',
+    'a photo of the cool {}.',
+    'a close-up photo of a {}.',
+    'a black and white photo of the {}.',
+    'a painting of the {}.',
+    'a painting of a {}.',
+    'a pixelated photo of the {}.',
+    'a sculpture of the {}.',
+    'a bright photo of the {}.',
+    'a cropped photo of a {}.',
+    'a plastic {}.',
+    'a photo of the dirty {}.',
+    'a jpeg corrupted photo of a {}.',
+    'a blurry photo of the {}.',
+    'a photo of the {}.',
+    'a good photo of the {}.',
+    'a rendering of the {}.',
+    'a {} in a video game.',
+    'a photo of one {}.',
+    'a doodle of a {}.',
+    'a close-up photo of the {}.',
+    'a photo of a {}.',
+    'the origami {}.',
+    'the {} in a video game.',
+    'a sketch of a {}.',
+    'a doodle of the {}.',
+    'a origami {}.',
+    'a low resolution photo of a {}.',
+    'the toy {}.',
+    'a rendition of the {}.',
+    'a photo of the clean {}.',
+    'a photo of a large {}.',
+    'a rendition of a {}.',
+    'a photo of a nice {}.',
+    'a photo of a weird {}.',
+    'a blurry photo of a {}.',
+    'a cartoon {}.',
+    'art of a {}.',
+    'a sketch of the {}.',
+    'a embroidered {}.',
+    'a pixelated photo of a {}.',
+    'itap of the {}.',
+    'a jpeg corrupted photo of the {}.',
+    'a good photo of a {}.',
+    'a plushie {}.',
+    'a photo of the nice {}.',
+    'a photo of the small {}.',
+    'a photo of the weird {}.',
+    'the cartoon {}.',
+    'art of the {}.',
+    'a drawing of the {}.',
+    'a photo of the large {}.',
+    'a black and white photo of a {}.',
+    'the plushie {}.',
+    'a dark photo of a {}.',
+    'itap of a {}.',
+    'graffiti of the {}.',
+    'a toy {}.',
+    'itap of my {}.',
+    'a photo of a cool {}.',
+    'a photo of a small {}.',
+    'a tattoo of the {}.',
+]
+
+dashcam_templates = [
+    'a dashcam recording of {}.',
+    'a picture of {}.',
+    'a recording of {}.'
+]
+
+stl10_templates = [
+    'a photo of a {}.',
+    'a photo of the {}.',
+]
+
+oxfordpets_templates = [
+    'a photo of a {}, a type of pet.',
+]
+
 def predict(image, texts):
     texts = texts.split(', ')
     out = pipe(image, candidate_labels=texts)
     return {d['label']: d['score'] for d in out}
 
-import gradio as gr
-
 demo = gr.Interface(
     fn=predict,
-    inputs=[gr.Image(type='pil'), gr.Textbox(label='comma separated labels')],
+    inputs=[gr.Image(type='pil'), gr.Textbox(label='comma separated labels'), gr.Dropdown(['CIFAR', 'ImageNet', 'STL-10', 'Oxford Pets', 'Dashcam'], label='text templates')],
     outputs='label',
 )
 
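Note on the change above: the Interface now has three inputs, but predict still accepts only (image, texts), so Gradio will pass the dropdown's value as a third argument and fail; the new template lists are also never read, and import gradio as gr is removed without a visible replacement. Below is a minimal sketch of how the dropdown could be wired into predict. The TEMPLATE_SETS mapping and the score-averaging loop are assumptions, not part of this commit; hypothesis_template is the zero-shot pipeline's stock parameter for formatting candidate labels.

import gradio as gr  # re-adds the import this commit drops, unless it appears in the unshown lines

# Hypothetical mapping from the dropdown choices to the template lists
# added above; the name TEMPLATE_SETS is not part of the commit.
TEMPLATE_SETS = {
    'CIFAR': cifar_templates,
    'ImageNet': imagenet_templates,
    'STL-10': stl10_templates,
    'Oxford Pets': oxfordpets_templates,
    'Dashcam': dashcam_templates,
}

def predict(image, texts, template_set):
    labels = texts.split(', ')
    templates = TEMPLATE_SETS[template_set]
    # One pipeline call per template, averaging each label's score across
    # templates. This is score-level ensembling; the CLIP paper averages
    # text embeddings before the softmax, so treat this as an approximation.
    scores = {label: 0.0 for label in labels}
    for template in templates:
        for d in pipe(image, candidate_labels=labels, hypothesis_template=template):
            scores[d['label']] += d['score'] / len(templates)
    return scores

Gradio passes one argument per input component, so the Dropdown's string arrives as template_set, and the returned dict still matches the 'label' output.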