egmaminta committed on
Commit
b9008f4
1 Parent(s): be555d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -75
app.py CHANGED
@@ -1,95 +1,56 @@
1
- from transformers import AutoFeatureExtractor, AutoModelForImageClassification
2
- import gradio
3
  import torch
 
4
  from einops import rearrange
5
- import numpy
 
6
 
 
7
  extractor = AutoFeatureExtractor.from_pretrained("vincentclaes/mit-indoor-scenes")
 
 
8
  model = AutoModelForImageClassification.from_pretrained("vincentclaes/mit-indoor-scenes")
9
 
10
- labels = {
11
- "0": "airport_inside",
12
- "1": "artstudio",
13
- "2": "auditorium",
14
- "3": "bakery",
15
- "4": "bar",
16
- "5": "bathroom",
17
- "6": "bedroom",
18
- "7": "bookstore",
19
- "8": "bowling",
20
- "9": "buffet",
21
- "10": "casino",
22
- "11": "children_room",
23
- "12": "church_inside",
24
- "13": "classroom",
25
- "14": "cloister",
26
- "15": "closet",
27
- "16": "clothingstore",
28
- "17": "computerroom",
29
- "18": "concert_hall",
30
- "19": "corridor",
31
- "20": "deli",
32
- "21": "dentaloffice",
33
- "22": "dining_room",
34
- "23": "elevator",
35
- "24": "fastfood_restaurant",
36
- "25": "florist",
37
- "26": "gameroom",
38
- "27": "garage",
39
- "28": "greenhouse",
40
- "29": "grocerystore",
41
- "30": "gym",
42
- "31": "hairsalon",
43
- "32": "hospitalroom",
44
- "33": "inside_bus",
45
- "34": "inside_subway",
46
- "35": "jewelleryshop",
47
- "36": "kindergarden",
48
- "37": "kitchen",
49
- "38": "laboratorywet",
50
- "39": "laundromat",
51
- "40": "library",
52
- "41": "livingroom",
53
- "42": "lobby",
54
- "43": "locker_room",
55
- "44": "mall",
56
- "45": "meeting_room",
57
- "46": "movietheater",
58
- "47": "museum",
59
- "48": "nursery",
60
- "49": "office",
61
- "50": "operating_room",
62
- "51": "pantry",
63
- "52": "poolinside",
64
- "53": "prisoncell",
65
- "54": "restaurant",
66
- "55": "restaurant_kitchen",
67
- "56": "shoeshop",
68
- "57": "stairscase",
69
- "58": "studiomusic",
70
- "59": "subway",
71
- "60": "toystore",
72
- "61": "trainstation",
73
- "62": "tv_studio",
74
- "63": "videostore",
75
- "64": "waitingroom",
76
- "65": "warehouse",
77
- "66": "winecellar"
78
- }
79
 
 
80
  model.eval()
81
 
 
82
  def classify(image):
 
83
  with torch.no_grad():
 
84
  inputs = extractor(images=image, return_tensors='pt')
 
85
  outputs = model(**inputs).logits
 
86
  outputs = rearrange(outputs, '1 j->j')
 
87
  outputs = torch.nn.functional.softmax(outputs)
 
88
  outputs = outputs.cpu().numpy()
 
89
  return {labels[str(i)]: float(outputs[i]) for i in range(len(labels))}
90
 
 
91
  gradio.Interface(fn=classify,
92
- inputs=gradio.inputs.Image(shape=(224,224), image_mode='RGB', source='upload', tool='editor', type='pil', label=None, optional=False),
93
- outputs=gradio.outputs.Label(num_top_classes=5, type='confidences'),
94
- theme='huggingface',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  allow_flagging='never').launch()
 
 
 
1
  import torch
2
+ from transformers import AutoFeatureExtractor, AutoModelForImageClassification
3
  from einops import rearrange
4
+ import gradio
5
+ import call_labels
6
 
7
+ # define the feature extractor
8
  extractor = AutoFeatureExtractor.from_pretrained("vincentclaes/mit-indoor-scenes")
9
+
10
+ # define the pretrained model
11
  model = AutoModelForImageClassification.from_pretrained("vincentclaes/mit-indoor-scenes")
12
 
13
+ # retrieve the labels provided by the MIT Indoor Scenes dataset (https://www.kaggle.com/itsahmad/indoor-scenes-cvpr-2019)
14
+ labels = call_labels.call_labels()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # call model.eval() to put the model in evaluation mode (turns off training-only behaviour such as dropout); it does not by itself stop weight updates or gradient tracking
17
  model.eval()
18
 
19
+ # define the function used for model inference
20
  def classify(image):
21
+ # disable gradient calculation
22
  with torch.no_grad():
23
+ # extract features from the image input
24
  inputs = extractor(images=image, return_tensors='pt')
25
+ # keep only the logits attribute of the model output (object: SequenceClassifierOutput)
26
  outputs = model(**inputs).logits
27
+ # remove the batch dimension
28
  outputs = rearrange(outputs, '1 j->j')
29
+ # use the softmax function to convert the logits into probabilities
30
  outputs = torch.nn.functional.softmax(outputs)
31
+ # convert the data type from tensor to a numpy array
32
  outputs = outputs.cpu().numpy()
33
+ # return a dictionary mapping each label name to its corresponding probability
34
  return {labels[str(i)]: float(outputs[i]) for i in range(len(labels))}
35
 
36
+ # define the gradio interface
37
  gradio.Interface(fn=classify,
38
+ inputs=gradio.inputs.Image(shape=(224,224),
39
+ image_mode='RGB',
40
+ source='upload',
41
+ tool='editor',
42
+ type='pil',
43
+ label=None,
44
+ optional=False),
45
+ outputs=gradio.outputs.Label(num_top_classes=5,
46
+ type='auto'),
47
+ theme='dark-huggingface',
48
+ examples=[['bedroom.jpg'],
49
+ ['bathroom_AS.jpg'],
50
+ ['samsung_room.jpg']],
51
+ live=True,
52
+ title='Indoor Scene Recognition',
53
+ description='An indoor scene classifier. Start by uploading an input image. The outputs are the top five indoor scene classes that best fit your input image.',
54
+ interpretation='default',
55
+ article='''<h2><b>Additional Information</b></h2><p style='text-align: justify'>This indoor scene classifier employs the <b>google/vit-base-patch16-224-in21k</b>, a <b>Visual Transformer (ViT)</b> model pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224 introduced in the paper <b><a href='https://arxiv.org/abs/2010.11929' target='_blank'>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</a></b> by Dosovitskiy et al. The original GitHub repository of the Visual Transformer is found in <b><a href='https://github.com/google-research/vision_transformer' target='_blank'>this link</a></b>. This model was fine-tuned on the <b><a href='https://www.kaggle.com/itsahmad/indoor-scenes-cvpr-2019' target='_blank'>MIT Indoor Scenes</a></b> from Kaggle. The source model is found in <b><a href='https://huggingface.co/vincentclaes/mit-indoor-scenes' target='_blank'>this link</a></b>.</p>''',
56
  allow_flagging='never').launch()