JerryAnto committed on
Commit
fb9b3a0
1 Parent(s): 9139302

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[ ]:
5
+
6
+
7
+
8
+ from PIL import Image
9
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
10
+ import requests
11
+
12
# Hugging Face Hub identifiers of the three pretrained components.
_CAPTION_MODEL_ID = "sachin/vit2distilgpt2"
_FEATURE_EXTRACTOR_ID = "google/vit-base-patch16-224-in21k"
_TOKENIZER_ID = "distilgpt2"

# Loaded once at import time and shared by every captioning request.
model = VisionEncoderDecoderModel.from_pretrained(_CAPTION_MODEL_ID)
vit_feature_extractor = ViTFeatureExtractor.from_pretrained(_FEATURE_EXTRACTOR_ID)
tokenizer = PreTrainedTokenizerFast.from_pretrained(_TOKENIZER_ID)
17
+
18
+
19
def vit2distilgpt2(img):
    """Generate a one-sentence caption for an image.

    Uses the module-level ``vit_feature_extractor``, ``model`` and
    ``tokenizer`` objects.

    Args:
        img: Image to caption (the Gradio input below is declared with
            ``type="pil"``), or ``None`` when no image was supplied.

    Returns:
        The decoded text of the first generated sequence, truncated just
        before its first ``'.'``. An empty string when ``img`` is ``None``.
    """
    if img is None:
        # NOTE(review): Gradio appears to pass None when the form is
        # submitted without an upload; return an empty caption instead of
        # failing inside the feature extractor.
        return ""

    if isinstance(img, Image.Image) and img.mode != "RGB":
        # PNG uploads may be RGBA, palette or greyscale; normalise to three
        # channels before feature extraction.
        img = img.convert("RGB")

    pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
    # Follow the model's device rather than hard-coding 'cpu'.
    generated_ids = model.generate(pixel_values.to(model.device), num_beams=5)
    captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    # Keep only the text before the first full stop.
    return captions[0].split(".")[0]
25
+
26
+
27
+
28
+ import gradio as gr
29
+
30
# --- Gradio demo wiring ------------------------------------------------------

# One PIL image in, one caption textbox out.
inputs = [gr.inputs.Image(type="pil", label="Original Image")]
outputs = [gr.outputs.Textbox(label="Caption")]

# Text shown around the demo widget.
title = "Image Captioning using ViT + GPT2"
description = (
    "ViT and GPT2 are used to generate Image Caption for the uploaded image. "
    "COCO Dataset was used for training. "
    "This image captioning model might have some biases that we couldn't "
    "figure during our stress testing, so if you find any bias "
    "(gender, race and so on) please use `Flag` button to flag the image "
    "with bias"
)
article = " <a href='https://huggingface.co/sachin/vit2distilgpt2'>Model Repo on Hugging Face Model Hub</a>"

# Example image paths offered in the UI; one single-input row per example.
examples = [[path] for path in ("horses.png", "persons.png", "football_player.png")]

# Build the interface around the captioning function and start serving.
gr.Interface(
    fn=vit2distilgpt2,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    theme="huggingface",
).launch(debug=True, enable_queue=True)
58
+