Vrooh933 committed on
Commit
8f6f7fb
·
1 Parent(s): 311d8e3

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[8]:
5
+
6
+
7
+ import gradio as gr
8
+ from PIL import Image
9
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast, AutoTokenizer
10
+ import requests
11
+
12
+
13
+ # In[5]:
14
+
15
+
16
# The captioning model, the image preprocessor and the tokenizer are all
# loaded from the same checkpoint identifier.
_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Vision encoder-decoder model whose generate() call produces caption ids.
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)

# Converts an input image into the pixel-value tensor passed to the model.
feature_extractor = ViTFeatureExtractor.from_pretrained(_CHECKPOINT)

# Decodes generated token ids back into caption text.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
32
+
33
+
34
+ # In[17]:
35
+
36
+
37
def vit2distilgpt2(img):
    """Generate a one-sentence caption for an image.

    Args:
        img: The image to caption, normally a PIL image (the Gradio input
            component is declared with type="pil"). May be None, e.g. when
            the form is submitted without an image.

    Returns:
        The decoded caption truncated at the first '.', with surrounding
        whitespace removed. An empty string when ``img`` is None.
    """
    # Nothing to caption: return an empty caption instead of raising from
    # inside the feature extractor.
    if img is None:
        return ""

    # Normalise PIL images to RGB before preprocessing (grayscale, palette
    # and RGBA uploads would otherwise reach the extractor with a different
    # channel count). Objects without a ``mode`` attribute pass through
    # untouched.
    if getattr(img, "mode", "RGB") != "RGB":
        img = img.convert("RGB")

    pixel_values = feature_extractor(
        images=img, return_tensors="pt").pixel_values

    # Move the input to whichever device holds the model rather than
    # hard-coding 'cpu'; identical when the model lives on the CPU.
    generated_ids = model.generate(
        pixel_values.to(model.device), num_beams=5)

    captions = tokenizer.batch_decode(
        generated_ids, skip_special_tokens=True)

    # A single image yields a single caption; keep only its first sentence
    # and drop stray leading/trailing whitespace from decoding.
    return captions[0].split('.')[0].strip()
46
+
47
+
48
+ # In[18]:
49
+
50
+
51
+ # In[29]:
52
# Input and output components. The top-level ``gr.Image`` / ``gr.Textbox``
# classes are used instead of the deprecated ``gr.inputs`` / ``gr.outputs``
# namespaces, which newer Gradio releases no longer provide.
inputs = [
    gr.Image(type="pil", label="Original Image")
]

outputs = [
    gr.Textbox(label='Caption')
]

# Text shown on the demo page: heading, description, and an HTML footer.
title = "Image Captioning using ViT + GPT2"
description = "ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training. The validation Metric is Still low so there are high chance that some picture that model give caption with high bias"
article = "<p style='text-align: center'><a href='https://www.linkedin.com/in/m-afif-rizky-a-a96048182/'>Created by @Vrooh933 Production</a> | <a href='https://github.com/afifrizkyandika11551100310'>GitHub Profile</a>"

# Example images offered in the UI; each inner list is one example's inputs.
# The paths are relative, so the files are expected next to this script.
examples = [["food.jpg"],
            ["bang_mizan1.jpg"],
            ["gambar1.jpg"]]

# Build the interface around the captioning function and start serving it.
demo = gr.Interface(
    fn=vit2distilgpt2,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    article=article,
)
demo.launch()
77
+
78
+
79
+ # In[ ]: