SRDdev committed on
Commit
dd33bd5
·
1 Parent(s): f0d94cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -1,30 +1,35 @@
1
- import torch
2
- import re
3
  import gradio as gr
4
- from transformers import AutoTokenizer,ViTFeatureExtractor,VisionEncoderDecoderModel
5
 
 
6
  device = 'cpu'
7
  encoder_checkpoint = 'google/vit-base-patch16-224'
8
- decoder_checkpoint = 'gpt2'
9
- model_checkpoint = 'nlpconnect/vit-gpt2-image-captioning'
10
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
 
11
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
12
 
13
 
14
- def predict(image,max_length=64,num_beams=4):
 
15
  image = image.convert('RGB')
16
- image = feature_extractor(image,return_tensor='pt').pixel_values.to(device)
17
- clean_text = lambda x: x.replace('<|endoftext|>','').split('\n')[0]
18
  caption_ids = model.generate(image, max_length = max_length)[0]
19
  caption_text = clean_text(tokenizer.decode(caption_ids))
20
- return caption_text
21
-
 
 
 
 
22
 
23
- input = gr.inputs.Image(label='Image to generate caption',type = 'pil', optional=False)
24
- output = gr.outputs.Textbox(type="auto",label="Caption")
25
 
26
- article = "This is an Image captioning application created by Shreyas Dixit"
27
- title = "Image Captioning"
28
 
29
  interface = gr.Interface(
30
  fn=predict,
 
1
+ import torch
2
+ import re
3
  import gradio as gr
4
+ from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
5
 
6
+
7
  device = 'cpu'
8
  encoder_checkpoint = 'google/vit-base-patch16-224'
9
+ decoder_checkpoint = 'surajp/gpt2-hindi'
10
+ model_checkpoint = 'team-indain-image-caption/hindi-image-captioning'
11
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
12
+ tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
13
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
14
 
15
 
16
+
17
+ def predict(image,max_length=64, num_beams=4):
18
  image = image.convert('RGB')
19
+ image = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
20
+ clean_text = lambda x: x.replace('<|endoftext|>','').split('\n')[0]
21
  caption_ids = model.generate(image, max_length = max_length)[0]
22
  caption_text = clean_text(tokenizer.decode(caption_ids))
23
+ return caption_text
24
+
25
+
26
+
27
+ input = gr.inputs.Image(label="Image to search", type = 'pil', optional=False)
28
+ output = gr.outputs.Textbox(type="auto",label="Captions")
29
 
 
 
30
 
31
+ article = "This HuggingFace Space presents a demo for Image captioning in Hindi built with VIT Encoder and GPT2 Decoder"
32
+ title = "Hindi Image Captioning System"
33
 
34
  interface = gr.Interface(
35
  fn=predict,