krishnv committed on
Commit
d6d76a4
·
verified ·
1 Parent(s): 2c7adc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -1,15 +1,37 @@
1
- from transformers import AutoProcessor, AutoModelForCausalLM
2
- import requests
3
  from PIL import Image
 
 
4
 
5
- processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")
6
- model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
 
7
 
8
- url = "http://images.cocodataset.org/val2017/000000039769.jpg"
9
- image = Image.open(requests.get(url, stream=True).raw)
10
 
11
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
12
 
13
- generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
14
- generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
15
- print(generated_caption)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from PIL import Image
2
+ from transformers import VisionEncoderDecoderModel , ViTFeatureExtractor , PreTrainedTokenizerFast
3
+ import gradio as gr
4
 
5
# Checkpoint identifiers handed to the respective ``from_pretrained`` calls.
CAPTION_MODEL_ID = "ydshieh/vit-gpt2-coco-en"
FEATURE_EXTRACTOR_ID = "google/vit-base-patch32-224-in21k"
TOKENIZER_ID = "distilgpt2"

# Everything is loaded once at module level so each captioning call reuses
# the same model, feature extractor and tokenizer objects.
tokenizer = PreTrainedTokenizerFast.from_pretrained(TOKENIZER_ID)
vit_feature_extractor = ViTFeatureExtractor.from_pretrained(FEATURE_EXTRACTOR_ID)
model = VisionEncoderDecoderModel.from_pretrained(CAPTION_MODEL_ID)
8
 
 
 
9
 
10
def caption_images(image):
    """Generate a caption for a single image.

    Args:
        image: The image to describe, in any form the feature extractor
            accepts, or ``None`` when no image was provided.

    Returns:
        The first decoded caption with surrounding whitespace stripped, or
        an empty string when ``image`` is ``None``.
    """
    # A submission without an upload reaches the callback as None; answer with
    # an empty caption rather than failing inside the feature extractor.
    if image is None:
        return ""

    pixel_values = vit_feature_extractor(
        images=image, return_tensors="pt"
    ).pixel_values
    # Follow the model's own device instead of hard-coding 'cpu', so the
    # function keeps working if the model is ever moved to an accelerator.
    generated_ids = model.generate(pixel_values.to(model.device), num_beams=5)
    captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0].strip()
16
+
17
+
18
# One image component in, one text component out.
inputs = [gr.components.Image(type='pil', label='Original Image')]
outputs = [gr.components.Textbox(label='Caption')]

title = "Simple Image captioning Application"
description = "Upload an image to see the caption generated"
# NOTE(review): this absolute path looks environment-specific; confirm the
# file exists wherever the app is deployed.
example = ['/content/messi.jpg']

# Build the interface around the captioning callback, then start serving.
demo = gr.Interface(
    fn=caption_images,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=example,
)
demo.launch(debug=True)