oshita-n committed on
Commit 1cba662 · 1 Parent(s): 66c70d9

first commit

Files changed (2)
  1. app.py +33 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,33 @@
+ import torch
+ import gradio as gr
+ from lavis.models import load_model_and_preprocess
+ from PIL import Image
+
+ def process(input_image, prompt):
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     model, vis_processors, txt_processors = load_model_and_preprocess(name="pnp_vqa", model_type="base", is_eval=True, device=device)
+     input_image = input_image.resize((256, 256))
+     image = vis_processors["eval"](input_image).unsqueeze(0).to(device)
+     text_input = txt_processors["eval"](prompt)
+     sample = {"image": image, "text_input": [text_input]}
+
+     output = model.forward_itm(samples=sample)
+     pred_answers = output['pred_answers']
+     answer = pred_answers[0]
+
+     return answer
+
+ if __name__ == '__main__':
+     input_image = gr.Image(label='image', type='pil')
+     prompt = gr.Textbox(label='Prompt')
+     ips = [
+         input_image, prompt
+     ]
+     outputs = gr.Textbox(label='Answer')
+     iface = gr.Interface(fn=process,
+                          inputs=ips,
+                          outputs=outputs,
+                          title='Image Question Answering',
+                          description='Answers questions about an image using a visual question answering model. Upload an image and enter your question.')
+     iface.launch()
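Because the Gradio launch is guarded by if __name__ == '__main__', the process handler can be imported and smoke-tested without starting the UI. The sketch below assumes a local test image named sample.jpg, which is only a placeholder and not part of this commit; it simply prints whatever process returns for that image and question.

# Minimal local check of the process() handler defined in app.py.
# "sample.jpg" is a placeholder path; substitute any local test image.
from PIL import Image
from app import process

img = Image.open("sample.jpg").convert("RGB")
print(process(img, "What is shown in the picture?"))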
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio
+ torch
+ salesforce-lavis==1.0.2
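Only salesforce-lavis is pinned here, so a quick environment check after installing the requirements can confirm that torch imports and the pnp_vqa checkpoint actually downloads and loads. This is just a sketch that reuses the exact loading call from app.py, not part of the committed files.

# Sanity check: verify the dependencies above can load the PNP-VQA model.
import torch
from lavis.models import load_model_and_preprocess

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, vis_processors, txt_processors = load_model_and_preprocess(
    name="pnp_vqa", model_type="base", is_eval=True, device=device
)
print(type(model).__name__)  # prints the loaded model class name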