File size: 1,324 Bytes
1cba662
 
 
 
 
 
 
 
 
e161c5d
1cba662
 
 
9d6822a
1cba662
7a1a0e9
1cba662
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import torch
import gradio as gr
from lavis.models import load_model_and_preprocess
from PIL import Image

def process(input_image, prompt):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model, vis_processors, txt_processors = load_model_and_preprocess(name="pnp_vqa", model_type="base", is_eval=True, device=device)
    input_image = input_image.resize((256, 256))
    image = vis_processors["eval"](input_image).unsqueeze(0).to(device)
    text_input = txt_processors["eval"](prompt)
    sample = {"image": image, "text_input": [text_input]}
    pred_answers, caption, gradcam = model.predict_answers(sample, num_captions=50, num_patches=20)

    return pred_answers[0]

if __name__ == '__main__':
    # Build and launch the Gradio UI: one image plus one question in, one
    # answer out. Components come from the top-level ``gr`` namespace; the
    # legacy ``gr.inputs`` / ``gr.outputs`` modules are deprecated.
    input_image = gr.Image(label='image', type='pil')
    prompt = gr.Textbox(label='Prompt')
    ips = [input_image, prompt]
    outputs = gr.Textbox(label='Answer')
    iface = gr.Interface(
        fn=process,
        inputs=ips,
        outputs=outputs,
        title='Image Question Answering',
        # Japanese: "Answers questions using a model that answers questions
        # about images. Upload an image and enter a question."
        description='画像に関する質問に答えるモデルを使って、質問に答えます。画像をアップロードし、質問を入力してください。',
    )
    iface.launch()