# VQA / handler.py
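"""Custom handler for a visual-question-answering endpoint.

Hugging Face Inference Endpoints look for an `EndpointHandler` class in
`handler.py`: `__init__` receives the repository path and `__call__`
receives the deserialized request body. This handler wraps
`Salesforce/blip-vqa-base`, taking a base64-encoded image plus a question
and returning the generated answer.
"""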
from typing import Any, Dict
from io import BytesIO
import base64
import json

import torch
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image

# Run on GPU when one is available; fall back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


class EndpointHandler:
    def __init__(self, path: str = ""):
        # Load the BLIP VQA processor and model once at startup.
        # (`path` is unused here; weights are pulled from the Hub.)
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.model = BlipForQuestionAnswering.from_pretrained(
            "Salesforce/blip-vqa-base"
        ).to(DEVICE)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        info = data["inputs"]
        # The image arrives base64-encoded; decode it and convert to RGB,
        # since BLIP expects three-channel input.
        raw_image = base64.b64decode(info["image"])
        image = Image.open(BytesIO(raw_image)).convert("RGB")
        question = info["question"]
        inputs = self.processor(image, question, return_tensors="pt").to(DEVICE)
        out = self.model.generate(**inputs)
        return {"text": self.processor.decode(out[0], skip_special_tokens=True)}
if __name__ == "__main__":
    my_handler = EndpointHandler(path=".")
    with open("/home/ubuntu/guoling/1.png", "rb") as img:
        image_bytes = img.read()
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
    question = "are there any people in the picture?"
    # Serialize then deserialize the payload to mirror what a real
    # HTTP request/response cycle would deliver to the handler.
    test_payload = json.dumps({"inputs": {"image": image_base64, "question": question}})
    test_result = my_handler(json.loads(test_payload))
    print(test_result)
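
# A minimal sketch of how the deployed endpoint might be called over HTTP.
# The URL and token are placeholders, not values from this repository:
#
#   import requests
#   resp = requests.post(
#       "https://<your-endpoint>.endpoints.huggingface.cloud",
#       headers={"Authorization": "Bearer <HF_TOKEN>"},
#       json={"inputs": {"image": image_base64, "question": question}},
#   )
#   print(resp.json())  # expected shape: {"text": "<answer>"}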