senthilv committed on
Commit
a5df904
1 Parent(s): 8ad8161
Files changed (2)
  1. handler.py +41 -0
  2. requirements.txt +2 -0
handler.py ADDED
@@ -0,0 +1,41 @@
+ from typing import Any, Dict
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from PIL import Image
+ from io import BytesIO
+ import torch
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ class EndpointHandler():
+     def __init__(self, path=""):
+         self.model = BlipForConditionalGeneration.from_pretrained("quadranttechnologies/qhub-blip-image-captioning-finetuned")
+         self.processor = BlipProcessor.from_pretrained("quadranttechnologies/qhub-blip-image-captioning-finetuned")
+         self.model.to(device)
+         self.model.eval()
+
+     def __call__(self, data: Any) -> Dict[str, Any]:
+         """
+         Args:
+             data (:obj:`dict`):
+                 Includes a list of raw image bytes under "inputs" and optional generation parameters under "parameters".
+         Return:
+             A :obj:`dict` with a single key, e.g. {"description": ["Description of the image"]}:
+             - "description": a list of generated captions, one per input image.
+         """
+         # Split the image payload from any generation parameters.
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", {})
+
+         # Decode the raw bytes into PIL images.
+         raw_images = [Image.open(BytesIO(_img)) for _img in inputs]
+
+         processed_image = self.processor(images=raw_images, return_tensors="pt")
+         processed_image["pixel_values"] = processed_image["pixel_values"].to(device)
+         processed_image = {**processed_image, **parameters}
+
+         # Generate captions without tracking gradients.
+         with torch.no_grad():
+             out = self.model.generate(**processed_image)
+         description = self.processor.batch_decode(out, skip_special_tokens=True)
+
+         return {"description": description}
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ pillow
+ transformers
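
For reference, below is a minimal sketch of how the new handler could be smoke-tested locally before deploying it as a custom Inference Endpoint. The file name "test.jpg", the max_new_tokens value, and the example output are illustrative assumptions, not part of this commit.

from handler import EndpointHandler

# Instantiate the handler (assumes handler.py is on the Python path).
handler = EndpointHandler(path=".")

# Read an image as raw bytes; "test.jpg" is a placeholder file name.
with open("test.jpg", "rb") as f:
    image_bytes = f.read()

# The handler expects a list of raw image bytes under "inputs" and
# optional generation kwargs under "parameters".
payload = {"inputs": [image_bytes], "parameters": {"max_new_tokens": 30}}

result = handler(payload)
print(result)  # e.g. {"description": ["a person standing on a beach"]}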