from PIL import Image
import io
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Run inference on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


class ImageCaptioning:
    """Generates natural-language captions for images with the BLIP base model."""

    def __init__(self):
        # Load the pretrained BLIP processor (image preprocessing + tokenizer)
        # and the captioning model, moving the model to the selected device.
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

    def get_caption(self, image_bytes):
        # Decode the raw bytes into a PIL image; convert to RGB so grayscale or
        # RGBA inputs are handled consistently by the preprocessing step.
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        # Preprocess the image into model-ready tensors on the same device as the model.
        img_tensors = self.processor(img, return_tensors="pt").to(device)
        # Generate caption token IDs; no gradients are needed for inference.
        with torch.no_grad():
            output = self.model.generate(**img_tensors)
        # Decode the generated token IDs back into a plain-text caption.
        caption = self.processor.batch_decode(output, skip_special_tokens=True)[0]
        return caption
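

# A minimal usage sketch, assuming this module is run directly and a local image
# file exists on disk; the filename "example.jpg" is purely illustrative.
if __name__ == "__main__":
    captioner = ImageCaptioning()
    with open("example.jpg", "rb") as f:
        image_bytes = f.read()
    print(captioner.get_caption(image_bytes))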