import torch
from torchvision.transforms.functional import to_pil_image
from segment_anything import SamPredictor, sam_model_registry
from PIL import Image

class SegmentationModel:
    """Interface for models that turn an image tensor into a segmentation mask."""

    def generate(self, image: torch.Tensor) -> Image.Image:
        raise NotImplementedError

class SamSegmentationModel(SegmentationModel):
    def __init__(
        self,
        model_type: str,
        checkpoint_path: str,
        device: torch.device = torch.device("cpu"),
    ) -> None:
        super().__init__()
        sam = sam_model_registry[model_type](checkpoint=checkpoint_path)
        sam.to(device)
        self.device = device
        self.model = SamPredictor(sam)

    def generate(self, image: torch.Tensor) -> Image.Image:
        # Expects a CxHxW float tensor with pixel values in [0, 255].
        _, H, W = image.size()
        image = image.unsqueeze(0).to(self.device)

        # set_torch_image expects the image already resized by the
        # predictor's ResizeLongestSide transform, so apply it first.
        transformed = self.model.transform.apply_image_torch(image)
        self.model.set_torch_image(transformed, original_image_size=(H, W))

        # Prompt SAM with a single foreground point at the image center.
        # Point prompts are (x, y) pixel coordinates and must be mapped
        # into the transformed frame as well.
        center_point = torch.tensor([[[W / 2, H / 2]]], device=self.device)
        input_point = self.model.transform.apply_coords_torch(center_point, (H, W))
        input_label = torch.tensor([[1]], device=self.device)

        masks, scores, _ = self.model.predict_torch(
            point_coords=input_point,
            point_labels=input_label,
            boxes=None,
            multimask_output=True,
        )

        # Drop the batch dimension and keep the highest-scoring mask.
        masks = masks.squeeze(0)
        scores = scores.squeeze(0)
        bmask = masks[torch.argmax(scores).item()]

        # Invert the mask (object black, background white) and replicate
        # it across three channels to produce an RGB image.
        mask_float = 1.0 - bmask.float()
        final = torch.stack([mask_float, mask_float, mask_float])
        return to_pil_image(final.cpu())
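

# A minimal usage sketch, assuming a locally downloaded SAM checkpoint.
# The checkpoint path and input/output filenames below are placeholders,
# not part of this module.
if __name__ == "__main__":
    from torchvision.transforms.functional import pil_to_tensor

    model = SamSegmentationModel(
        model_type="vit_b",
        checkpoint_path="sam_vit_b_01ec64.pth",  # hypothetical local path
    )
    # Load an RGB image as a CxHxW float tensor with values in [0, 255].
    image = pil_to_tensor(Image.open("input.jpg").convert("RGB")).float()
    mask = model.generate(image)
    mask.save("mask.png")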