File size: 9,553 Bytes

from typing import Dict, Tuple, Union

import cv2
import numpy as np
import torch
from PIL import Image
from PIL.Image import Image as PilImage
from torchvision import transforms
from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.image_utils import ImageInput


class RescaleT(object):
    def __init__(self, output_size: Union[int, Tuple[int, int]]) -> None:
        super().__init__()
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample) -> Dict[str, np.ndarray]:
        image, label = sample["image"], sample["label"]

        h, w = image.shape[:2]

        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        # resize the image to new_h x new_w and convert image from range [0,255] to [0,1]
        # img = transform.resize(image,(new_h,new_w),mode='constant')
        # lbl = transform.resize(label,(new_h,new_w),mode='constant', order=0, preserve_range=True)

        # img = transform.resize(image, (self.output_size, self.output_size), mode='constant')
        img = (
            cv2.resize(
                image,
                (self.output_size, self.output_size),
                interpolation=cv2.INTER_AREA,
            )
            / 255.0
        )
        # lbl = transform.resize(label, (self.output_size, self.output_size),
        #                        mode='constant',
        #                        order=0,
        #                        preserve_range=True)
        lbl = cv2.resize(
            label, (self.output_size, self.output_size), interpolation=cv2.INTER_NEAREST
        )
        lbl = np.expand_dims(lbl, axis=-1)
        lbl = np.clip(lbl, np.min(label), np.max(label))

        return {"image": img, "label": lbl}


class ToTensorLab(object):
    """Convert ndarrays in sample to Tensors."""

    def __init__(self, flag: int = 0) -> None:
        self.flag = flag

    def __call__(self, sample):
        image, label = sample["image"], sample["label"]

        tmpLbl = np.zeros(label.shape)

        if np.max(label) < 1e-6:
            label = label
        else:
            label = label / np.max(label)

        # change the color space
        if self.flag == 2:  # with rgb and Lab colors
            tmpImg = np.zeros((image.shape[0], image.shape[1], 6))
            tmpImgt = np.zeros((image.shape[0], image.shape[1], 3))
            if image.shape[2] == 1:
                tmpImgt[:, :, 0] = image[:, :, 0]
                tmpImgt[:, :, 1] = image[:, :, 0]
                tmpImgt[:, :, 2] = image[:, :, 0]
            else:
                tmpImgt = image
            # tmpImgtl = color.rgb2lab(tmpImgt)
            tmpImgtl = cv2.cvtColor(tmpImgt, cv2.COLOR_RGB2LAB)

            # nomalize image to range [0,1]
            tmpImg[:, :, 0] = (tmpImgt[:, :, 0] - np.min(tmpImgt[:, :, 0])) / (
                np.max(tmpImgt[:, :, 0]) - np.min(tmpImgt[:, :, 0])
            )
            tmpImg[:, :, 1] = (tmpImgt[:, :, 1] - np.min(tmpImgt[:, :, 1])) / (
                np.max(tmpImgt[:, :, 1]) - np.min(tmpImgt[:, :, 1])
            )
            tmpImg[:, :, 2] = (tmpImgt[:, :, 2] - np.min(tmpImgt[:, :, 2])) / (
                np.max(tmpImgt[:, :, 2]) - np.min(tmpImgt[:, :, 2])
            )
            tmpImg[:, :, 3] = (tmpImgtl[:, :, 0] - np.min(tmpImgtl[:, :, 0])) / (
                np.max(tmpImgtl[:, :, 0]) - np.min(tmpImgtl[:, :, 0])
            )
            tmpImg[:, :, 4] = (tmpImgtl[:, :, 1] - np.min(tmpImgtl[:, :, 1])) / (
                np.max(tmpImgtl[:, :, 1]) - np.min(tmpImgtl[:, :, 1])
            )
            tmpImg[:, :, 5] = (tmpImgtl[:, :, 2] - np.min(tmpImgtl[:, :, 2])) / (
                np.max(tmpImgtl[:, :, 2]) - np.min(tmpImgtl[:, :, 2])
            )

            # tmpImg = tmpImg/(np.max(tmpImg)-np.min(tmpImg))

            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.mean(tmpImg[:, :, 0])) / np.std(
                tmpImg[:, :, 0]
            )
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.mean(tmpImg[:, :, 1])) / np.std(
                tmpImg[:, :, 1]
            )
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.mean(tmpImg[:, :, 2])) / np.std(
                tmpImg[:, :, 2]
            )
            tmpImg[:, :, 3] = (tmpImg[:, :, 3] - np.mean(tmpImg[:, :, 3])) / np.std(
                tmpImg[:, :, 3]
            )
            tmpImg[:, :, 4] = (tmpImg[:, :, 4] - np.mean(tmpImg[:, :, 4])) / np.std(
                tmpImg[:, :, 4]
            )
            tmpImg[:, :, 5] = (tmpImg[:, :, 5] - np.mean(tmpImg[:, :, 5])) / np.std(
                tmpImg[:, :, 5]
            )

        elif self.flag == 1:  # with Lab color
            tmpImg = np.zeros((image.shape[0], image.shape[1], 3))

            if image.shape[2] == 1:
                tmpImg[:, :, 0] = image[:, :, 0]
                tmpImg[:, :, 1] = image[:, :, 0]
                tmpImg[:, :, 2] = image[:, :, 0]
            else:
                tmpImg = image

            # tmpImg = color.rgb2lab(tmpImg)
            print("tmpImg:", tmpImg.min(), tmpImg.max())
            exit()
            tmpImg = cv2.cvtColor(tmpImg, cv2.COLOR_RGB2LAB)

            # tmpImg = tmpImg/(np.max(tmpImg)-np.min(tmpImg))

            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.min(tmpImg[:, :, 0])) / (
                np.max(tmpImg[:, :, 0]) - np.min(tmpImg[:, :, 0])
            )
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.min(tmpImg[:, :, 1])) / (
                np.max(tmpImg[:, :, 1]) - np.min(tmpImg[:, :, 1])
            )
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.min(tmpImg[:, :, 2])) / (
                np.max(tmpImg[:, :, 2]) - np.min(tmpImg[:, :, 2])
            )

            tmpImg[:, :, 0] = (tmpImg[:, :, 0] - np.mean(tmpImg[:, :, 0])) / np.std(
                tmpImg[:, :, 0]
            )
            tmpImg[:, :, 1] = (tmpImg[:, :, 1] - np.mean(tmpImg[:, :, 1])) / np.std(
                tmpImg[:, :, 1]
            )
            tmpImg[:, :, 2] = (tmpImg[:, :, 2] - np.mean(tmpImg[:, :, 2])) / np.std(
                tmpImg[:, :, 2]
            )

        else:  # with rgb color
            tmpImg = np.zeros((image.shape[0], image.shape[1], 3))
            image = image / np.max(image)
            if image.shape[2] == 1:
                tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 1] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 2] = (image[:, :, 0] - 0.485) / 0.229
            else:
                tmpImg[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
                tmpImg[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
                tmpImg[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225

        tmpLbl[:, :, 0] = label[:, :, 0]

        # change the r,g,b to b,r,g from [0,255] to [0,1]
        # transforms.Normalize(mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225))
        tmpImg = tmpImg.transpose((2, 0, 1))
        tmpLbl = label.transpose((2, 0, 1))

        return {"image": torch.from_numpy(tmpImg), "label": torch.from_numpy(tmpLbl)}


def apply_transform(
    data: Dict[str, np.ndarray], rescale_size: int, to_tensor_lab_flag: int
) -> Dict[str, torch.Tensor]:
    transform = transforms.Compose(
        [RescaleT(output_size=rescale_size), ToTensorLab(flag=to_tensor_lab_flag)]
    )
    return transform(data)  # type: ignore


class BASNetImageProcessor(BaseImageProcessor):
    model_input_names = ["pixel_values"]

    def __init__(
        self, rescale_size: int = 256, to_tensor_lab_flag: int = 0, **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.rescale_size = rescale_size
        self.to_tensor_lab_flag = to_tensor_lab_flag

    def preprocess(self, images: ImageInput, **kwargs) -> BatchFeature:
        if not isinstance(images, PilImage):
            raise ValueError(f"Expected PIL.Image, got {type(images)}")

        image_pil = images
        image_npy = np.array(image_pil, dtype=np.uint8)
        width, height = image_pil.size
        label_npy = np.zeros((height, width), dtype=np.uint8)

        assert image_npy.shape[-1] == 3
        output = apply_transform(
            {"image": image_npy, "label": label_npy},
            rescale_size=self.rescale_size,
            to_tensor_lab_flag=self.to_tensor_lab_flag,
        )
        image = output["image"]

        assert isinstance(image, torch.Tensor)

        return BatchFeature(
            data={"pixel_values": image.float().unsqueeze(dim=0)}, tensor_type="pt"
        )

    def postprocess(
        self, prediction: torch.Tensor, width: int, height: int
    ) -> PilImage:
        def _norm_prediction(d: torch.Tensor) -> torch.Tensor:
            ma, mi = torch.max(d), torch.min(d)

            # division while avoiding zero division
            dn = (d - mi) / ((ma - mi) + torch.finfo(torch.float32).eps)
            return dn

        prediction = _norm_prediction(prediction)
        prediction = prediction.squeeze()
        prediction = prediction * 255 + 0.5
        prediction = prediction.clamp(0, 255)

        prediction_np = prediction.cpu().numpy()
        image = Image.fromarray(prediction_np).convert("RGB")
        image = image.resize((width, height), resample=Image.Resampling.BILINEAR)
        return image