# -*- coding: utf-8 -*-
#
# Developed by Haozhe Xie
# References:
# - https://github.com/xiumingzhang/GenRe-ShapeHD

import os
import random

import cv2
import numpy as np
import torch


def _crop_pad_resize(img, x_left, x_right, y_top, y_bottom, dst_w, dst_h):
    """Crop ``img`` to the inclusive window [y_top:y_bottom, x_left:x_right],
    edge-padding any part of the window that falls outside the image, then
    resize the result to (dst_h, dst_w).

    Args:
        img: numpy array of shape (h, w, c)
        x_left, x_right, y_top, y_bottom: integer crop window; may lie
            outside the image bounds (handled by padding)
        dst_w, dst_h: output width and height in pixels
    Returns:
        numpy array of shape (dst_h, dst_w, c)
    """
    img_height, img_width = img.shape[0], img.shape[1]

    # If the crop window is out of the image, clamp it and remember how much
    # edge padding is needed on each side.
    pad_x_left = pad_x_right = pad_y_top = pad_y_bottom = 0
    if x_left < 0:
        pad_x_left, x_left = -x_left, 0
    if x_right >= img_width:
        pad_x_right = x_right - img_width + 1
        x_right = img_width - 1
    if y_top < 0:
        pad_y_top, y_top = -y_top, 0
    if y_bottom >= img_height:
        pad_y_bottom = y_bottom - img_height + 1
        y_bottom = img_height - 1

    cropped = np.pad(img[y_top:y_bottom + 1, x_left:x_right + 1],
                     ((pad_y_top, pad_y_bottom), (pad_x_left, pad_x_right), (0, 0)),
                     mode='edge')
    return cv2.resize(cropped, (dst_w, dst_h))


def _center_crop_resize(img, crop_size_h, crop_size_w, dst_w, dst_h):
    """Center-crop ``img`` to (crop_size_h, crop_size_w) when the image is
    strictly larger than the crop in both dimensions (otherwise keep the whole
    image), then resize to (dst_h, dst_w)."""
    img_height, img_width, _ = img.shape
    if img_height > crop_size_h and img_width > crop_size_w:
        x_left = int(img_width - crop_size_w) // 2
        x_right = int(x_left + crop_size_w)
        y_top = int(img_height - crop_size_h) // 2
        y_bottom = int(y_top + crop_size_h)
    else:
        x_left, x_right = 0, img_width
        y_top, y_bottom = 0, img_height
    return cv2.resize(img[y_top:y_bottom, x_left:x_right], (dst_w, dst_h))


class Compose(object):
    """ Composes several transforms together.
    For example:
    >>> transforms.Compose([
    >>>     transforms.RandomBackground(),
    >>>     transforms.CenterCrop(127, 127, 3),
    >>> ])
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, rendering_images, bounding_box=None):
        for t in self.transforms:
            # Only the crop transforms consume the bounding box.
            if isinstance(t, (RandomCrop, CenterCrop)):
                rendering_images = t(rendering_images, bounding_box)
            else:
                rendering_images = t(rendering_images)
        return rendering_images


class ToTensor(object):
    """ Convert a numpy.ndarray batch of images (N x H x W x C) to a
    torch.FloatTensor of shape (N x C x H x W).
    """
    def __call__(self, rendering_images):
        assert (isinstance(rendering_images, np.ndarray))
        # NHWC -> NCHW, the layout expected by torch convolution layers
        array = np.transpose(rendering_images, (0, 3, 1, 2))
        tensor = torch.from_numpy(array)
        return tensor.float()


class Normalize(object):
    """Normalize a batch of images: (img - mean) / std.

    NOTE: operates in place — the caller's array is modified. The input must
    therefore be a float array (an integer dtype would raise on ``-=``).
    """
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, rendering_images):
        assert (isinstance(rendering_images, np.ndarray))
        rendering_images -= self.mean
        rendering_images /= self.std
        return rendering_images


class RandomPermuteRGB(object):
    """Apply one random permutation of the color channels to every image in
    the batch (in place)."""
    def __call__(self, rendering_images):
        assert (isinstance(rendering_images, np.ndarray))
        random_permutation = np.random.permutation(3)
        for img_idx, img in enumerate(rendering_images):
            rendering_images[img_idx] = img[..., random_permutation]
        return rendering_images


class CenterCrop(object):
    def __init__(self, img_size, crop_size):
        """Set the (height, width) of the output image and of the crop window."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """Crop each image around ``bounding_box`` (normalized [x1, y1, x2, y2]
        coordinates) when given, otherwise center-crop, and resize to
        (img_size_h, img_size_w). Returns a float64 array (N x H x W x C)."""
        if len(rendering_images) == 0:
            return rendering_images

        processed_images = []
        for img in rendering_images:
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # Scale the normalized bbox to this image's pixel coordinates.
                # Use a local name so the caller's bbox is NOT re-scaled
                # cumulatively on every iteration (bug in the original, which
                # rebound ``bounding_box`` itself inside the loop).
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]    # yapf: disable

                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5

                # Make the crop area a square centered on the bbox
                square_object_size = max(bbox_width, bbox_height)
                x_left = int(bbox_x_mid - square_object_size * .5)
                x_right = int(bbox_x_mid + square_object_size * .5)
                y_top = int(bbox_y_mid - square_object_size * .5)
                y_bottom = int(bbox_y_mid + square_object_size * .5)

                processed_image = _crop_pad_resize(img, x_left, x_right, y_top, y_bottom,
                                                   self.img_size_w, self.img_size_h)
            else:
                processed_image = _center_crop_resize(img, self.crop_size_h, self.crop_size_w,
                                                      self.img_size_w, self.img_size_h)
            processed_images.append(processed_image)

        # float64 matches the dtype produced by the original np.append-based
        # accumulator (which started from an empty float64 array).
        return np.stack(processed_images).astype(np.float64, copy=False)


class RandomCrop(object):
    def __init__(self, img_size, crop_size):
        """Set the (height, width) of the output image and of the crop window."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """Like CenterCrop, but jitters both the square size (x0.8-1.2) and the
        crop center (each side offset drawn from 0.4-0.6 of the square) when a
        bounding box is given. Returns a float64 array (N x H x W x C)."""
        if len(rendering_images) == 0:
            return rendering_images

        processed_images = []
        for img in rendering_images:
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # Scale the normalized bbox to pixel coordinates; local name
                # avoids the original's cumulative re-scaling bug across images.
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]    # yapf: disable

                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5

                # Square crop with randomized size and center
                square_object_size = max(bbox_width, bbox_height)
                square_object_size = square_object_size * random.uniform(0.8, 1.2)
                x_left = int(bbox_x_mid - square_object_size * random.uniform(.4, .6))
                x_right = int(bbox_x_mid + square_object_size * random.uniform(.4, .6))
                y_top = int(bbox_y_mid - square_object_size * random.uniform(.4, .6))
                y_bottom = int(bbox_y_mid + square_object_size * random.uniform(.4, .6))

                processed_image = _crop_pad_resize(img, x_left, x_right, y_top, y_bottom,
                                                   self.img_size_w, self.img_size_h)
            else:
                processed_image = _center_crop_resize(img, self.crop_size_h, self.crop_size_w,
                                                      self.img_size_w, self.img_size_h)
            processed_images.append(processed_image)

        return np.stack(processed_images).astype(np.float64, copy=False)


class RandomFlip(object):
    """Horizontally flip each image in the batch independently with
    probability 0.5 (in place)."""
    def __call__(self, rendering_images):
        assert (isinstance(rendering_images, np.ndarray))
        for img_idx, img in enumerate(rendering_images):
            if random.randint(0, 1):
                rendering_images[img_idx] = np.fliplr(img)
        return rendering_images


class ColorJitter(object):
    def __init__(self, brightness, contrast, saturation):
        # Each value d means the blend alpha is drawn from [1 - d, 1 + d]
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation

    def __call__(self, rendering_images):
        """Apply the same randomized brightness/contrast/saturation adjustment
        (in a random order) to every image of the batch."""
        if len(rendering_images) == 0:
            return rendering_images

        # Randomize the value of changing brightness, contrast, and saturation
        brightness = 1 + np.random.uniform(low=-self.brightness, high=self.brightness)
        contrast = 1 + np.random.uniform(low=-self.contrast, high=self.contrast)
        saturation = 1 + np.random.uniform(low=-self.saturation, high=self.saturation)

        # Randomize the order of changing brightness, contrast, and saturation
        attr_names = ['brightness', 'contrast', 'saturation']
        attr_values = [brightness, contrast, saturation]    # The value of changing attrs
        attr_indexes = np.array(range(len(attr_names)))     # The order of changing attrs
        np.random.shuffle(attr_indexes)

        processed_images = []
        for img in rendering_images:
            processed_image = img
            for idx in attr_indexes:
                processed_image = self._adjust_image_attr(processed_image, attr_names[idx],
                                                          attr_values[idx])
            processed_images.append(processed_image)

        return np.stack(processed_images).astype(np.float64, copy=False)

    def _adjust_image_attr(self, img, attr_name, attr_value):
        """
        Adjust or randomize the specified attribute of the image

        Args:
            img: Image in BGR format
                Numpy array of shape (h, w, 3)
            attr_name: Image attribute to adjust or randomize
                'brightness', 'saturation', or 'contrast'
            attr_value: the alpha for blending is randomly drawn from [1 - d, 1 + d]

        Returns:
            Output image in BGR format
            Numpy array of the same shape as input
        """
        gs = self._bgr_to_gray(img)

        if attr_name == 'contrast':
            # Blend with the scalar mean intensity
            img = self._alpha_blend(img, np.mean(gs[:, :, 0]), attr_value)
        elif attr_name == 'saturation':
            # Blend with the per-pixel grayscale image
            img = self._alpha_blend(img, gs, attr_value)
        elif attr_name == 'brightness':
            # Blend with black
            img = self._alpha_blend(img, 0, attr_value)
        else:
            raise NotImplementedError(attr_name)
        return img

    def _bgr_to_gray(self, bgr):
        """
        Convert a BGR image to a grayscale image
        Differences from cv2.cvtColor():
            1. Input image can be float
            2. Output image has three repeated channels, other than a single channel

        Args:
            bgr: Image in BGR format
                Numpy array of shape (h, w, 3)

        Returns:
            gs: Grayscale image
                Numpy array of the same shape as input; the three channels are the same
        """
        # ITU-R 601 luma weights, applied to B, G, R channels respectively
        ch = 0.114 * bgr[:, :, 0] + 0.587 * bgr[:, :, 1] + 0.299 * bgr[:, :, 2]
        gs = np.dstack((ch, ch, ch))
        return gs

    def _alpha_blend(self, im1, im2, alpha):
        """
        Alpha blending of two images or one image and a scalar

        Args:
            im1, im2: Image or scalar
                Numpy array and a scalar or two numpy arrays of the same shape
            alpha: Weight of im1
                Float ranging usually from 0 to 1

        Returns:
            im_blend: Blended image -- alpha * im1 + (1 - alpha) * im2
                Numpy array of the same shape as input image
        """
        im_blend = alpha * im1 + (1 - alpha) * im2
        return im_blend


class RandomNoise(object):
    def __init__(self,
                 noise_std,
                 eigvals=(0.2175, 0.0188, 0.0045),
                 eigvecs=((-0.5675, 0.7192, 0.4009),
                          (-0.5808, -0.0045, -0.8140),
                          (-0.5836, -0.6948, 0.4203))):
        # AlexNet-style PCA lighting noise parameters (RGB order)
        self.noise_std = noise_std
        self.eigvals = np.array(eigvals)
        self.eigvecs = np.array(eigvecs)

    def __call__(self, rendering_images):
        """Add one random PCA color offset to every image of the batch.
        Returns a new float64 array; the input is left unmodified (the
        original implementation mutated the input in place through a
        reversed channel view)."""
        alpha = np.random.normal(loc=0, scale=self.noise_std, size=3)
        # Equivalent to the tile/multiply/sum formulation:
        # noise_rgb[i] = sum_j eigvecs[i, j] * alpha[j] * eigvals[j]
        noise_rgb = self.eigvecs @ (alpha * self.eigvals)

        img_height, img_width, img_channels = rendering_images[0].shape
        assert (img_channels == 3), "Please use RandomBackground to normalize image channels"

        # Images are BGR while noise_rgb is in RGB order, so reverse the
        # offset instead of flipping each image back and forth.
        processed_images = [img + noise_rgb[::-1] for img in rendering_images]
        return np.stack(processed_images).astype(np.float64, copy=False)


class RandomBackground(object):
    def __init__(self, random_bg_color_range, random_bg_folder_path=None):
        # random_bg_color_range: per-channel (low, high) ranges in [0, 255]
        self.random_bg_color_range = random_bg_color_range
        self.random_bg_files = []
        if random_bg_folder_path is not None:
            self.random_bg_files = [
                os.path.join(random_bg_folder_path, rbf)
                for rbf in os.listdir(random_bg_folder_path)
            ]

    def __call__(self, rendering_images):
        """Composite RGBA images over a random background (a random solid
        color, or a random image from the background folder), dropping the
        alpha channel. Non-RGBA batches are returned unchanged."""
        if len(rendering_images) == 0:
            return rendering_images

        img_height, img_width, img_channels = rendering_images[0].shape
        # Only images with an alpha channel need background compositing
        if not img_channels == 4:
            return rendering_images

        # Generate a random background color in [0, 1]
        r, g, b = np.array([
            np.random.randint(self.random_bg_color_range[i][0], self.random_bg_color_range[i][1] + 1)
            for i in range(3)
        ]) / 255.

        random_bg = None
        if len(self.random_bg_files) > 0:
            random_bg_file_path = random.choice(self.random_bg_files)
            random_bg = cv2.imread(random_bg_file_path).astype(np.float32) / 255.

        # Apply the background where the alpha channel marks transparency
        processed_images = []
        for img in rendering_images:
            # Mask of fully transparent pixels (alpha == 0) -> background
            alpha = (np.expand_dims(img[:, :, 3], axis=2) == 0).astype(np.float32)
            rgb = img[:, :, :3]
            bg_color = random_bg if random.randint(0, 1) and random_bg is not None else np.array([[[r, g, b]]])
            processed_images.append(alpha * bg_color + (1 - alpha) * rgb)

        return np.stack(processed_images).astype(np.float64, copy=False)