|
|
|
|
|
|
|
"""
|
|
See "Data Augmentation" tutorial for an overview of the system:
|
|
https://detectron2.readthedocs.io/tutorials/augmentation.html
|
|
"""
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torch.nn.functional as F
|
|
from fvcore.transforms.transform import (
|
|
CropTransform,
|
|
HFlipTransform,
|
|
NoOpTransform,
|
|
Transform,
|
|
TransformList,
|
|
)
|
|
from PIL import Image
|
|
|
|
try:
|
|
import cv2
|
|
except ImportError:
|
|
|
|
pass
|
|
|
|
__all__ = [
|
|
"ExtentTransform",
|
|
"ResizeTransform",
|
|
"RotationTransform",
|
|
"ColorTransform",
|
|
"PILColorTransform",
|
|
]
|
|
|
|
|
|
class ExtentTransform(Transform):
    """
    Extracts a subregion from the source image and scales it to the output size.

    The fill color is used to map pixels from the source rect that fall outside
    the source image.

    See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform
    """

    def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0):
        """
        Args:
            src_rect (x0, y0, x1, y1): src coordinates
            output_size (h, w): dst image size
            interp: PIL interpolation methods
            fill: Fill color used when src_rect extends outside image
        """
        super().__init__()
        # The methods below read self.src_rect, self.output_size, self.interp
        # and self.fill, which are populated from the constructor arguments here.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Cut ``src_rect`` out of ``img`` and rescale it to ``output_size``.

        Args:
            img (ndarray): image array; a trailing channel axis of size 1 is
                squeezed for PIL and restored on the result.
            interp: PIL interpolation method overriding ``self.interp``.
                ``None`` means "use ``self.interp``".

        Returns:
            ndarray: the transformed image.
        """
        h, w = self.output_size
        single_channel = len(img.shape) > 2 and img.shape[2] == 1
        if single_channel:
            pil_image = Image.fromarray(img[:, :, 0], mode="L")
        else:
            pil_image = Image.fromarray(img)
        # Compare against None rather than relying on truthiness:
        # Image.NEAREST has the integer value 0, so a truthiness test would
        # silently discard an explicit NEAREST request (as made by
        # apply_segmentation) and fall back to self.interp.
        resample = self.interp if interp is None else interp
        pil_image = pil_image.transform(
            size=(w, h),
            method=Image.EXTENT,
            data=self.src_rect,
            resample=resample,
            fill=self.fill,
        )
        ret = np.asarray(pil_image)
        if single_channel:
            ret = np.expand_dims(ret, -1)
        return ret

    def apply_coords(self, coords):
        """
        Map (x, y) points from source coordinates to output coordinates.

        Args:
            coords (ndarray): array whose columns 0 and 1 hold x and y.

        Returns:
            ndarray: a new float32 array; the input is not modified.
        """
        # Move the center of the source rect to the origin, scale to the
        # output size, then move the origin to the center of the output image.
        h, w = self.output_size
        x0, y0, x1, y1 = self.src_rect
        new_coords = coords.astype(np.float32)
        new_coords[:, 0] -= 0.5 * (x0 + x1)
        new_coords[:, 1] -= 0.5 * (y0 + y1)
        new_coords[:, 0] *= w / (x1 - x0)
        new_coords[:, 1] *= h / (y1 - y0)
        new_coords[:, 0] += 0.5 * w
        new_coords[:, 1] += 0.5 * h
        return new_coords

    def apply_segmentation(self, segmentation):
        """
        Apply the transform to a segmentation map using nearest-neighbor
        resampling, so that label values are not blended.
        """
        segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
        return segmentation
|
|
|
|
|
|
class ResizeTransform(Transform):
    """
    Resize the image to a target size.
    """

    def __init__(self, h, w, new_h, new_w, interp=None):
        """
        Args:
            h, w (int): original image size
            new_h, new_w (int): new image size
            interp: PIL interpolation methods, defaults to bilinear.
        """
        super().__init__()
        if interp is None:
            interp = Image.BILINEAR
        # NOTE: the local names here are significant because locals() is
        # handed to _set_attributes; self.h, self.w, self.new_h, self.new_w
        # and self.interp are read by the methods below.
        self._set_attributes(locals())

    def apply_image(self, img, interp=None):
        """
        Resize ``img`` from (h, w) to (new_h, new_w).

        Args:
            img (ndarray): array whose first two axes are (h, w), with at
                most 4 dimensions in total.
            interp: PIL interpolation method overriding ``self.interp``.

        Returns:
            ndarray: the resized array.
        """
        assert img.shape[:2] == (self.h, self.w)
        assert len(img.shape) <= 4
        interp_method = self.interp if interp is None else interp

        if img.dtype == np.uint8:
            # uint8 images go through PIL; a trailing unit channel is
            # squeezed for PIL and put back afterwards.
            unit_channel = len(img.shape) > 2 and img.shape[2] == 1
            if unit_channel:
                pil_image = Image.fromarray(img[:, :, 0], mode="L")
            else:
                pil_image = Image.fromarray(img)
            resized = np.asarray(pil_image.resize((self.new_w, self.new_h), interp_method))
            if unit_channel:
                resized = np.expand_dims(resized, -1)
            return resized

        # Every other dtype is resized with torch's F.interpolate.
        # Arrays with negative strides are made contiguous before being
        # wrapped as a tensor.
        if any(stride < 0 for stride in img.strides):
            img = np.ascontiguousarray(img)
        tensor = torch.from_numpy(img)
        out_shape = list(tensor.shape)
        # Pad the shape to 4 dims as (h, w, 1..., rest), then reorder so the
        # two spatial axes come last, as F.interpolate expects.
        padded_shape = out_shape[:2] + [1] * (4 - len(out_shape)) + out_shape[2:]
        tensor = tensor.view(padded_shape).permute(2, 3, 0, 1)
        pil_to_torch_mode = {
            Image.NEAREST: "nearest",
            Image.BILINEAR: "bilinear",
            Image.BICUBIC: "bicubic",
        }
        mode = pil_to_torch_mode[interp_method]
        if mode == "nearest":
            align_corners = None
        else:
            align_corners = False
        tensor = F.interpolate(
            tensor, (self.new_h, self.new_w), mode=mode, align_corners=align_corners
        )
        out_shape[:2] = (self.new_h, self.new_w)
        # Undo the axis reordering and restore the caller's dimensionality.
        return tensor.permute(2, 3, 0, 1).view(out_shape).numpy()

    def apply_coords(self, coords):
        """
        Scale (x, y) points by the resize factors.

        The array is modified in place and also returned.
        """
        x_scale = self.new_w * 1.0 / self.w
        coords[:, 0] = coords[:, 0] * x_scale
        y_scale = self.new_h * 1.0 / self.h
        coords[:, 1] = coords[:, 1] * y_scale
        return coords

    def apply_segmentation(self, segmentation):
        """
        Resize a segmentation map with nearest-neighbor interpolation.
        """
        return self.apply_image(segmentation, interp=Image.NEAREST)

    def inverse(self):
        """
        Return a ResizeTransform mapping (new_h, new_w) back to (h, w) with
        the same interpolation method.
        """
        return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp)
|
|
|
|
|
|
class RotationTransform(Transform):
    """
    This method returns a copy of this image, rotated the given
    number of degrees counter clockwise around its center.
    """

    def __init__(self, h, w, angle, expand=True, center=None, interp=None):
        """
        Args:
            h, w (int): original image size
            angle (float): degrees for rotation
            expand (bool): choose if the image should be resized to fit the whole
                rotated image (default), or simply cropped
            center (tuple (width, height)): coordinates of the rotation center
                if left to None, the center will be fit to the center of each image
                center has no effect if expand=True because it only affects shifting
            interp: cv2 interpolation method, default cv2.INTER_LINEAR
        """
        super().__init__()
        # NOTE: local names in this method are significant: locals() is handed
        # to _set_attributes below, and the other methods read self.image_center,
        # self.center, self.interp, self.bound_w and self.bound_h.
        image_center = np.array((w / 2, h / 2))
        if center is None:
            center = image_center
        if interp is None:
            interp = cv2.INTER_LINEAR
        abs_cos = abs(np.cos(np.deg2rad(angle)))
        abs_sin = abs(np.sin(np.deg2rad(angle)))
        if not expand:
            # Output canvas keeps the input size.
            bound_w, bound_h = w, h
        else:
            # Size of the axis-aligned box that encloses the rotated image.
            bound_w, bound_h = np.rint(
                [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin]
            ).astype(int)

        self._set_attributes(locals())
        # Matrix used for point coordinates.
        self.rm_coords = self.create_rotation_matrix()
        # Matrix used for images, built with a -0.5 offset
        # (NOTE(review): presumably a pixel-center correction for
        # cv2.warpAffine - confirm).
        self.rm_image = self.create_rotation_matrix(offset=-0.5)

    def apply_image(self, img, interp=None):
        """
        img should be a numpy array, formatted as Height * Width * Nchannels

        The input is returned unchanged when it is empty or when the angle is
        a multiple of 360 degrees.
        """
        is_noop = len(img) == 0 or self.angle % 360 == 0
        if is_noop:
            return img
        assert img.shape[:2] == (self.h, self.w)
        flags = self.interp if interp is None else interp
        out_size = (self.bound_w, self.bound_h)
        return cv2.warpAffine(img, self.rm_image, out_size, flags=flags)

    def apply_coords(self, coords):
        """
        coords should be a N * 2 array-like, containing N couples of (x, y) points

        The points are converted to a float array; they are returned without
        rotation when there are none or when the angle is a multiple of 360.
        """
        coords = np.asarray(coords, dtype=float)
        if len(coords) == 0 or self.angle % 360 == 0:
            return coords
        # cv2.transform is fed an (N, 1, 2) array; drop the middle axis again.
        rotated = cv2.transform(coords[:, np.newaxis, :], self.rm_coords)
        return rotated[:, 0, :]

    def apply_segmentation(self, segmentation):
        """
        Rotate a segmentation map with nearest-neighbor interpolation.
        """
        return self.apply_image(segmentation, interp=cv2.INTER_NEAREST)

    def create_rotation_matrix(self, offset=0):
        """
        Build the 2x3 affine matrix for this rotation.

        Args:
            offset (float): value added to both coordinates of the rotation
                center (and of the image centers when ``expand`` is set).
        """
        pivot = (self.center[0] + offset, self.center[1] + offset)
        rm = cv2.getRotationMatrix2D(tuple(pivot), self.angle, 1)
        if not self.expand:
            return rm
        # Where the original image center lands after rotation ...
        rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :]
        # ... and the translation that moves it to the center of the
        # expanded output canvas.
        shift = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center
        rm[:, 2] += shift
        return rm

    def inverse(self):
        """
        The inverse is to rotate it back with expand, and crop to get the original shape.

        Raises:
            NotImplementedError: if this transform was created with expand=False.
        """
        if not self.expand:
            raise NotImplementedError()
        undo_rotation = RotationTransform(
            self.bound_h, self.bound_w, -self.angle, True, None, self.interp
        )
        # Centered crop of the original (w, h) out of the re-expanded canvas.
        crop_x0 = (undo_rotation.bound_w - self.w) // 2
        crop_y0 = (undo_rotation.bound_h - self.h) // 2
        undo_crop = CropTransform(crop_x0, crop_y0, self.w, self.h)
        return TransformList([undo_rotation, undo_crop])
|
|
|
|
|
|
class ColorTransform(Transform):
    """
    Generic wrapper for any photometric transforms.
    These transformations should only affect the color space and
    not the coordinate space of the image (e.g. annotation
    coordinates such as bounding boxes should not be changed)
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in an ndarray and returns an ndarray.

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__()
        # apply_image reads self.op, which is populated from the argument here.
        self._set_attributes(locals())

    def apply_image(self, img):
        """Return the result of calling ``op`` on ``img``."""
        transformed = self.op(img)
        return transformed

    def apply_coords(self, coords):
        """Coordinates are returned as given."""
        return coords

    def apply_segmentation(self, segmentation):
        """Segmentation maps are returned as given."""
        return segmentation

    def inverse(self):
        """Return a NoOpTransform as the inverse."""
        return NoOpTransform()
|
|
|
|
|
|
class PILColorTransform(ColorTransform):
    """
    Generic wrapper for PIL Photometric image transforms,
    which affect the color space and not the coordinate
    space of the image
    """

    def __init__(self, op):
        """
        Args:
            op (Callable): operation to be applied to the image,
                which takes in a PIL Image and returns a transformed
                PIL Image.
                For reference on possible operations see:
                - https://pillow.readthedocs.io/en/stable/

        Raises:
            ValueError: if ``op`` is not callable.
        """
        if not callable(op):
            raise ValueError("op parameter should be callable")
        super().__init__(op)

    def apply_image(self, img):
        """
        Convert ``img`` to a PIL Image, run ``op`` on it via the parent
        class, and convert the result back to an ndarray.
        """
        pil_img = Image.fromarray(img)
        transformed = super().apply_image(pil_img)
        return np.asarray(transformed)
|
|
|
|
|
|
def HFlip_rotated_box(transform, rotated_boxes):
    """
    Apply the horizontal flip transform on rotated boxes.

    The array is modified in place and also returned.

    Args:
        transform: object providing ``width``, the width used to mirror the
            x coordinate.
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    x_center = rotated_boxes[:, 0]
    angle = rotated_boxes[:, 4]
    # Mirror the box center about the vertical axis of the image.
    rotated_boxes[:, 0] = transform.width - x_center
    # A horizontal mirror reverses the sense of rotation.
    rotated_boxes[:, 4] = -angle
    return rotated_boxes
|
|
|
|
|
|
def Resize_rotated_box(transform, rotated_boxes):
    """
    Apply the resizing transform on rotated boxes. For details of how these (approximation)
    formulas are derived, please refer to :meth:`RotatedBoxes.scale`.

    The array is modified in place and also returned.

    Args:
        transform: object providing ``w``, ``h`` (original size) and
            ``new_w``, ``new_h`` (target size).
        rotated_boxes (ndarray): Nx5 floating point array of
            (x_center, y_center, width, height, angle_degrees) format
            in absolute coordinates.
    """
    sx = transform.new_w * 1.0 / transform.w
    sy = transform.new_h * 1.0 / transform.h

    # Box centers scale independently along each axis.
    rotated_boxes[:, 0] *= sx
    rotated_boxes[:, 1] *= sy

    # The angle is read before any of the size/angle columns are overwritten.
    angle_rad = rotated_boxes[:, 4] * np.pi / 180.0
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)

    width_gain = np.sqrt(np.square(sx * cos_a) + np.square(sy * sin_a))
    height_gain = np.sqrt(np.square(sx * sin_a) + np.square(sy * cos_a))
    rotated_boxes[:, 2] *= width_gain
    rotated_boxes[:, 3] *= height_gain

    # New angle of the box under anisotropic scaling, back in degrees.
    rotated_boxes[:, 4] = np.arctan2(sx * sin_a, sy * cos_a) * 180 / np.pi
    return rotated_boxes
|
|
|
|
|
|
# Register "rotated_box" handlers on the transform classes, in a fixed order:
# horizontal flip and resize use the helpers defined in this module, and the
# no-op transform hands the boxes back as given.
for _transform_cls, _rotated_box_handler in (
    (HFlipTransform, HFlip_rotated_box),
    (ResizeTransform, Resize_rotated_box),
    (NoOpTransform, lambda t, x: x),
):
    _transform_cls.register_type("rotated_box", _rotated_box_handler)
# Do not leave the loop variables behind in the module namespace.
del _transform_cls, _rotated_box_handler
|
|
|