import math
import numpy as np
from enum import IntEnum, unique
from typing import List, Tuple, Union
import torch
from torch import device


# Raw box inputs accepted by BoxMode.convert: a single box as a list/tuple of
# numbers, or a batch of boxes as a torch.Tensor / numpy array.
_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]


@unique
class BoxMode(IntEnum):
    """
    Enum of different ways to represent a box.
    """

    XYXY_ABS = 0
    """
    (x0, y0, x1, y1) in absolute floating points coordinates.
    The coordinates in range [0, width or height].
    """
    XYWH_ABS = 1
    """
    (x0, y0, w, h) in absolute floating points coordinates.
    """
    XYXY_REL = 2
    """
    Not yet supported!
    (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
    """
    XYWH_REL = 3
    """
    Not yet supported!
    (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
    """
    XYWHA_ABS = 4
    """
    (xc, yc, w, h, a) in absolute floating points coordinates.
    (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
    """

    @staticmethod
    def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType:
        """
        Convert a box (or a batch of boxes) from one representation to another.

        Supported conversions are XYWH_ABS <-> XYXY_ABS, XYWHA_ABS -> XYXY_ABS
        (axis-aligned bounding box of the rotated box) and XYWH_ABS -> XYWHA_ABS
        (angle set to 0).

        Args:
            box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
            from_mode, to_mode (BoxMode)

        Returns:
            The converted box of the same type. When ``from_mode == to_mode`` the
            input object itself is returned; otherwise array/tensor inputs are
            copied and left unmodified.

        Raises:
            AssertionError: if a list/tuple box does not have 4 or 5 elements, if a
                relative mode is requested, or if an XYWHA_ABS input does not have
                5 values in its last dimension.
            NotImplementedError: if the requested conversion is not supported.
        """
        if from_mode == to_mode:
            return box

        original_type = type(box)
        is_numpy = isinstance(box, np.ndarray)
        single_box = isinstance(box, (list, tuple))
        if single_box:
            assert len(box) == 4 or len(box) == 5, (
                "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
                " where k == 4 or 5"
            )
            # Promote the single box to a 1xk batch so all branches share one code path.
            arr = torch.tensor(box)[None, :]
        else:
            # Work on a copy so the caller's array/tensor is never modified in place.
            if is_numpy:
                arr = torch.from_numpy(np.asarray(box)).clone()
            else:
                arr = box.clone()

        assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [
            BoxMode.XYXY_REL,
            BoxMode.XYWH_REL,
        ], "Relative mode not yet supported!"

        if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
            assert (
                arr.shape[-1] == 5
            ), "The last dimension of input shape must be 5 for XYWHA format"
            original_dtype = arr.dtype
            # Do the trigonometry in double precision, then cast back at the end.
            arr = arr.double()

            w = arr[:, 2]
            h = arr[:, 3]
            a = arr[:, 4]
            c = torch.abs(torch.cos(a * math.pi / 180.0))
            s = torch.abs(torch.sin(a * math.pi / 180.0))
            # Width/height of the axis-aligned box enclosing the rotated box.
            new_w = c * w + s * h
            new_h = c * h + s * w

            # Center -> top-left corner.
            arr[:, 0] -= new_w / 2.0
            arr[:, 1] -= new_h / 2.0
            # Width/height -> bottom-right corner.
            arr[:, 2] = arr[:, 0] + new_w
            arr[:, 3] = arr[:, 1] + new_h

            # Drop the angle column.
            arr = arr[:, :4].to(dtype=original_dtype)
        elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
            original_dtype = arr.dtype
            arr = arr.double()
            # Top-left corner -> center.
            arr[:, 0] += arr[:, 2] / 2.0
            arr[:, 1] += arr[:, 3] / 2.0
            # new_zeros inherits both dtype and device from `arr`, so the
            # concatenation below also works for tensors that are not on the CPU.
            angles = arr.new_zeros((arr.shape[0], 1))
            arr = torch.cat((arr, angles), dim=1).to(dtype=original_dtype)
        else:
            if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
                arr[:, 2] += arr[:, 0]
                arr[:, 3] += arr[:, 1]
            elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
                arr[:, 2] -= arr[:, 0]
                arr[:, 3] -= arr[:, 1]
            else:
                raise NotImplementedError(
                    "Conversion from BoxMode {} to {} is not supported yet".format(
                        from_mode, to_mode
                    )
                )

        if single_box:
            # Give back the same container type (list or tuple) that was passed in.
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        else:
            return arr


class Boxes:
    """
    This structure stores a list of boxes as a Nx4 torch.Tensor.
    It supports some common methods about boxes
    (`area`, `clip`, `nonempty`, etc),
    and also behaves like a Tensor
    (support indexing, `to(device)`, `.device`, and iteration over all boxes)

    Attributes:
        tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2).
    """

    def __init__(self, tensor: torch.Tensor):
        """
        Args:
            tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2).
                Non-tensor inputs are converted to a float32 CPU tensor; tensor
                inputs are cast to float32. An input with zero elements is
                reshaped to 0x4.
        """
        if not isinstance(tensor, torch.Tensor):
            tensor = torch.as_tensor(tensor, dtype=torch.float32, device=torch.device("cpu"))
        else:
            tensor = tensor.to(torch.float32)
        if tensor.numel() == 0:
            # Reshape the (already float32) empty input so that e.g. a shape-(0,)
            # tensor still satisfies the Nx4 invariant checked below.
            tensor = tensor.reshape((-1, 4))
        assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()

        self.tensor = tensor

    def clone(self) -> "Boxes":
        """
        Clone the Boxes.

        Returns:
            Boxes
        """
        return Boxes(self.tensor.clone())

    def to(self, device: torch.device) -> "Boxes":
        """
        Return a new Boxes whose tensor has been moved to `device`.

        Only the device can be changed here; the constructor always stores
        the data as float32.

        Args:
            device (torch.device): target device.

        Returns:
            Boxes
        """
        return Boxes(self.tensor.to(device=device))

    def area(self) -> torch.Tensor:
        """
        Computes the area of all the boxes.

        Returns:
            torch.Tensor: a vector with areas of each box.
        """
        box = self.tensor
        area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
        return area

    def clip(self, box_size: Tuple[int, int]) -> None:
        """
        Clip the boxes by limiting x coordinates to the range [0, width]
        and y coordinates to the range [0, height].

        The Boxes object is updated (nothing is returned); `self.tensor` is
        rebound to a newly built tensor holding the clipped coordinates.

        Args:
            box_size (height, width): The clipping box's size.
        """
        assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!"
        h, w = box_size
        x1 = self.tensor[:, 0].clamp(min=0, max=w)
        y1 = self.tensor[:, 1].clamp(min=0, max=h)
        x2 = self.tensor[:, 2].clamp(min=0, max=w)
        y2 = self.tensor[:, 3].clamp(min=0, max=h)
        self.tensor = torch.stack((x1, y1, x2, y2), dim=-1)

    def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
        """
        Find boxes that are non-empty.
        A box is considered empty, if either of its side is no larger than threshold.

        Args:
            threshold (float): minimum side length (exclusive) for a box to count
                as non-empty.

        Returns:
            Tensor:
                a binary vector which represents whether each box is empty
                (False) or non-empty (True).
        """
        box = self.tensor
        widths = box[:, 2] - box[:, 0]
        heights = box[:, 3] - box[:, 1]
        keep = (widths > threshold) & (heights > threshold)
        return keep

    def __getitem__(self, item) -> "Boxes":
        """
        Args:
            item: int, slice, or a BoolTensor

        Returns:
            Boxes: Create a new :class:`Boxes` by indexing.

        The following usage are allowed:

        1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box.
        2. `new_boxes = boxes[2:10]`: return a slice of boxes.
        3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor
           with `length = len(boxes)`. Nonzero elements in the vector will be selected.

        Note that the returned Boxes might share storage with this Boxes,
        subject to Pytorch's indexing semantics.
        """
        if isinstance(item, int):
            # Integer indexing drops a dimension; restore the 1x4 matrix shape.
            return Boxes(self.tensor[item].view(1, -1))
        b = self.tensor[item]
        assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item)
        return Boxes(b)

    def __len__(self) -> int:
        return self.tensor.shape[0]

    def __repr__(self) -> str:
        return "Boxes(" + str(self.tensor) + ")"

    def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor:
        """
        Args:
            box_size (height, width): Size of the reference box.
            boundary_threshold (int): Boxes that extend beyond the reference box
                boundary by more than boundary_threshold are considered "outside".

        Returns:
            a binary vector, indicating whether each box is inside the reference box.
        """
        height, width = box_size
        inds_inside = (
            (self.tensor[..., 0] >= -boundary_threshold)
            & (self.tensor[..., 1] >= -boundary_threshold)
            & (self.tensor[..., 2] < width + boundary_threshold)
            & (self.tensor[..., 3] < height + boundary_threshold)
        )
        return inds_inside

    def get_centers(self) -> torch.Tensor:
        """
        Returns:
            The box centers in a Nx2 array of (x, y).
        """
        return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2

    def scale(self, scale_x: float, scale_y: float) -> None:
        """
        Scale the box with horizontal and vertical scaling factors.

        The stored tensor is modified in place: x coordinates (columns 0 and 2)
        are multiplied by `scale_x`, y coordinates (columns 1 and 3) by `scale_y`.
        """
        self.tensor[:, 0::2] *= scale_x
        self.tensor[:, 1::2] *= scale_y

    @classmethod
    def cat(cls, boxes_list: List["Boxes"]) -> "Boxes":
        """
        Concatenates a list of Boxes into a single Boxes

        Arguments:
            boxes_list (list[Boxes])

        Returns:
            Boxes: the concatenated Boxes (an empty 0x4 Boxes for an empty list)
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0))
        assert all([isinstance(box, Boxes) for box in boxes_list])

        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
        return cat_boxes

    @property
    def device(self) -> device:
        return self.tensor.device

    @torch.jit.unused
    def __iter__(self):
        """
        Yield a box as a Tensor of shape (4,) at a time.
        """
        yield from self.tensor


def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute the intersection area for every pair drawn from two box lists.

    With N boxes in `boxes1` and M boxes in `boxes2`, all N x M pairs are
    evaluated. Boxes must be laid out as (xmin, ymin, xmax, ymax).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: intersection, sized [N,M].
    """
    first = boxes1.tensor
    second = boxes2.tensor

    # Broadcast [N, 1, 2] against [M, 2] to get per-pair corners of the overlap.
    bottom_right = torch.min(first[:, None, 2:], second[:, 2:])
    top_left = torch.max(first[:, None, :2], second[:, :2])
    extent = bottom_right - top_left  # [N, M, 2] = (width, height) per pair

    # Pairs that do not overlap have a negative extent; treat it as zero.
    extent.clamp_(min=0)
    return extent.prod(dim=2)


def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute the IoU (intersection over union) for every pair drawn from two
    box lists of size N and M.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    area1 = boxes1.area()  # [N]
    area2 = boxes2.area()  # [M]
    inter = pairwise_intersection(boxes1, boxes2)

    union = area1[:, None] + area2 - inter
    zero = torch.zeros(1, dtype=inter.dtype, device=inter.device)
    # Pairs without any overlap are reported as 0 instead of using the quotient.
    return torch.where(inter > 0, inter / union, zero)


def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Similar to :func:`pairwise_iou` but compute the IoA (intersection over boxes2 area).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: IoA, sized [N,M].
    """
    area2 = boxes2.area()  # [M]
    inter = pairwise_intersection(boxes1, boxes2)

    zero = torch.zeros(1, dtype=inter.dtype, device=inter.device)
    # Pairs without any overlap are reported as 0 instead of using the quotient.
    return torch.where(inter > 0, inter / area2, zero)


def pairwise_point_box_distance(points: torch.Tensor, boxes: Boxes):
    """
    Pairwise distance between N points and M boxes.

    The distance between a point and a box is expressed as the four signed
    distances from the point to the box edges; all four are positive when the
    point lies inside the box.

    Args:
        points: Nx2 coordinates. Each row is (x, y)
        boxes: M boxes

    Returns:
        Tensor: distances of size (N, M, 4). The 4 values are distances from
            the point to the left, top, right, bottom of the box.
    """
    # Points become columns of shape (N, 1) ...
    point_cols = points.unsqueeze(dim=2)
    x, y = point_cols.unbind(dim=1)
    # ... and box coordinates become rows of shape (1, M), so they broadcast to (N, M).
    box_rows = boxes.tensor.unsqueeze(dim=0)
    x0, y0, x1, y1 = box_rows.unbind(dim=2)

    left = x - x0
    top = y - y0
    right = x1 - x
    bottom = y1 - y
    return torch.stack([left, top, right, bottom], dim=2)


def matched_pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute pairwise intersection over union (IOU) of two sets of matched
    boxes that have the same number of boxes.
    Similar to :func:`pairwise_iou`, but computes only diagonal elements of the matrix.

    As in :func:`pairwise_iou`, a pair with no overlap yields an IoU of 0
    (this also avoids a 0/0 NaN when both boxes of a pair have zero area).

    Args:
        boxes1 (Boxes): bounding boxes, sized [N,4].
        boxes2 (Boxes): same length as boxes1

    Returns:
        Tensor: iou, sized [N].

    Raises:
        AssertionError: if the two inputs do not contain the same number of boxes.
    """
    assert len(boxes1) == len(
        boxes2
    ), "boxlists should have the same number of entries, got {}, {}".format(
        len(boxes1), len(boxes2)
    )
    area1 = boxes1.area()  # [N]
    area2 = boxes2.area()  # [N]
    box1, box2 = boxes1.tensor, boxes2.tensor
    lt = torch.max(box1[:, :2], box2[:, :2])  # [N,2] top-left of the overlap
    rb = torch.min(box1[:, 2:], box2[:, 2:])  # [N,2] bottom-right of the overlap
    wh = (rb - lt).clamp(min=0)  # [N,2]; non-overlapping pairs clamp to 0
    inter = wh[:, 0] * wh[:, 1]  # [N]
    union = area1 + area2 - inter
    iou = torch.where(
        inter > 0,
        inter / union,
        torch.zeros(1, dtype=inter.dtype, device=inter.device),
    )
    return iou