|
|
|
|
|
|
|
import torch
|
|
from torchvision.ops import boxes as box_ops
|
|
from torchvision.ops import nms
|
|
|
|
|
|
def batched_nms(
    boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
):
    """
    Run torchvision.ops.boxes.batched_nms on a float() copy of the boxes.

    Behaves like the torchvision function, except that ``boxes`` is passed
    through ``.float()`` before the call.

    Args:
        boxes (Tensor): boxes to run NMS on; the last dimension must have size 4.
        scores (Tensor): forwarded unchanged to torchvision.
        idxs (Tensor): forwarded unchanged to torchvision.
        iou_threshold (float): forwarded unchanged to torchvision.

    Returns:
        Whatever torchvision.ops.boxes.batched_nms returns for these inputs.
    """
    coords_per_box = boxes.shape[-1]
    assert coords_per_box == 4

    # Only the boxes are cast; scores and idxs keep the caller's dtype.
    # NOTE(review): presumably the cast guards against reduced-precision
    # inputs -- confirm against callers.
    float_boxes = boxes.float()
    return box_ops.batched_nms(float_boxes, scores, idxs, iou_threshold)
|
|
|
|
|
|
|
|
|
|
def nms_rotated(
    boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float
) -> torch.Tensor:
    """
    Performs non-maximum suppression (NMS) on the rotated boxes according
    to their intersection-over-union (IoU).

    Rotated NMS iteratively removes lower scoring rotated boxes which have an
    IoU greater than iou_threshold with another (higher scoring) rotated box.

    Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as
    RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they
    can be representing completely different objects in certain tasks, e.g., OCR.

    As for the question of whether rotated-NMS should treat them as faraway boxes
    even though their IoU is 1, it depends on the application and/or ground truth
    annotation.

    As an extreme example, consider a single character v and the square box around it.

    If the angle is 0 degrees, the object (text) would be read as 'v';

    If the angle is 90 degrees, the object (text) would become '>';

    If the angle is 180 degrees, the object (text) would become '^';

    If the angle is 270/-90 degrees, the object (text) would become '<'

    All of these cases have IoU of 1 to each other, and rotated NMS that only
    uses IoU as criterion would only keep one of them with the highest score -
    which, practically, still makes sense in most cases because typically
    only one of these orientations is the correct one. Also, it does not matter
    as much if the box is only used to classify the object (instead of transcribing
    them with a sequential OCR recognition model) later.

    On the other hand, when we use IoU to filter proposals that are close to the
    ground truth during training, we should definitely take the angle into account if
    we know the ground truth is labeled with the strictly correct orientation (as in,
    upside-down words are annotated with -180 degrees even though they can be covered
    with a 0/90/-90 degree box, etc.)

    The way the original dataset is annotated also matters. For example, if the dataset
    is a 4-point polygon dataset that does not enforce ordering of vertices/orientation,
    we can estimate a minimum rotated bounding box to this polygon, but there's no way
    we can tell the correct angle with 100% confidence (as shown above, there could be
    4 different rotated boxes, with angles differing by 90 degrees from each other,
    covering exactly the same region). In that case we have to just use IoU to
    determine the box proximity (as many detection benchmarks (even for text) do)
    unless there are other assumptions we can make (like width is always larger than
    height, or the object is not rotated by more than 90 degrees CCW/CW, etc.)

    In summary, not considering angles in rotated NMS seems to be a good option for now,
    but we should be aware of its implications.

    Args:
        boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in
            (x_center, y_center, width, height, angle_degrees) format.
        scores (Tensor[N]): Scores for each one of the rotated boxes
        iou_threshold (float): Discards all overlapping rotated boxes with
            IoU > iou_threshold

    Returns:
        keep (Tensor): int64 tensor with the indices of the elements that have been kept
        by Rotated NMS, sorted in decreasing order of scores
    """
    # The suppression itself is done by the custom op registered under
    # torch.ops.detectron2; this function only forwards its arguments.
    return torch.ops.detectron2.nms_rotated(boxes, scores, iou_threshold)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.script_if_tracing
def batched_nms_rotated(
    boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
) -> torch.Tensor:
    """
    Performs non-maximum suppression in a batched fashion.

    Each index value corresponds to a category, and NMS
    will not be applied between elements of different categories.

    Args:
        boxes (Tensor[N, 5]):
           boxes where NMS will be performed. They
           are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format
        scores (Tensor[N]):
           scores for each one of the boxes
        idxs (Tensor[N]):
           indices of the categories for each one of the boxes.
        iou_threshold (float):
           discards all overlapping boxes
           with IoU > iou_threshold

    Returns:
        Tensor:
            int64 tensor with the indices of the elements that have been kept
            by NMS, sorted in decreasing order of scores
    """
    assert boxes.shape[-1] == 5

    # Nothing to suppress: return an empty int64 index tensor on the input's device.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    # NOTE(review): presumably the cast keeps the offset arithmetic below in
    # range for reduced-precision inputs -- confirm against callers.
    boxes = boxes.float()

    # To run NMS independently per category with a single call, the centers of
    # each category's boxes are shifted by a category-dependent offset. The
    # offset step spans the whole coordinate range (computed from both the
    # maximum and the minimum, so negative coordinates are covered) plus one.
    # Half of the longer side is used as the per-box extent in both bounds, so
    # it is computed once and reused.
    half_extent = torch.max(boxes[:, 2], boxes[:, 3]) / 2
    max_coordinate = (torch.max(boxes[:, 0], boxes[:, 1]) + half_extent).max()
    min_coordinate = (torch.min(boxes[:, 0], boxes[:, 1]) - half_extent).min()
    offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1)

    # Work on a clone so the in-place shift never touches the caller's tensor
    # (float() returns the same tensor when the input is already float32).
    boxes_for_nms = boxes.clone()
    boxes_for_nms[:, :2] += offsets[:, None]
    return nms_rotated(boxes_for_nms, scores, iou_threshold)
|
|
|