File size: 5,199 Bytes

e26e560

import mmcv
import torch

from mmdet.models.dense_heads import YOLACTHead, YOLACTProtonet, YOLACTSegmHead


def test_yolact_head_loss():
    """Tests yolact head losses when truth is empty and non-empty."""
    s = 550
    img_metas = [{
        'img_shape': (s, s, 3),
        'scale_factor': 1,
        'pad_shape': (s, s, 3)
    }]
    train_cfg = mmcv.Config(
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.4,
                min_pos_iou=0.,
                ignore_iof_thr=-1,
                gt_max_assign_all=False),
            smoothl1_beta=1.,
            allowed_border=-1,
            pos_weight=-1,
            neg_pos_ratio=3,
            debug=False,
            min_gt_box_wh=[4.0, 4.0]))
    bbox_head = YOLACTHead(
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=3,
            scales_per_octave=1,
            base_sizes=[8, 16, 32, 64, 128],
            ratios=[0.5, 1.0, 2.0],
            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
            centers=[(550 * 0.5 / x, 550 * 0.5 / x)
                     for x in [69, 35, 18, 9, 5]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            reduction='none',
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
        num_head_convs=1,
        num_protos=32,
        use_ohem=True,
        train_cfg=train_cfg)
    segm_head = YOLACTSegmHead(
        in_channels=256,
        num_classes=80,
        loss_segm=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))
    mask_head = YOLACTProtonet(
        num_classes=80,
        in_channels=256,
        num_protos=32,
        max_masks_to_train=100,
        loss_mask_weight=6.125)
    feat = [
        torch.rand(1, 256, feat_size, feat_size)
        for feat_size in [69, 35, 18, 9, 5]
    ]
    cls_score, bbox_pred, coeff_pred = bbox_head.forward(feat)
    # Test that empty ground truth encourages the network to predict background
    gt_bboxes = [torch.empty((0, 4))]
    gt_labels = [torch.LongTensor([])]
    gt_masks = [torch.empty((0, 550, 550))]
    gt_bboxes_ignore = None
    empty_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    # When there is no truth, the cls loss should be nonzero but there should
    # be no box loss.
    empty_cls_loss = sum(empty_gt_losses['loss_cls'])
    empty_box_loss = sum(empty_gt_losses['loss_bbox'])
    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert empty_box_loss.item() == 0, (
        'there should be no box loss when there are no true boxes')

    # Test segm head and mask head
    segm_head_outs = segm_head(feat[0])
    empty_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    empty_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes, img_metas,
                                     sampling_results)
    # When there is no truth, the segm and mask loss should be zero.
    empty_segm_loss = sum(empty_segm_loss['loss_segm'])
    empty_mask_loss = sum(empty_mask_loss['loss_mask'])
    assert empty_segm_loss.item() == 0, (
        'there should be no segm loss when there are no true boxes')
    assert empty_mask_loss == 0, (
        'there should be no mask loss when there are no true boxes')

    # When truth is non-empty then cls, box, mask, segm loss should be
    # nonzero for random inputs.
    gt_bboxes = [
        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),
    ]
    gt_labels = [torch.LongTensor([2])]
    gt_masks = [(torch.rand((1, 550, 550)) > 0.5).float()]

    one_gt_losses, sampling_results = bbox_head.loss(
        cls_score,
        bbox_pred,
        gt_bboxes,
        gt_labels,
        img_metas,
        gt_bboxes_ignore=gt_bboxes_ignore)
    one_gt_cls_loss = sum(one_gt_losses['loss_cls'])
    one_gt_box_loss = sum(one_gt_losses['loss_bbox'])
    assert one_gt_cls_loss.item() > 0, 'cls loss should be non-zero'
    assert one_gt_box_loss.item() > 0, 'box loss should be non-zero'

    one_gt_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)
    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,
                          sampling_results)
    one_gt_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes,
                                      img_metas, sampling_results)
    one_gt_segm_loss = sum(one_gt_segm_loss['loss_segm'])
    one_gt_mask_loss = sum(one_gt_mask_loss['loss_mask'])
    assert one_gt_segm_loss.item() > 0, 'segm loss should be non-zero'
    assert one_gt_mask_loss.item() > 0, 'mask loss should be non-zero'