# Swin-Transformer-Object-Detection/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py
# Base config files this config builds on: the default runtime settings and
# the COCO detection dataset definition.
_base_ = [
    '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'
]
# model settings
# CornerNet-type detector: a 2-stack HourglassNet backbone, no neck, and a
# CentripetalHead predicting 80 classes.
model = dict(
    type='CornerNet',
    backbone=dict(
        type='HourglassNet',
        downsample_times=5,
        num_stacks=2,
        # One entry per stage (6 entries each).
        stage_channels=[256, 256, 384, 384, 384, 512],
        stage_blocks=[2, 2, 2, 2, 2, 4],
        norm_cfg=dict(type='BN', requires_grad=True)),
    neck=None,
    bbox_head=dict(
        type='CentripetalHead',
        num_classes=80,
        in_channels=256,
        # NOTE(review): presumably matches the backbone's num_stacks=2 (one
        # feature level per hourglass stack) -- confirm against the head.
        num_feat_levels=2,
        corner_emb_channels=0,
        loss_heatmap=dict(
            type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),
        loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1),
        # The guiding-shift loss is weighted 0.05, far below the other terms.
        loss_guiding_shift=dict(
            type='SmoothL1Loss', beta=1.0, loss_weight=0.05),
        loss_centripetal_shift=dict(
            type='SmoothL1Loss', beta=1.0, loss_weight=1)),
    # training and testing settings
    train_cfg=None,
    test_cfg=dict(
        corner_topk=100,
        local_maximum_kernel=3,
        distance_threshold=0.5,
        score_thr=0.05,
        max_per_img=100,
        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))
# data settings
# Per-channel mean/std and the to_rgb flag. This dict is unpacked into both
# the RandomCenterCropPad and the Normalize steps of the pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: load image (as float32) and boxes, apply photometric
# distortion, random center crop/pad to 511x511, resize, random horizontal
# flip with ratio 0.5, normalize, then bundle and collect the training keys.
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(
        type='RandomCenterCropPad',
        crop_size=(511, 511),
        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
        test_mode=False,
        test_pad_mode=None,
        # The transform receives mean/std/to_rgb as well.
        **img_norm_cfg),
    # keep_ratio=False: resize to exactly 511x511.
    dict(type='Resize', img_scale=(511, 511), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: load the image as float32, then run the inner transforms
# under MultiScaleFlipAug with scale_factor=1.0 and flip=True.
test_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='MultiScaleFlipAug',
        scale_factor=1.0,
        flip=True,
        transforms=[
            dict(type='Resize'),
            dict(
                type='RandomCenterCropPad',
                # Crop-related options are disabled; only the test-time
                # padding mode is configured.
                crop_size=None,
                ratios=None,
                border=None,
                test_mode=True,
                test_pad_mode=['logical_or', 127],
                **img_norm_cfg),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(
                type='Collect',
                keys=['img'],
                # 'border' is collected alongside the usual meta keys.
                # NOTE(review): presumably consumed when mapping predictions
                # back to the unpadded image -- confirm against the head.
                meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',
                           'scale_factor', 'flip', 'img_norm_cfg', 'border')),
        ])
]
# Dataloader settings: 6 samples and 3 workers per GPU. Only the pipelines
# are set here for the train/val/test splits; val and test share
# test_pipeline.
# NOTE(review): the remaining dataset fields presumably come from the
# _base_ COCO dataset config -- confirm.
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=3,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
# optimizer
# Adam with lr=0.0005; gradients are clipped to an L2 norm (norm_type=2)
# of at most 35.
optimizer = dict(type='Adam', lr=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Step schedule with a linear warmup (500 warmup iterations, warmup ratio
# 1/3) and a single step milestone at 190.
# NOTE(review): the milestone is presumably an epoch index, given the
# 210-epoch EpochBasedRunner -- confirm against the LR updater hook.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[190])
# Epoch-based training loop, capped at 210 epochs.
runner = dict(type='EpochBasedRunner', max_epochs=210)