zhb10086 committed on
Commit
d1e29e2
·
verified ·
1 Parent(s): 931f82e

Upload 2 files

Browse files
20240901_104528.log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
faster_rcnn_vmrn_r101_caffe_c4_1x_vmrd4683.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# BatchNorm settings reused by the backbone and by the detection branch's
# shared head: BN layers are built with requires_grad=False.
norm_cfg = dict(type='BN', requires_grad=False)

# Detector definition: a ResNet-101 backbone truncated to three stages, an RPN,
# a box RoI head, and a paired-RoI relation ("vmrn") head, followed by the
# train-time and test-time settings for each of those parts.
model = dict(
    type='FasterRCNNVMRN',
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')),
    rpn_head=dict(
        type='mmdet.RPNHead',
        in_channels=1024,
        feat_channels=1024,
        # Single feature level (stride 16): 3 scales x 5 ratios per location.
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8, 16, 32],
            ratios=[0.33, 0.5, 1.0, 2.0, 3.0],
            strides=[16]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='mmdet.StandardRoIHead',
        # NOTE(review): depth=50 here while the backbone uses depth=101;
        # kept exactly as recorded -- confirm this is intended.
        shared_head=dict(
            type='mmdet.ResLayer',
            depth=50,
            stage=3,
            stride=1,
            style='caffe',
            norm_cfg=norm_cfg,
            norm_eval=True),
        bbox_roi_extractor=dict(
            type='mmdet.SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=1024,
            featmap_strides=[16]),
        bbox_head=dict(
            type='mmdet.BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=31,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0),
            loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0))),
    # Relation branch. Unlike the detection branch, its shared head passes no
    # norm_cfg and sets norm_eval=False.
    vmrn_head=dict(
        type='invigorate.PairedRoIHead',
        shared_head=dict(
            type='invigorate.PairedResLayer',
            depth=50,
            stage=3,
            stride=1,
            style='caffe',
            norm_eval=False,
            share_weights=False),
        paired_roi_extractor=dict(
            type='invigorate.VMRNPairedRoIExtractor',
            roi_layer=dict(type='RoIPool', output_size=7),
            out_channels=1024,
            featmap_strides=[16]),
        relation_head=dict(
            type='invigorate.BBoxPairHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_relations=2,
            loss_cls=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=12000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        vmrn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.5,
                min_pos_iou=0.7,
                match_low_quality=False,
                ignore_iof_thr=-1),
            relation_sampler=dict(
                type='RandomRelationSampler',
                num=32,
                pos_fraction=0.5,
                cls_ratio_ub=-1,
                add_gt_as_proposals=True,
                num_relation_cls=2),
            pos_weight=-1,
            online_data=True,
            online_start_iteration=0)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=6000,
            max_per_img=300,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.3),
            max_per_img=100),
        vmrn=dict(
            bbox_score_thr=0.5, verbose_relation=False, average_scores=False)))
# Dataset identity and location; every split below is built from these.
dataset_type = 'VMRDDataset'
data_root = 'data/vmrd/'

# Per-channel mean subtraction only (std is 1.0 for every channel).
# NOTE(review): caffe-style checkpoints are commonly paired with BGR input
# (to_rgb=False) and BGR-ordered means; this config uses to_rgb=True with these
# means -- kept exactly as recorded, confirm against the training setup.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[1.0, 1.0, 1.0], to_rgb=True)

# Training pipeline: load image + box/label/relation annotations, augment
# (flip, photometric distortion, expand), resize, normalize, pad, and bundle.
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='LoadAnnotationsCustom',
        keys=['gt_bboxes', 'gt_labels', 'gt_relmats']),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PhotoMetricDistortion'),
    # Expand is given the same mean as the normalization step.
    dict(type='Expand', mean=img_norm_cfg['mean']),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(
        type='DefaultFormatBundleCustom',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relmats']),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relmats'])
]

# Evaluation pipeline: one scale, no flipping, no annotation loading.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

# Data loaders. The pipelines and paths reference the definitions above
# instead of repeating them, so each setting is stated once.
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    # The trainval split is wrapped in RepeatDataset with times=3.
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root + 'ImageSets/Main/trainval.txt',
            img_prefix=data_root,
            pipeline=train_pipeline)),
    # val and test both read the same test.txt split.
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'ImageSets/Main/test.txt',
        img_prefix=data_root,
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'ImageSets/Main/test.txt',
        img_prefix=data_root,
        pipeline=test_pipeline))
# Evaluate every epoch, reporting the 'mAP' and 'ImgAcc' metrics.
evaluation = dict(interval=1, metric=['mAP', 'ImgAcc'])

# Optimizer: SGD with momentum and weight decay; gradients are clipped to an
# L2 norm of at most 100.
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=100, norm_type=2))

# Step learning-rate schedule with a linear warmup over the first 500
# iterations; the single step is at epoch 8 of the 20 configured below.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8])
runner = dict(type='EpochBasedRunner', max_epochs=20)

# Checkpoint every epoch, keeping at most 3; log every 50 iterations.
checkpoint_config = dict(interval=1, max_keep_ckpts=3)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]

# Runtime / environment settings.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=16)

# NOTE(review): `mmdet = None` looks like a leftover from dumping a config
# that imported the package -- kept as recorded; confirm whether it is needed.
mmdet = None
# NOTE(review): absolute path on the original training machine; adjust when
# running elsewhere.
mmdet_root = '/data/home/hanbo/projects/alpha_vision/mmdetection/mmdet'
work_dir = './work_dirs/faster_rcnn_vmrn_r101_caffe_c4_1x_vmrd4683'
gpu_ids = range(0, 2)