Spaces: CVPR

lkeab committed a13a033 (1 parent: cbe011a)

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed (50):
  1. README.md +5 -5
  2. app.py +84 -0
  3. configs/Base-RCNN-C4.yaml +18 -0
  4. configs/Base-RCNN-DilatedC5.yaml +31 -0
  5. configs/Base-RCNN-FPN.yaml +43 -0
  6. configs/Base-RetinaNet.yaml +25 -0
  7. configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +27 -0
  8. configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml +27 -0
  9. configs/Detectron1-Comparisons/README.md +84 -0
  10. configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +17 -0
  11. configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +27 -0
  12. configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +20 -0
  13. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +19 -0
  14. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml +19 -0
  15. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +19 -0
  16. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +23 -0
  17. configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +22 -0
  18. configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +22 -0
  19. configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +26 -0
  20. configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +12 -0
  21. configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +15 -0
  22. configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +36 -0
  23. configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +10 -0
  24. configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +8 -0
  25. configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +11 -0
  26. configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml +11 -0
  27. configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +21 -0
  28. configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +24 -0
  29. configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py +151 -0
  30. configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +26 -0
  31. configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +13 -0
  32. configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +19 -0
  33. configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +19 -0
  34. configs/Misc/semantic_R_50_FPN_1x.yaml +11 -0
  35. configs/Misc/torchvision_imagenet_R_50.py +150 -0
  36. configs/common/README.md +6 -0
  37. configs/common/coco_schedule.py +47 -0
  38. configs/common/data/coco.py +48 -0
  39. configs/common/data/coco_keypoint.py +13 -0
  40. configs/common/data/coco_panoptic_separated.py +26 -0
  41. configs/common/models/cascade_rcnn.py +36 -0
  42. configs/common/models/keypoint_rcnn_fpn.py +33 -0
  43. configs/common/models/mask_rcnn_c4.py +88 -0
  44. configs/common/models/mask_rcnn_fpn.py +93 -0
  45. configs/common/models/panoptic_fpn.py +20 -0
  46. configs/common/models/retinanet.py +52 -0
  47. configs/common/optim.py +15 -0
  48. configs/common/train.py +18 -0
  49. configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py +9 -0
  50. configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py +14 -0
README.md CHANGED
@@ -1,13 +1,13 @@
  ---
  title: Transfiner
- emoji: 🌍
+ emoji: 📊
- colorFrom: gray
+ colorFrom: red
- colorTo: gray
+ colorTo: green
  sdk: gradio
- sdk_version: 3.0.20
+ sdk_version: 2.9.3
  app_file: app.py
  pinned: false
  license: apache-2.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,84 @@
+ #try:
+ #    import detectron2
+ #except:
+ import os
+ os.system('pip install git+https://github.com/SysCV/transfiner.git')
+
+ from matplotlib.pyplot import axis
+ import gradio as gr
+ import requests
+ import numpy as np
+ from torch import nn
+
+ import torch
+
+ from detectron2 import model_zoo
+ from detectron2.engine import DefaultPredictor
+ from detectron2.config import get_cfg
+ from detectron2.utils.visualizer import Visualizer
+ from detectron2.data import MetadataCatalog
+
+
+ model_name = './configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml'
+
+
+ cfg = get_cfg()
+ # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+ cfg.merge_from_file(model_name)
+ cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
+ cfg.VIS_PERIOD = 100
+ # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
+ #cfg.MODEL.WEIGHTS = './output_3x_transfiner_r50.pth'
+ cfg.MODEL.WEIGHTS = './output_3x_transfiner_r101_deform.pth'
+
+ if not torch.cuda.is_available():
+     cfg.MODEL.DEVICE = 'cpu'
+
+ predictor = DefaultPredictor(cfg)
+
+
+ def inference(image):
+     # cap the width at 1300 px, preserving the aspect ratio
+     width, height = image.size
+     if width > 1300:
+         ratio = float(height) / float(width)
+         width = 1300
+         height = int(ratio * width)
+         image = image.resize((width, height))
+
+     img = np.asarray(image)
+
+     outputs = predictor(img)
+
+     v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
+     out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+
+     return out.get_image()
+
+
+ title = "Mask Transfiner [CVPR, 2022]"
+ description = "Demo for <a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> based on R50-FPN. To use it, simply upload your image, or click one of the examples to load them. Note that it runs in the <b>CPU environment</b> provided by Hugging Face, so the processing speed may be slow."
+ article = "<p style='text-align: center'><a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> | <a target='_blank' href='https://github.com/SysCV/transfiner'>Mask Transfiner Github Code</a></p>"
+
+ gr.Interface(
+     inference,
+     [gr.inputs.Image(type="pil", label="Input")],
+     gr.outputs.Image(type="numpy", label="Output"),
+     title=title,
+     description=description,
+     article=article,
+     examples=[
+         ["demo/sample_imgs/000000131444.jpg"],
+         ["demo/sample_imgs/000000157365.jpg"],
+         ["demo/sample_imgs/000000176037.jpg"],
+         ["demo/sample_imgs/000000018737.jpg"],
+         ["demo/sample_imgs/000000224200.jpg"],
+         ["demo/sample_imgs/000000558073.jpg"],
+         ["demo/sample_imgs/000000404922.jpg"],
+         ["demo/sample_imgs/000000252776.jpg"],
+         ["demo/sample_imgs/000000482477.jpg"],
+         ["demo/sample_imgs/000000344909.jpg"]
+     ]).launch()
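As a quick local sanity check of the handler above (an editor's sketch, not part of the commit), the `inference` function can be called directly on one of the bundled sample images; this assumes app.py's module-level setup (`cfg`, `predictor`) has already run in the process and that Pillow and matplotlib are installed:

```python
# Hypothetical smoke test for app.py's inference(); run from the Space's root directory.
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open("demo/sample_imgs/000000131444.jpg").convert("RGB")  # path from the examples list
vis = inference(img)            # H x W x 3 numpy visualization from detectron2's Visualizer
plt.imshow(vis)
plt.axis("off")
plt.savefig("prediction.png", bbox_inches="tight")
```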
configs/Base-RCNN-C4.yaml ADDED
@@ -0,0 +1,18 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   RPN:
+     PRE_NMS_TOPK_TEST: 6000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "Res5ROIHeads"
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RCNN-DilatedC5.yaml ADDED
@@ -0,0 +1,31 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   RESNETS:
+     OUT_FEATURES: ["res5"]
+     RES5_DILATION: 2
+   RPN:
+     IN_FEATURES: ["res5"]
+     PRE_NMS_TOPK_TEST: 6000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "StandardROIHeads"
+     IN_FEATURES: ["res5"]
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+   ROI_MASK_HEAD:
+     NAME: "MaskRCNNConvUpsampleHead"
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RCNN-FPN.yaml ADDED
@@ -0,0 +1,43 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   BACKBONE:
+     NAME: "build_resnet_fpn_backbone"
+   RESNETS:
+     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+   FPN:
+     IN_FEATURES: ["res2", "res3", "res4", "res5"]
+   ANCHOR_GENERATOR:
+     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+   RPN:
+     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+     # Detectron1 uses 2000 proposals per-batch,
+     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+     POST_NMS_TOPK_TRAIN: 1000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "StandardROIHeads"
+     IN_FEATURES: ["p2", "p3", "p4", "p5"]
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+   ROI_MASK_HEAD:
+     NAME: "MaskRCNNConvUpsampleHead"
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   #TEST: ("coco_2017_val",)
+   TEST: ("coco_2017_test-dev",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RetinaNet.yaml ADDED
@@ -0,0 +1,25 @@
+ MODEL:
+   META_ARCHITECTURE: "RetinaNet"
+   BACKBONE:
+     NAME: "build_retinanet_resnet_fpn_backbone"
+   RESNETS:
+     OUT_FEATURES: ["res3", "res4", "res5"]
+   ANCHOR_GENERATOR:
+     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+   FPN:
+     IN_FEATURES: ["res3", "res4", "res5"]
+   RETINANET:
+     IOU_THRESHOLDS: [0.4, 0.5]
+     IOU_LABELS: [0, -1, 1]
+     SMOOTH_L1_LOSS_BETA: 0.0
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Cityscapes/mask_rcnn_R_50_FPN.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   # For better, more stable performance initialize from COCO
+   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+   MASK_ON: True
+   ROI_HEADS:
+     NUM_CLASSES: 8
+ # This is similar to the setting used in Mask R-CNN paper, Appendix A
+ # But there are some differences, e.g., we did not initialize the output
+ # layer using the corresponding classes from COCO
+ INPUT:
+   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+   MIN_SIZE_TRAIN_SAMPLING: "choice"
+   MIN_SIZE_TEST: 1024
+   MAX_SIZE_TRAIN: 2048
+   MAX_SIZE_TEST: 2048
+ DATASETS:
+   TRAIN: ("cityscapes_fine_instance_seg_train",)
+   TEST: ("cityscapes_fine_instance_seg_val",)
+ SOLVER:
+   BASE_LR: 0.01
+   STEPS: (18000,)
+   MAX_ITER: 24000
+   IMS_PER_BATCH: 8
+ TEST:
+   EVAL_PERIOD: 8000
configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   # For better, more stable performance initialize from COCO
+   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+   MASK_ON: True
+   ROI_HEADS:
+     NUM_CLASSES: 8
+ # This is similar to the setting used in Mask R-CNN paper, Appendix A
+ # But there are some differences, e.g., we did not initialize the output
+ # layer using the corresponding classes from COCO
+ INPUT:
+   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+   MIN_SIZE_TRAIN_SAMPLING: "choice"
+   MIN_SIZE_TEST: 1024
+   MAX_SIZE_TRAIN: 2048
+   MAX_SIZE_TEST: 2048
+ DATASETS:
+   TRAIN: ("cityscapes_fine_instance_seg_train",)
+   TEST: ("cityscapes_fine_instance_seg_val",)
+ SOLVER:
+   BASE_LR: 0.005
+   STEPS: (36000,)
+   MAX_ITER: 48000
+   IMS_PER_BATCH: 4
+ TEST:
+   EVAL_PERIOD: 48000
configs/Detectron1-Comparisons/README.md ADDED
@@ -0,0 +1,84 @@
+
+ Detectron2's model zoo uses experimental settings and a few implementation details that differ from Detectron.
+
+ The differences in implementation details are shared in
+ [Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+ The differences in the model zoo's experimental settings include:
+ * Use scale augmentation during training. This improves AP at lower training cost.
+ * Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+   affect other APs.
+ * Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+ * Use `ROIAlignV2`. This does not significantly affect AP.
+
+ In this directory, we provide a few configs that __do not__ have the above changes.
+ They mimic Detectron's behavior as closely as possible,
+ and provide a fair comparison of accuracy and speed against Detectron.
+
+ <!--
+ ./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+ -->
+
+
+ <table><tbody>
+ <!-- START TABLE -->
+ <!-- TABLE HEADER -->
+ <th valign="bottom">Name</th>
+ <th valign="bottom">lr<br/>sched</th>
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
+ <th valign="bottom">box<br/>AP</th>
+ <th valign="bottom">mask<br/>AP</th>
+ <th valign="bottom">kp.<br/>AP</th>
+ <th valign="bottom">model id</th>
+ <th valign="bottom">download</th>
+ <!-- TABLE BODY -->
+ <!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.219</td>
+ <td align="center">0.038</td>
+ <td align="center">3.1</td>
+ <td align="center">36.9</td>
+ <td align="center"></td>
+ <td align="center"></td>
+ <td align="center">137781054</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+ </tr>
+ <!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+ <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.313</td>
+ <td align="center">0.071</td>
+ <td align="center">5.0</td>
+ <td align="center">53.1</td>
+ <td align="center"></td>
+ <td align="center">64.2</td>
+ <td align="center">137781195</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+ </tr>
+ <!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.273</td>
+ <td align="center">0.043</td>
+ <td align="center">3.4</td>
+ <td align="center">37.8</td>
+ <td align="center">34.9</td>
+ <td align="center"></td>
+ <td align="center">137781281</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+ </tr>
+ </tbody></table>
+
+ ## Comparisons:
+
+ * Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+ * Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+   [bug](https://github.com/facebookresearch/Detectron/issues/459) led to a drop in box AP, which can be
+   compensated for by some parameter tuning.
+ * Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We are 1 AP better in mask AP, due to a more correct implementation.
+   See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+ For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml ADDED
@@ -0,0 +1,17 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: False
+   RESNETS:
+     DEPTH: 50
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+ INPUT:
+   # no scale augmentation
+   MIN_SIZE_TRAIN: (800,)
configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   KEYPOINT_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1
+   ROI_KEYPOINT_HEAD:
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+     # 1000 proposals per-image is found to hurt box AP.
+     # Therefore we increase it to 1500 per-image.
+     POST_NMS_TOPK_TRAIN: 1500
+ DATASETS:
+   TRAIN: ("keypoints_coco_2017_train",)
+   TEST: ("keypoints_coco_2017_val",)
configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml ADDED
@@ -0,0 +1,20 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   ROI_MASK_HEAD:
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+ INPUT:
+   # no scale augmentation
+   MIN_SIZE_TRAIN: (800,)
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "./model_final_824ab5.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 150  #300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 150  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml ADDED
@@ -0,0 +1,23 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+   PIXEL_STD: [57.375, 57.120, 58.395]
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml ADDED
@@ -0,0 +1,22 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,22 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml ADDED
@@ -0,0 +1,26 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+   PIXEL_STD: [57.375, 57.120, 58.395]
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,12 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml ADDED
@@ -0,0 +1,15 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml ADDED
@@ -0,0 +1,36 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   MASK_ON: True
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 152
+     DEFORM_ON_PER_STAGE: [False, True, True, True]
+   ROI_HEADS:
+     NAME: "CascadeROIHeads"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "GN"
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     NUM_CONV: 8
+     NORM: "GN"
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   IMS_PER_BATCH: 128
+   STEPS: (35000, 45000)
+   MAX_ITER: 50000
+   BASE_LR: 0.16
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 864)
+   MIN_SIZE_TRAIN_SAMPLING: "range"
+   MAX_SIZE_TRAIN: 1440
+   CROP:
+     ENABLED: True
+ TEST:
+   EVAL_PERIOD: 2500
configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml ADDED
@@ -0,0 +1,10 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     CLS_AGNOSTIC_MASK: True
configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml ADDED
@@ -0,0 +1,8 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
+ SOLVER:
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
+ SOLVER:
+   STEPS: (420000, 500000)  # (210000, 250000)
+   MAX_ITER: 540000  # 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml ADDED
@@ -0,0 +1,21 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     NORM: "GN"
+     STRIDE_IN_1X1: False
+   FPN:
+     NORM: "GN"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "GN"
+   ROI_MASK_HEAD:
+     NORM: "GN"
+ SOLVER:
+   # 3x schedule
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml ADDED
@@ -0,0 +1,24 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     NORM: "SyncBN"
+     STRIDE_IN_1X1: True
+   FPN:
+     NORM: "SyncBN"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "SyncBN"
+   ROI_MASK_HEAD:
+     NORM: "SyncBN"
+ SOLVER:
+   # 3x schedule
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
+ TEST:
+   PRECISE_BN:
+     ENABLED: True
configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py ADDED
@@ -0,0 +1,151 @@
+ # An example config to train a mmdetection model using detectron2.
+
+ from ..common.data.coco import dataloader
+ from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+ from ..common.optim import SGD as optimizer
+ from ..common.train import train
+
+ from detectron2.modeling.mmdet_wrapper import MMDetDetector
+ from detectron2.config import LazyCall as L
+
+ model = L(MMDetDetector)(
+     detector=dict(
+         type="MaskRCNN",
+         pretrained="torchvision://resnet50",
+         backbone=dict(
+             type="ResNet",
+             depth=50,
+             num_stages=4,
+             out_indices=(0, 1, 2, 3),
+             frozen_stages=1,
+             norm_cfg=dict(type="BN", requires_grad=True),
+             norm_eval=True,
+             style="pytorch",
+         ),
+         neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+         rpn_head=dict(
+             type="RPNHead",
+             in_channels=256,
+             feat_channels=256,
+             anchor_generator=dict(
+                 type="AnchorGenerator",
+                 scales=[8],
+                 ratios=[0.5, 1.0, 2.0],
+                 strides=[4, 8, 16, 32, 64],
+             ),
+             bbox_coder=dict(
+                 type="DeltaXYWHBBoxCoder",
+                 target_means=[0.0, 0.0, 0.0, 0.0],
+                 target_stds=[1.0, 1.0, 1.0, 1.0],
+             ),
+             loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+             loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+         ),
+         roi_head=dict(
+             type="StandardRoIHead",
+             bbox_roi_extractor=dict(
+                 type="SingleRoIExtractor",
+                 roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+                 out_channels=256,
+                 featmap_strides=[4, 8, 16, 32],
+             ),
+             bbox_head=dict(
+                 type="Shared2FCBBoxHead",
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type="DeltaXYWHBBoxCoder",
+                     target_means=[0.0, 0.0, 0.0, 0.0],
+                     target_stds=[0.1, 0.1, 0.2, 0.2],
+                 ),
+                 reg_class_agnostic=False,
+                 loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+             ),
+             mask_roi_extractor=dict(
+                 type="SingleRoIExtractor",
+                 roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+                 out_channels=256,
+                 featmap_strides=[4, 8, 16, 32],
+             ),
+             mask_head=dict(
+                 type="FCNMaskHead",
+                 num_convs=4,
+                 in_channels=256,
+                 conv_out_channels=256,
+                 num_classes=80,
+                 loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+             ),
+         ),
+         # model training and testing settings
+         train_cfg=dict(
+             rpn=dict(
+                 assigner=dict(
+                     type="MaxIoUAssigner",
+                     pos_iou_thr=0.7,
+                     neg_iou_thr=0.3,
+                     min_pos_iou=0.3,
+                     match_low_quality=True,
+                     ignore_iof_thr=-1,
+                 ),
+                 sampler=dict(
+                     type="RandomSampler",
+                     num=256,
+                     pos_fraction=0.5,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=False,
+                 ),
+                 allowed_border=-1,
+                 pos_weight=-1,
+                 debug=False,
+             ),
+             rpn_proposal=dict(
+                 nms_pre=2000,
+                 max_per_img=1000,
+                 nms=dict(type="nms", iou_threshold=0.7),
+                 min_bbox_size=0,
+             ),
+             rcnn=dict(
+                 assigner=dict(
+                     type="MaxIoUAssigner",
+                     pos_iou_thr=0.5,
+                     neg_iou_thr=0.5,
+                     min_pos_iou=0.5,
+                     match_low_quality=True,
+                     ignore_iof_thr=-1,
+                 ),
+                 sampler=dict(
+                     type="RandomSampler",
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True,
+                 ),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False,
+             ),
+         ),
+         test_cfg=dict(
+             rpn=dict(
+                 nms_pre=1000,
+                 max_per_img=1000,
+                 nms=dict(type="nms", iou_threshold=0.7),
+                 min_bbox_size=0,
+             ),
+             rcnn=dict(
+                 score_thr=0.05,
+                 nms=dict(type="nms", iou_threshold=0.5),
+                 max_per_img=100,
+                 mask_thr_binary=0.5,
+             ),
+         ),
+     ),
+     pixel_mean=[123.675, 116.280, 103.530],
+     pixel_std=[58.395, 57.120, 57.375],
+ )
+
+ dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
+ train.init_checkpoint = None  # pretrained model is loaded inside backbone
configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml ADDED
@@ -0,0 +1,26 @@
+ # A large PanopticFPN for demo purposes.
+ # Use GN on backbone to support semantic seg.
+ # Use Cascade + Deform Conv to improve localization.
+ _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+ MODEL:
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+   RESNETS:
+     DEPTH: 101
+     NORM: "GN"
+     DEFORM_ON_PER_STAGE: [False, True, True, True]
+     STRIDE_IN_1X1: False
+   FPN:
+     NORM: "GN"
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     NORM: "GN"
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   STEPS: (105000, 125000)
+   MAX_ITER: 135000
+   IMS_PER_BATCH: 32
+   BASE_LR: 0.04
configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml ADDED
@@ -0,0 +1,13 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+ MODEL:
+   # Train from random initialization.
+   WEIGHTS: ""
+   # It makes sense to divide by STD when training from scratch
+   # But it seems to make no difference on the results and C2's models didn't do this.
+   # So we keep things consistent with C2.
+   # PIXEL_STD: [57.375, 57.12, 58.395]
+   MASK_ON: True
+   BACKBONE:
+     FREEZE_AT: 0
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+ MODEL:
+   PIXEL_STD: [57.375, 57.12, 58.395]
+   WEIGHTS: ""
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False
+   BACKBONE:
+     FREEZE_AT: 0
+ SOLVER:
+   # 9x schedule
+   IMS_PER_BATCH: 64  # 4x the standard
+   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+   MAX_ITER: 202500  # 90k * 9 / 4
+   BASE_LR: 0.08
+ TEST:
+   EVAL_PERIOD: 2500
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+ MODEL:
+   PIXEL_STD: [57.375, 57.12, 58.395]
+   WEIGHTS: ""
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False
+   BACKBONE:
+     FREEZE_AT: 0
+ SOLVER:
+   # 9x schedule
+   IMS_PER_BATCH: 64  # 4x the standard
+   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+   MAX_ITER: 202500  # 90k * 9 / 4
+   BASE_LR: 0.08
+ TEST:
+   EVAL_PERIOD: 2500
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/semantic_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   META_ARCHITECTURE: "SemanticSegmentor"
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   RESNETS:
+     DEPTH: 50
+ DATASETS:
+   TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+   TEST: ("coco_2017_val_panoptic_stuffonly",)
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
configs/Misc/torchvision_imagenet_R_50.py ADDED
@@ -0,0 +1,150 @@
+ """
+ An example config file to train an ImageNet classifier with detectron2.
+ Model and dataloader both come from torchvision.
+ This shows how to use detectron2 as a general engine for any new models and tasks.
+
+ To run, use the following command:
+
+ python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+     --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+ """
+
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+ from omegaconf import OmegaConf
+ import torchvision
+ from torchvision.transforms import transforms as T
+ from torchvision.models.resnet import ResNet, Bottleneck
+ from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+ from detectron2.solver import WarmupParamScheduler
+ from detectron2.solver.build import get_default_optimizer_params
+ from detectron2.config import LazyCall as L
+ from detectron2.model_zoo import get_config
+ from detectron2.data.samplers import TrainingSampler, InferenceSampler
+ from detectron2.evaluation import DatasetEvaluator
+ from detectron2.utils import comm
+
+
+ """
+ Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+ proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
+ Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+ In practice, you might want to put code in your project and import it instead.
+ """
+
+
+ def build_data_loader(dataset, batch_size, num_workers, training=True):
+     return torch.utils.data.DataLoader(
+         dataset,
+         sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
+         batch_size=batch_size,
+         num_workers=num_workers,
+         pin_memory=True,
+     )
+
+
+ class ClassificationNet(nn.Module):
+     def __init__(self, model: nn.Module):
+         super().__init__()
+         self.model = model
+
+     @property
+     def device(self):
+         return list(self.model.parameters())[0].device
+
+     def forward(self, inputs):
+         image, label = inputs
+         pred = self.model(image.to(self.device))
+         if self.training:
+             label = label.to(self.device)
+             return F.cross_entropy(pred, label)
+         else:
+             return pred
+
+
+ class ClassificationAcc(DatasetEvaluator):
+     def reset(self):
+         self.corr = self.total = 0
+
+     def process(self, inputs, outputs):
+         image, label = inputs
+         self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+         self.total += len(label)
+
+     def evaluate(self):
+         all_corr_total = comm.all_gather([self.corr, self.total])
+         corr = sum(x[0] for x in all_corr_total)
+         total = sum(x[1] for x in all_corr_total)
+         return {"accuracy": corr / total}
+
+
+ # --- End of code that could be in a project and be imported
+
+
+ dataloader = OmegaConf.create()
+ dataloader.train = L(build_data_loader)(
+     dataset=L(torchvision.datasets.ImageNet)(
+         root="/path/to/imagenet",
+         split="train",
+         transform=L(T.Compose)(
+             transforms=[
+                 L(T.RandomResizedCrop)(size=224),
+                 L(T.RandomHorizontalFlip)(),
+                 T.ToTensor(),
+                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             ]
+         ),
+     ),
+     batch_size=256 // 8,
+     num_workers=4,
+     training=True,
+ )
+
+ dataloader.test = L(build_data_loader)(
+     dataset=L(torchvision.datasets.ImageNet)(
+         root="${...train.dataset.root}",
+         split="val",
+         transform=L(T.Compose)(
+             transforms=[
+                 L(T.Resize)(size=256),
+                 L(T.CenterCrop)(size=224),
+                 T.ToTensor(),
+                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             ]
+         ),
+     ),
+     batch_size=256 // 8,
+     num_workers=4,
+     training=False,
+ )
+
+ dataloader.evaluator = L(ClassificationAcc)()
+
+ model = L(ClassificationNet)(
+     model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+ )
+
+
+ optimizer = L(torch.optim.SGD)(
+     params=L(get_default_optimizer_params)(),
+     lr=0.1,
+     momentum=0.9,
+     weight_decay=1e-4,
+ )
+
+ lr_multiplier = L(WarmupParamScheduler)(
+     scheduler=L(MultiStepParamScheduler)(
+         values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+     ),
+     warmup_length=1 / 100,
+     warmup_factor=0.1,
+ )
+
+
+ train = get_config("common/train.py").train
+ train.init_checkpoint = None
+ train.max_iter = 100 * 1281167 // 256
configs/common/README.md ADDED
@@ -0,0 +1,6 @@
+ This directory provides definitions for a few common models, dataloaders, schedulers,
+ and optimizers that are often used in training.
+ The definitions of these objects are provided in the form of lazy instantiation:
+ their arguments can be edited by users before constructing the objects.
+
+ They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs.
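To make the lazy-instantiation contract concrete, here is a small sketch (an editor's addition, not part of the commit) that loads one of these definitions through `model_zoo.get_config`, edits an argument, and only then builds the object; it assumes a detectron2 installation that ships `detectron2.config.instantiate`:

```python
# Sketch: edit a lazily-defined model before construction.
from detectron2 import model_zoo
from detectron2.config import instantiate

cfg = model_zoo.get_config("common/models/mask_rcnn_fpn.py")  # an omegaconf tree; nothing is built yet
cfg.model.roi_heads.num_classes = 8                           # edit arguments while still a config
model = instantiate(cfg.model)                                # modules are constructed only here
```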
configs/common/coco_schedule.py ADDED
@@ -0,0 +1,47 @@
+ from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+ from detectron2.config import LazyCall as L
+ from detectron2.solver import WarmupParamScheduler
+
+
+ def default_X_scheduler(num_X):
+     """
+     Returns the config for a default multi-step LR scheduler such as "1x", "3x",
+     commonly referred to in papers, where every 1x has the total length of 1440k
+     training images (~12 COCO epochs). LR is decayed twice at the end of training
+     following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+
+     Args:
+         num_X: a positive real number
+
+     Returns:
+         DictConfig: configs that define the multiplier for LR during training
+     """
+     # total number of iterations assuming 16 batch size, using 1440000/16=90000
+     total_steps_16bs = num_X * 90000
+
+     if num_X <= 2:
+         scheduler = L(MultiStepParamScheduler)(
+             values=[1.0, 0.1, 0.01],
+             # note that scheduler is scale-invariant. This is equivalent to
+             # milestones=[6, 8, 9]
+             milestones=[60000, 80000, 90000],
+         )
+     else:
+         scheduler = L(MultiStepParamScheduler)(
+             values=[1.0, 0.1, 0.01],
+             milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
+         )
+     return L(WarmupParamScheduler)(
+         scheduler=scheduler,
+         warmup_length=1000 / total_steps_16bs,
+         warmup_method="linear",
+         warmup_factor=0.001,
+     )
+
+
+ lr_multiplier_1x = default_X_scheduler(1)
+ lr_multiplier_2x = default_X_scheduler(2)
+ lr_multiplier_3x = default_X_scheduler(3)
+ lr_multiplier_6x = default_X_scheduler(6)
+ lr_multiplier_9x = default_X_scheduler(9)
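As a quick cross-check (editor's note), `default_X_scheduler(3)` reproduces exactly the milestones that the yaml configs in this commit spell out as `STEPS: (210000, 250000)` / `MAX_ITER: 270000`:

```python
# The num_X > 2 branch of default_X_scheduler, written out for num_X = 3.
num_X = 3
total_steps_16bs = num_X * 90000  # 270000 iterations at batch size 16
milestones = [total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs]
assert milestones == [210000, 250000, 270000]  # matches STEPS + MAX_ITER in the 3x yaml configs
```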
configs/common/data/coco.py ADDED
@@ -0,0 +1,48 @@
+ from omegaconf import OmegaConf
+
+ import detectron2.data.transforms as T
+ from detectron2.config import LazyCall as L
+ from detectron2.data import (
+     DatasetMapper,
+     build_detection_test_loader,
+     build_detection_train_loader,
+     get_detection_dataset_dicts,
+ )
+ from detectron2.evaluation import COCOEvaluator
+
+ dataloader = OmegaConf.create()
+
+ dataloader.train = L(build_detection_train_loader)(
+     dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+     mapper=L(DatasetMapper)(
+         is_train=True,
+         augmentations=[
+             L(T.ResizeShortestEdge)(
+                 short_edge_length=(640, 672, 704, 736, 768, 800),
+                 sample_style="choice",
+                 max_size=1333,
+             ),
+             L(T.RandomFlip)(horizontal=True),
+         ],
+         image_format="BGR",
+         use_instance_mask=True,
+     ),
+     total_batch_size=16,
+     num_workers=4,
+ )
+
+ dataloader.test = L(build_detection_test_loader)(
+     dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+     mapper=L(DatasetMapper)(
+         is_train=False,
+         augmentations=[
+             L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+         ],
+         image_format="${...train.mapper.image_format}",
+     ),
+     num_workers=4,
+ )
+
+ dataloader.evaluator = L(COCOEvaluator)(
+     dataset_name="${..test.dataset.names}",
+ )
configs/common/data/coco_keypoint.py ADDED
@@ -0,0 +1,13 @@
+ from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+ from .coco import dataloader
+
+ dataloader.train.dataset.min_keypoints = 1
+ dataloader.train.dataset.names = "keypoints_coco_2017_train"
+ dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+ dataloader.train.mapper.update(
+     use_instance_mask=False,
+     use_keypoint=True,
+     keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+ )
configs/common/data/coco_panoptic_separated.py ADDED
@@ -0,0 +1,26 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.evaluation import (
+     COCOEvaluator,
+     COCOPanopticEvaluator,
+     DatasetEvaluators,
+     SemSegEvaluator,
+ )
+
+ from .coco import dataloader
+
+ dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+ dataloader.train.dataset.filter_empty = False
+ dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+ dataloader.evaluator = [
+     L(COCOEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+     L(SemSegEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+     L(COCOPanopticEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+ ]
configs/common/models/cascade_rcnn.py ADDED
@@ -0,0 +1,36 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
+
+ from .mask_rcnn_fpn import model
+
+ # arguments that don't exist for Cascade R-CNN
+ [model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+ model.roi_heads.update(
+     _target_=CascadeROIHeads,
+     box_heads=[
+         L(FastRCNNConvFCHead)(
+             input_shape=ShapeSpec(channels=256, height=7, width=7),
+             conv_dims=[],
+             fc_dims=[1024, 1024],
+         )
+         for k in range(3)
+     ],
+     box_predictors=[
+         L(FastRCNNOutputLayers)(
+             input_shape=ShapeSpec(channels=1024),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+             cls_agnostic_bbox_reg=True,
+             num_classes="${...num_classes}",
+         )
+         for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+     ],
+     proposal_matchers=[
+         L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+         for th in [0.5, 0.6, 0.7]
+     ],
+ )
configs/common/models/keypoint_rcnn_fpn.py ADDED
@@ -0,0 +1,33 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
+
+ from .mask_rcnn_fpn import model
+
+ [model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
+
+ model.roi_heads.update(
+     num_classes=1,
+     keypoint_in_features=["p2", "p3", "p4", "p5"],
+     keypoint_pooler=L(ROIPooler)(
+         output_size=14,
+         scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+         sampling_ratio=0,
+         pooler_type="ROIAlignV2",
+     ),
+     keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
+         input_shape=ShapeSpec(channels=256, width=14, height=14),
+         num_keypoints=17,
+         conv_dims=[512] * 8,
+         loss_normalizer="visible",
+     ),
+ )
+
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ model.proposal_generator.post_nms_topk = (1500, 1000)
+
+ # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+ model.roi_heads.box_predictor.smooth_l1_beta = 0.5
configs/common/models/mask_rcnn_c4.py ADDED
@@ -0,0 +1,88 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import GeneralizedRCNN
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+ from detectron2.modeling.roi_heads import (
+     FastRCNNOutputLayers,
+     MaskRCNNConvUpsampleHead,
+     Res5ROIHeads,
+ )
+
+ model = L(GeneralizedRCNN)(
+     backbone=L(ResNet)(
+         stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+         stages=L(ResNet.make_default_stages)(
+             depth=50,
+             stride_in_1x1=True,
+             norm="FrozenBN",
+         ),
+         out_features=["res4"],
+     ),
+     proposal_generator=L(RPN)(
+         in_features=["res4"],
+         head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
+         anchor_generator=L(DefaultAnchorGenerator)(
+             sizes=[[32, 64, 128, 256, 512]],
+             aspect_ratios=[0.5, 1.0, 2.0],
+             strides=[16],
+             offset=0.0,
+         ),
+         anchor_matcher=L(Matcher)(
+             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+         ),
+         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+         batch_size_per_image=256,
+         positive_fraction=0.5,
+         pre_nms_topk=(12000, 6000),
+         post_nms_topk=(2000, 1000),
+         nms_thresh=0.7,
+     ),
+     roi_heads=L(Res5ROIHeads)(
+         num_classes=80,
+         batch_size_per_image=512,
+         positive_fraction=0.25,
+         proposal_matcher=L(Matcher)(
+             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+         ),
+         in_features=["res4"],
+         pooler=L(ROIPooler)(
+             output_size=14,
+             scales=(1.0 / 16,),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         res5=L(ResNet.make_stage)(
+             block_class=BottleneckBlock,
+             num_blocks=3,
+             stride_per_block=[2, 1, 1],
+             in_channels=1024,
+             bottleneck_channels=512,
+             out_channels=2048,
+             norm="FrozenBN",
+             stride_in_1x1=True,
+         ),
+         box_predictor=L(FastRCNNOutputLayers)(
+             input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+             num_classes="${..num_classes}",
+         ),
+         mask_head=L(MaskRCNNConvUpsampleHead)(
+             input_shape=L(ShapeSpec)(
+                 channels="${...res5.out_channels}",
+                 width="${...pooler.output_size}",
+                 height="${...pooler.output_size}",
+             ),
+             num_classes="${..num_classes}",
+             conv_dims=[256],
+         ),
+     ),
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
configs/common/models/mask_rcnn_fpn.py ADDED
@@ -0,0 +1,93 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import GeneralizedRCNN
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+ from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+ from detectron2.modeling.roi_heads import (
+     StandardROIHeads,
+     FastRCNNOutputLayers,
+     MaskRCNNConvUpsampleHead,
+     FastRCNNConvFCHead,
+ )
+
+ model = L(GeneralizedRCNN)(
+     backbone=L(FPN)(
+         bottom_up=L(ResNet)(
+             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+             stages=L(ResNet.make_default_stages)(
+                 depth=50,
+                 stride_in_1x1=True,
+                 norm="FrozenBN",
+             ),
+             out_features=["res2", "res3", "res4", "res5"],
+         ),
+         in_features="${.bottom_up.out_features}",
+         out_channels=256,
+         top_block=L(LastLevelMaxPool)(),
+     ),
+     proposal_generator=L(RPN)(
+         in_features=["p2", "p3", "p4", "p5", "p6"],
+         head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
+         anchor_generator=L(DefaultAnchorGenerator)(
+             sizes=[[32], [64], [128], [256], [512]],
+             aspect_ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64],
+             offset=0.0,
+         ),
+         anchor_matcher=L(Matcher)(
+             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+         ),
+         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+         batch_size_per_image=256,
+         positive_fraction=0.5,
+         pre_nms_topk=(2000, 1000),
+         post_nms_topk=(1000, 1000),
+         nms_thresh=0.7,
+     ),
+     roi_heads=L(StandardROIHeads)(
+         num_classes=80,
+         batch_size_per_image=512,
+         positive_fraction=0.25,
+         proposal_matcher=L(Matcher)(
+             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+         ),
+         box_in_features=["p2", "p3", "p4", "p5"],
+         box_pooler=L(ROIPooler)(
+             output_size=7,
+             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         box_head=L(FastRCNNConvFCHead)(
+             input_shape=ShapeSpec(channels=256, height=7, width=7),
+             conv_dims=[],
+             fc_dims=[1024, 1024],
+         ),
+         box_predictor=L(FastRCNNOutputLayers)(
+             input_shape=ShapeSpec(channels=1024),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+             num_classes="${..num_classes}",
+         ),
+         mask_in_features=["p2", "p3", "p4", "p5"],
+         mask_pooler=L(ROIPooler)(
+             output_size=14,  # ori is 14
+             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         mask_head=L(MaskRCNNConvUpsampleHead)(
+             input_shape=ShapeSpec(channels=256, width=14, height=14),
+             num_classes="${..num_classes}",
+             conv_dims=[256, 256, 256, 256, 256],
+         ),
+     ),
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
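The `"${..num_classes}"` strings above are OmegaConf interpolations: setting `model.roi_heads.num_classes` once is enough for the box predictor and mask head to pick up the new value. A brief sketch (editor's addition; the config path is an assumption about where this file sits in a checkout):

```python
# Sketch: one edit propagates through the "${..num_classes}" interpolations.
from detectron2.config import LazyConfig

cfg = LazyConfig.load("configs/common/models/mask_rcnn_fpn.py")
cfg.model.roi_heads.num_classes = 1                    # e.g., a single-class detector
print(cfg.model.roi_heads.box_predictor.num_classes)   # -> 1, resolved via interpolation
print(cfg.model.roi_heads.mask_head.num_classes)       # -> 1
```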
configs/common/models/panoptic_fpn.py ADDED
@@ -0,0 +1,20 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling import PanopticFPN
+ from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
+
+ from .mask_rcnn_fpn import model
+
+ model._target_ = PanopticFPN
+ model.sem_seg_head = L(SemSegFPNHead)(
+     input_shape={
+         f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
+         for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
+     },
+     ignore_value=255,
+     num_classes=54,  # COCO stuff + 1
+     conv_dims=128,
+     common_stride=4,
+     loss_weight=0.5,
+     norm="GN",
+ )
configs/common/models/retinanet.py ADDED
@@ -0,0 +1,52 @@
+ # -*- coding: utf-8 -*-
+
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import RetinaNet
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone.fpn import LastLevelP6P7
+ from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
+
+ model = L(RetinaNet)(
+     backbone=L(FPN)(
+         bottom_up=L(ResNet)(
+             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+             stages=L(ResNet.make_default_stages)(
+                 depth=50,
+                 stride_in_1x1=True,
+                 norm="FrozenBN",
+             ),
+             out_features=["res3", "res4", "res5"],
+         ),
+         in_features=["res3", "res4", "res5"],
+         out_channels=256,
+         top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
+     ),
+     head=L(RetinaNetHead)(
+         input_shape=[ShapeSpec(channels=256)],
+         num_classes="${..num_classes}",
+         conv_dims=[256, 256, 256, 256],
+         prior_prob=0.01,
+         num_anchors=9,
+     ),
+     anchor_generator=L(DefaultAnchorGenerator)(
+         sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
+         aspect_ratios=[0.5, 1.0, 2.0],
+         strides=[8, 16, 32, 64, 128],
+         offset=0.0,
+     ),
+     box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+     anchor_matcher=L(Matcher)(
+         thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
+     ),
+     num_classes=80,
+     head_in_features=["p3", "p4", "p5", "p6", "p7"],
+     focal_loss_alpha=0.25,
+     focal_loss_gamma=2.0,
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
configs/common/optim.py ADDED
@@ -0,0 +1,15 @@
+ import torch
+
+ from detectron2.config import LazyCall as L
+ from detectron2.solver.build import get_default_optimizer_params
+
+ SGD = L(torch.optim.SGD)(
+     params=L(get_default_optimizer_params)(
+         # params.model is meant to be set to the model object, before instantiating
+         # the optimizer.
+         weight_decay_norm=0.0
+     ),
+     lr=0.02,
+     momentum=0.9,
+     weight_decay=1e-4,
+ )
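The comment inside `params` is the key contract here: `get_default_optimizer_params` is itself lazy, and a trainer is expected to attach the model before the optimizer is built. A minimal sketch of that hand-off (editor's addition, with a stand-in module instead of a real detector):

```python
# Sketch: fill the deferred params.model argument, then build the optimizer.
import torch.nn as nn
from detectron2.config import instantiate
from detectron2.model_zoo import get_config

cfg = get_config("common/optim.py")
model = nn.Linear(10, 10)             # stand-in for a built detector
cfg.SGD.params.model = model          # the deferred argument noted in the comment above
optimizer = instantiate(cfg.SGD)      # get_default_optimizer_params(model, ...) runs here
```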
configs/common/train.py ADDED
@@ -0,0 +1,18 @@
+ # Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
+ # You can use your own instead, together with your own train_net.py
+ train = dict(
+     output_dir="./output",
+     init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
+     max_iter=90000,
+     amp=dict(enabled=False),  # options for Automatic Mixed Precision
+     ddp=dict(  # options for DistributedDataParallel
+         broadcast_buffers=False,
+         find_unused_parameters=False,
+         fp16_compression=False,
+     ),
+     checkpointer=dict(period=5000, max_to_keep=100),  # options for PeriodicCheckpointer
+     eval_period=5000,
+     log_period=20,
+     device="cuda",
+     # ...
+ )
configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py ADDED
@@ -0,0 +1,9 @@
+ from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+     dataloader,
+     lr_multiplier,
+     model,
+     optimizer,
+     train,
+ )
+
+ model.backbone.bottom_up.stages.depth = 101
configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py ADDED
@@ -0,0 +1,14 @@
+ from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+     dataloader,
+     lr_multiplier,
+     model,
+     optimizer,
+     train,
+ )
+
+ train.max_iter *= 2  # 100ep -> 200ep
+
+ lr_multiplier.scheduler.milestones = [
+     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+ ]
+ lr_multiplier.scheduler.num_updates = train.max_iter