Spaces: CVPR

lkeab committed a13a033 (1 parent: cbe011a)

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed (50):
  1. README.md +5 -5
  2. app.py +84 -0
  3. configs/Base-RCNN-C4.yaml +18 -0
  4. configs/Base-RCNN-DilatedC5.yaml +31 -0
  5. configs/Base-RCNN-FPN.yaml +43 -0
  6. configs/Base-RetinaNet.yaml +25 -0
  7. configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +27 -0
  8. configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml +27 -0
  9. configs/Detectron1-Comparisons/README.md +84 -0
  10. configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +17 -0
  11. configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +27 -0
  12. configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +20 -0
  13. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +19 -0
  14. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml +19 -0
  15. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +19 -0
  16. configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +23 -0
  17. configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +22 -0
  18. configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +22 -0
  19. configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +26 -0
  20. configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +12 -0
  21. configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +15 -0
  22. configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +36 -0
  23. configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +10 -0
  24. configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +8 -0
  25. configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +11 -0
  26. configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml +11 -0
  27. configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +21 -0
  28. configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +24 -0
  29. configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py +151 -0
  30. configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +26 -0
  31. configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +13 -0
  32. configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +19 -0
  33. configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +19 -0
  34. configs/Misc/semantic_R_50_FPN_1x.yaml +11 -0
  35. configs/Misc/torchvision_imagenet_R_50.py +150 -0
  36. configs/common/README.md +6 -0
  37. configs/common/coco_schedule.py +47 -0
  38. configs/common/data/coco.py +48 -0
  39. configs/common/data/coco_keypoint.py +13 -0
  40. configs/common/data/coco_panoptic_separated.py +26 -0
  41. configs/common/models/cascade_rcnn.py +36 -0
  42. configs/common/models/keypoint_rcnn_fpn.py +33 -0
  43. configs/common/models/mask_rcnn_c4.py +88 -0
  44. configs/common/models/mask_rcnn_fpn.py +93 -0
  45. configs/common/models/panoptic_fpn.py +20 -0
  46. configs/common/models/retinanet.py +52 -0
  47. configs/common/optim.py +15 -0
  48. configs/common/train.py +18 -0
  49. configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py +9 -0
  50. configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py +14 -0
README.md CHANGED
@@ -1,13 +1,13 @@
  ---
  title: Transfiner
- emoji: 🌍
+ emoji: 📊
- colorFrom: gray
+ colorFrom: red
- colorTo: gray
+ colorTo: green
  sdk: gradio
- sdk_version: 3.0.20
+ sdk_version: 2.9.3
  app_file: app.py
  pinned: false
  license: apache-2.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,84 @@
+ #try:
+ #    import detectron2
+ #except:
+ import os
+ os.system('pip install git+https://github.com/SysCV/transfiner.git')
+
+ from matplotlib.pyplot import axis
+ import gradio as gr
+ import requests
+ import numpy as np
+ from torch import nn
+
+ import torch
+
+ from detectron2 import model_zoo
+ from detectron2.engine import DefaultPredictor
+ from detectron2.config import get_cfg
+ from detectron2.utils.visualizer import Visualizer
+ from detectron2.data import MetadataCatalog
+
+
+ model_name = './configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml'
+
+
+ cfg = get_cfg()
+ # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+ cfg.merge_from_file(model_name)
+ cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
+ cfg.VIS_PERIOD = 100
+ # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
+ #cfg.MODEL.WEIGHTS = './output_3x_transfiner_r50.pth'
+ cfg.MODEL.WEIGHTS = './output_3x_transfiner_r101_deform.pth'
+
+ if not torch.cuda.is_available():
+     cfg.MODEL.DEVICE = 'cpu'
+
+ predictor = DefaultPredictor(cfg)
+
+
+ def inference(image):
+     # cap the width at 1300 px, preserving the aspect ratio
+     width, height = image.size
+     if width > 1300:
+         ratio = float(height) / float(width)
+         width = 1300
+         height = int(ratio * width)
+         image = image.resize((width, height))
+
+     img = np.asarray(image)
+
+     outputs = predictor(img)
+
+     v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
+     out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+
+     return out.get_image()
+
+
+ title = "Mask Transfiner [CVPR, 2022]"
+ description = "Demo for <a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> based on R50-FPN. To use it, simply upload your image, or click one of the examples to load them. Note that it runs in the <b>CPU environment</b> provided by Hugging Face, so the processing speed may be slow."
+ article = "<p style='text-align: center'><a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> | <a target='_blank' href='https://github.com/SysCV/transfiner'>Mask Transfiner Github Code</a></p>"
+
+ gr.Interface(
+     inference,
+     [gr.inputs.Image(type="pil", label="Input")],
+     gr.outputs.Image(type="numpy", label="Output"),
+     title=title,
+     description=description,
+     article=article,
+     examples=[
+         ["demo/sample_imgs/000000131444.jpg"],
+         ["demo/sample_imgs/000000157365.jpg"],
+         ["demo/sample_imgs/000000176037.jpg"],
+         ["demo/sample_imgs/000000018737.jpg"],
+         ["demo/sample_imgs/000000224200.jpg"],
+         ["demo/sample_imgs/000000558073.jpg"],
+         ["demo/sample_imgs/000000404922.jpg"],
+         ["demo/sample_imgs/000000252776.jpg"],
+         ["demo/sample_imgs/000000482477.jpg"],
+         ["demo/sample_imgs/000000344909.jpg"]
+     ]).launch()
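As a quick local sanity check of the handler above (an editor's sketch, not part of the commit), the `inference` function can be called directly on one of the bundled sample images; this assumes app.py's module-level setup (`cfg`, `predictor`) has already run in the process and that Pillow and matplotlib are installed:

```python
# Hypothetical smoke test for app.py's inference(); run from the Space's root directory.
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open("demo/sample_imgs/000000131444.jpg").convert("RGB")  # path from the examples list
vis = inference(img)            # H x W x 3 numpy visualization from detectron2's Visualizer
plt.imshow(vis)
plt.axis("off")
plt.savefig("prediction.png", bbox_inches="tight")
```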
configs/Base-RCNN-C4.yaml ADDED
@@ -0,0 +1,18 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   RPN:
+     PRE_NMS_TOPK_TEST: 6000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "Res5ROIHeads"
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RCNN-DilatedC5.yaml ADDED
@@ -0,0 +1,31 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   RESNETS:
+     OUT_FEATURES: ["res5"]
+     RES5_DILATION: 2
+   RPN:
+     IN_FEATURES: ["res5"]
+     PRE_NMS_TOPK_TEST: 6000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "StandardROIHeads"
+     IN_FEATURES: ["res5"]
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+   ROI_MASK_HEAD:
+     NAME: "MaskRCNNConvUpsampleHead"
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RCNN-FPN.yaml ADDED
@@ -0,0 +1,43 @@
+ MODEL:
+   META_ARCHITECTURE: "GeneralizedRCNN"
+   BACKBONE:
+     NAME: "build_resnet_fpn_backbone"
+   RESNETS:
+     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+   FPN:
+     IN_FEATURES: ["res2", "res3", "res4", "res5"]
+   ANCHOR_GENERATOR:
+     SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+   RPN:
+     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+     # Detectron1 uses 2000 proposals per-batch,
+     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+     POST_NMS_TOPK_TRAIN: 1000
+     POST_NMS_TOPK_TEST: 1000
+   ROI_HEADS:
+     NAME: "StandardROIHeads"
+     IN_FEATURES: ["p2", "p3", "p4", "p5"]
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_FC: 2
+     POOLER_RESOLUTION: 7
+   ROI_MASK_HEAD:
+     NAME: "MaskRCNNConvUpsampleHead"
+     NUM_CONV: 4
+     POOLER_RESOLUTION: 14
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   #TEST: ("coco_2017_val",)
+   TEST: ("coco_2017_test-dev",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.02
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Base-RetinaNet.yaml ADDED
@@ -0,0 +1,25 @@
+ MODEL:
+   META_ARCHITECTURE: "RetinaNet"
+   BACKBONE:
+     NAME: "build_retinanet_resnet_fpn_backbone"
+   RESNETS:
+     OUT_FEATURES: ["res3", "res4", "res5"]
+   ANCHOR_GENERATOR:
+     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+   FPN:
+     IN_FEATURES: ["res3", "res4", "res5"]
+   RETINANET:
+     IOU_THRESHOLDS: [0.4, 0.5]
+     IOU_LABELS: [0, -1, 1]
+     SMOOTH_L1_LOSS_BETA: 0.0
+ DATASETS:
+   TRAIN: ("coco_2017_train",)
+   TEST: ("coco_2017_val",)
+ SOLVER:
+   IMS_PER_BATCH: 16
+   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
+   STEPS: (60000, 80000)
+   MAX_ITER: 90000
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ VERSION: 2
configs/Cityscapes/mask_rcnn_R_50_FPN.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   # For better, more stable performance initialize from COCO
+   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+   MASK_ON: True
+   ROI_HEADS:
+     NUM_CLASSES: 8
+ # This is similar to the setting used in Mask R-CNN paper, Appendix A
+ # But there are some differences, e.g., we did not initialize the output
+ # layer using the corresponding classes from COCO
+ INPUT:
+   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+   MIN_SIZE_TRAIN_SAMPLING: "choice"
+   MIN_SIZE_TEST: 1024
+   MAX_SIZE_TRAIN: 2048
+   MAX_SIZE_TEST: 2048
+ DATASETS:
+   TRAIN: ("cityscapes_fine_instance_seg_train",)
+   TEST: ("cityscapes_fine_instance_seg_val",)
+ SOLVER:
+   BASE_LR: 0.01
+   STEPS: (18000,)
+   MAX_ITER: 24000
+   IMS_PER_BATCH: 8
+ TEST:
+   EVAL_PERIOD: 8000
configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   # For better, more stable performance initialize from COCO
+   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+   MASK_ON: True
+   ROI_HEADS:
+     NUM_CLASSES: 8
+ # This is similar to the setting used in Mask R-CNN paper, Appendix A
+ # But there are some differences, e.g., we did not initialize the output
+ # layer using the corresponding classes from COCO
+ INPUT:
+   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+   MIN_SIZE_TRAIN_SAMPLING: "choice"
+   MIN_SIZE_TEST: 1024
+   MAX_SIZE_TRAIN: 2048
+   MAX_SIZE_TEST: 2048
+ DATASETS:
+   TRAIN: ("cityscapes_fine_instance_seg_train",)
+   TEST: ("cityscapes_fine_instance_seg_val",)
+ SOLVER:
+   BASE_LR: 0.005
+   STEPS: (36000,)
+   MAX_ITER: 48000
+   IMS_PER_BATCH: 4
+ TEST:
+   EVAL_PERIOD: 48000
configs/Detectron1-Comparisons/README.md ADDED
@@ -0,0 +1,84 @@
+
+ Detectron2's model zoo uses experimental settings and a few implementation details that differ from Detectron.
+
+ The differences in implementation details are shared in
+ [Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+ The differences in the model zoo's experimental settings include:
+ * Use scale augmentation during training. This improves AP at lower training cost.
+ * Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+   affect other APs.
+ * Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+ * Use `ROIAlignV2`. This does not significantly affect AP.
+
+ In this directory, we provide a few configs that __do not__ have the above changes.
+ They mimic Detectron's behavior as closely as possible,
+ and provide a fair comparison of accuracy and speed against Detectron.
+
+ <!--
+ ./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+ -->
+
+
+ <table><tbody>
+ <!-- START TABLE -->
+ <!-- TABLE HEADER -->
+ <th valign="bottom">Name</th>
+ <th valign="bottom">lr<br/>sched</th>
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
+ <th valign="bottom">box<br/>AP</th>
+ <th valign="bottom">mask<br/>AP</th>
+ <th valign="bottom">kp.<br/>AP</th>
+ <th valign="bottom">model id</th>
+ <th valign="bottom">download</th>
+ <!-- TABLE BODY -->
+ <!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.219</td>
+ <td align="center">0.038</td>
+ <td align="center">3.1</td>
+ <td align="center">36.9</td>
+ <td align="center"></td>
+ <td align="center"></td>
+ <td align="center">137781054</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+ </tr>
+ <!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+ <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.313</td>
+ <td align="center">0.071</td>
+ <td align="center">5.0</td>
+ <td align="center">53.1</td>
+ <td align="center"></td>
+ <td align="center">64.2</td>
+ <td align="center">137781195</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+ </tr>
+ <!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+ <td align="center">1x</td>
+ <td align="center">0.273</td>
+ <td align="center">0.043</td>
+ <td align="center">3.4</td>
+ <td align="center">37.8</td>
+ <td align="center">34.9</td>
+ <td align="center"></td>
+ <td align="center">137781281</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+ </tr>
+ </tbody></table>
+
+ ## Comparisons:
+
+ * Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+ * Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+   [bug](https://github.com/facebookresearch/Detectron/issues/459) led to a drop in box AP, which can be
+   compensated for by some parameter tuning.
+ * Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We are 1 AP better in mask AP, due to a more correct implementation.
+   See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+ For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml ADDED
@@ -0,0 +1,17 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: False
+   RESNETS:
+     DEPTH: 50
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+ INPUT:
+   # no scale augmentation
+   MIN_SIZE_TRAIN: (800,)
configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,27 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   KEYPOINT_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1
+   ROI_KEYPOINT_HEAD:
+     POOLER_RESOLUTION: 14
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+     # 1000 proposals per-image is found to hurt box AP.
+     # Therefore we increase it to 1500 per-image.
+     POST_NMS_TOPK_TRAIN: 1500
+ DATASETS:
+   TRAIN: ("keypoints_coco_2017_train",)
+   TEST: ("keypoints_coco_2017_val",)
configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml ADDED
@@ -0,0 +1,20 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   # Detectron1 uses smooth L1 loss with some magic beta values.
+   # The defaults are changed to L1 loss in Detectron2.
+   RPN:
+     SMOOTH_L1_BETA: 0.1111
+   ROI_BOX_HEAD:
+     SMOOTH_L1_BETA: 1.0
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+   ROI_MASK_HEAD:
+     POOLER_SAMPLING_RATIO: 2
+     POOLER_TYPE: "ROIAlign"
+ INPUT:
+   # no scale augmentation
+   MIN_SIZE_TRAIN: (800,)
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "./model_final_824ab5.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 150  #300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 150  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml ADDED
@@ -0,0 +1,23 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+   PIXEL_STD: [57.375, 57.120, 58.395]
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1230
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v0.5_train",)
+   TEST: ("lvis_v0.5_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml ADDED
@@ -0,0 +1,22 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,22 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml ADDED
@@ -0,0 +1,26 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+   PIXEL_STD: [57.375, 57.120, 58.395]
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 101
+   ROI_HEADS:
+     NUM_CLASSES: 1203
+     SCORE_THRESH_TEST: 0.0001
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+ DATASETS:
+   TRAIN: ("lvis_v1_train",)
+   TEST: ("lvis_v1_val",)
+ SOLVER:
+   STEPS: (120000, 160000)
+   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+ TEST:
+   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+ DATALOADER:
+   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+   REPEAT_THRESHOLD: 0.001
configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,12 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml ADDED
@@ -0,0 +1,15 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml ADDED
@@ -0,0 +1,36 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   MASK_ON: True
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+   RESNETS:
+     STRIDE_IN_1X1: False  # this is a C2 model
+     NUM_GROUPS: 32
+     WIDTH_PER_GROUP: 8
+     DEPTH: 152
+     DEFORM_ON_PER_STAGE: [False, True, True, True]
+   ROI_HEADS:
+     NAME: "CascadeROIHeads"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "GN"
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     NUM_CONV: 8
+     NORM: "GN"
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   IMS_PER_BATCH: 128
+   STEPS: (35000, 45000)
+   MAX_ITER: 50000
+   BASE_LR: 0.16
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 864)
+   MIN_SIZE_TRAIN_SAMPLING: "range"
+   MAX_SIZE_TRAIN: 1440
+   CROP:
+     ENABLED: True
+ TEST:
+   EVAL_PERIOD: 2500
configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml ADDED
@@ -0,0 +1,10 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     CLS_AGNOSTIC_MASK: True
configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml ADDED
@@ -0,0 +1,8 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
+ SOLVER:
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+     DEFORM_MODULATED: False
+ SOLVER:
+   STEPS: (420000, 500000)  # (210000, 250000)
+   MAX_ITER: 540000  # 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml ADDED
@@ -0,0 +1,21 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     NORM: "GN"
+     STRIDE_IN_1X1: False
+   FPN:
+     NORM: "GN"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "GN"
+   ROI_MASK_HEAD:
+     NORM: "GN"
+ SOLVER:
+   # 3x schedule
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml ADDED
@@ -0,0 +1,24 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   MASK_ON: True
+   RESNETS:
+     DEPTH: 50
+     NORM: "SyncBN"
+     STRIDE_IN_1X1: True
+   FPN:
+     NORM: "SyncBN"
+   ROI_BOX_HEAD:
+     NAME: "FastRCNNConvFCHead"
+     NUM_CONV: 4
+     NUM_FC: 1
+     NORM: "SyncBN"
+   ROI_MASK_HEAD:
+     NORM: "SyncBN"
+ SOLVER:
+   # 3x schedule
+   STEPS: (210000, 250000)
+   MAX_ITER: 270000
+ TEST:
+   PRECISE_BN:
+     ENABLED: True
configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py ADDED
@@ -0,0 +1,151 @@
+ # An example config to train a mmdetection model using detectron2.
+
+ from ..common.data.coco import dataloader
+ from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+ from ..common.optim import SGD as optimizer
+ from ..common.train import train
+
+ from detectron2.modeling.mmdet_wrapper import MMDetDetector
+ from detectron2.config import LazyCall as L
+
+ model = L(MMDetDetector)(
+     detector=dict(
+         type="MaskRCNN",
+         pretrained="torchvision://resnet50",
+         backbone=dict(
+             type="ResNet",
+             depth=50,
+             num_stages=4,
+             out_indices=(0, 1, 2, 3),
+             frozen_stages=1,
+             norm_cfg=dict(type="BN", requires_grad=True),
+             norm_eval=True,
+             style="pytorch",
+         ),
+         neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+         rpn_head=dict(
+             type="RPNHead",
+             in_channels=256,
+             feat_channels=256,
+             anchor_generator=dict(
+                 type="AnchorGenerator",
+                 scales=[8],
+                 ratios=[0.5, 1.0, 2.0],
+                 strides=[4, 8, 16, 32, 64],
+             ),
+             bbox_coder=dict(
+                 type="DeltaXYWHBBoxCoder",
+                 target_means=[0.0, 0.0, 0.0, 0.0],
+                 target_stds=[1.0, 1.0, 1.0, 1.0],
+             ),
+             loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+             loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+         ),
+         roi_head=dict(
+             type="StandardRoIHead",
+             bbox_roi_extractor=dict(
+                 type="SingleRoIExtractor",
+                 roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+                 out_channels=256,
+                 featmap_strides=[4, 8, 16, 32],
+             ),
+             bbox_head=dict(
+                 type="Shared2FCBBoxHead",
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type="DeltaXYWHBBoxCoder",
+                     target_means=[0.0, 0.0, 0.0, 0.0],
+                     target_stds=[0.1, 0.1, 0.2, 0.2],
+                 ),
+                 reg_class_agnostic=False,
+                 loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+                 loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+             ),
+             mask_roi_extractor=dict(
+                 type="SingleRoIExtractor",
+                 roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+                 out_channels=256,
+                 featmap_strides=[4, 8, 16, 32],
+             ),
+             mask_head=dict(
+                 type="FCNMaskHead",
+                 num_convs=4,
+                 in_channels=256,
+                 conv_out_channels=256,
+                 num_classes=80,
+                 loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+             ),
+         ),
+         # model training and testing settings
+         train_cfg=dict(
+             rpn=dict(
+                 assigner=dict(
+                     type="MaxIoUAssigner",
+                     pos_iou_thr=0.7,
+                     neg_iou_thr=0.3,
+                     min_pos_iou=0.3,
+                     match_low_quality=True,
+                     ignore_iof_thr=-1,
+                 ),
+                 sampler=dict(
+                     type="RandomSampler",
+                     num=256,
+                     pos_fraction=0.5,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=False,
+                 ),
+                 allowed_border=-1,
+                 pos_weight=-1,
+                 debug=False,
+             ),
+             rpn_proposal=dict(
+                 nms_pre=2000,
+                 max_per_img=1000,
+                 nms=dict(type="nms", iou_threshold=0.7),
+                 min_bbox_size=0,
+             ),
+             rcnn=dict(
+                 assigner=dict(
+                     type="MaxIoUAssigner",
+                     pos_iou_thr=0.5,
+                     neg_iou_thr=0.5,
+                     min_pos_iou=0.5,
+                     match_low_quality=True,
+                     ignore_iof_thr=-1,
+                 ),
+                 sampler=dict(
+                     type="RandomSampler",
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True,
+                 ),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False,
+             ),
+         ),
+         test_cfg=dict(
+             rpn=dict(
+                 nms_pre=1000,
+                 max_per_img=1000,
+                 nms=dict(type="nms", iou_threshold=0.7),
+                 min_bbox_size=0,
+             ),
+             rcnn=dict(
+                 score_thr=0.05,
+                 nms=dict(type="nms", iou_threshold=0.5),
+                 max_per_img=100,
+                 mask_thr_binary=0.5,
+             ),
+         ),
+     ),
+     pixel_mean=[123.675, 116.280, 103.530],
+     pixel_std=[58.395, 57.120, 57.375],
+ )
+
+ dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
+ train.init_checkpoint = None  # pretrained model is loaded inside backbone
configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml ADDED
@@ -0,0 +1,26 @@
+ # A large PanopticFPN for demo purposes.
+ # Use GN on backbone to support semantic seg.
+ # Use Cascade + Deform Conv to improve localization.
+ _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+ MODEL:
+   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+   RESNETS:
+     DEPTH: 101
+     NORM: "GN"
+     DEFORM_ON_PER_STAGE: [False, True, True, True]
+     STRIDE_IN_1X1: False
+   FPN:
+     NORM: "GN"
+   ROI_HEADS:
+     NAME: CascadeROIHeads
+   ROI_BOX_HEAD:
+     CLS_AGNOSTIC_BBOX_REG: True
+   ROI_MASK_HEAD:
+     NORM: "GN"
+   RPN:
+     POST_NMS_TOPK_TRAIN: 2000
+ SOLVER:
+   STEPS: (105000, 125000)
+   MAX_ITER: 135000
+   IMS_PER_BATCH: 32
+   BASE_LR: 0.04
configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml ADDED
@@ -0,0 +1,13 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+ MODEL:
+   # Train from random initialization.
+   WEIGHTS: ""
+   # It makes sense to divide by STD when training from scratch
+   # But it seems to make no difference on the results and C2's models didn't do this.
+   # So we keep things consistent with C2.
+   # PIXEL_STD: [57.375, 57.12, 58.395]
+   MASK_ON: True
+   BACKBONE:
+     FREEZE_AT: 0
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+ MODEL:
+   PIXEL_STD: [57.375, 57.12, 58.395]
+   WEIGHTS: ""
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False
+   BACKBONE:
+     FREEZE_AT: 0
+ SOLVER:
+   # 9x schedule
+   IMS_PER_BATCH: 64  # 4x the standard
+   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+   MAX_ITER: 202500  # 90k * 9 / 4
+   BASE_LR: 0.08
+ TEST:
+   EVAL_PERIOD: 2500
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml ADDED
@@ -0,0 +1,19 @@
+ _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+ MODEL:
+   PIXEL_STD: [57.375, 57.12, 58.395]
+   WEIGHTS: ""
+   MASK_ON: True
+   RESNETS:
+     STRIDE_IN_1X1: False
+   BACKBONE:
+     FREEZE_AT: 0
+ SOLVER:
+   # 9x schedule
+   IMS_PER_BATCH: 64  # 4x the standard
+   STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+   MAX_ITER: 202500  # 90k * 9 / 4
+   BASE_LR: 0.08
+ TEST:
+   EVAL_PERIOD: 2500
+ # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+ # to learn what you need for training from scratch.
configs/Misc/semantic_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,11 @@
+ _BASE_: "../Base-RCNN-FPN.yaml"
+ MODEL:
+   META_ARCHITECTURE: "SemanticSegmentor"
+   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+   RESNETS:
+     DEPTH: 50
+ DATASETS:
+   TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+   TEST: ("coco_2017_val_panoptic_stuffonly",)
+ INPUT:
+   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
configs/Misc/torchvision_imagenet_R_50.py ADDED
@@ -0,0 +1,150 @@
+ """
+ An example config file to train an ImageNet classifier with detectron2.
+ Model and dataloader both come from torchvision.
+ This shows how to use detectron2 as a general engine for any new models and tasks.
+
+ To run, use the following command:
+
+ python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+     --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+ """
+
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+ from omegaconf import OmegaConf
+ import torchvision
+ from torchvision.transforms import transforms as T
+ from torchvision.models.resnet import ResNet, Bottleneck
+ from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+ from detectron2.solver import WarmupParamScheduler
+ from detectron2.solver.build import get_default_optimizer_params
+ from detectron2.config import LazyCall as L
+ from detectron2.model_zoo import get_config
+ from detectron2.data.samplers import TrainingSampler, InferenceSampler
+ from detectron2.evaluation import DatasetEvaluator
+ from detectron2.utils import comm
+
+
+ """
+ Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+ proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
+ Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+ In practice, you might want to put code in your project and import it instead.
+ """
+
+
+ def build_data_loader(dataset, batch_size, num_workers, training=True):
+     return torch.utils.data.DataLoader(
+         dataset,
+         sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
+         batch_size=batch_size,
+         num_workers=num_workers,
+         pin_memory=True,
+     )
+
+
+ class ClassificationNet(nn.Module):
+     def __init__(self, model: nn.Module):
+         super().__init__()
+         self.model = model
+
+     @property
+     def device(self):
+         return list(self.model.parameters())[0].device
+
+     def forward(self, inputs):
+         image, label = inputs
+         pred = self.model(image.to(self.device))
+         if self.training:
+             label = label.to(self.device)
+             return F.cross_entropy(pred, label)
+         else:
+             return pred
+
+
+ class ClassificationAcc(DatasetEvaluator):
+     def reset(self):
+         self.corr = self.total = 0
+
+     def process(self, inputs, outputs):
+         image, label = inputs
+         self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+         self.total += len(label)
+
+     def evaluate(self):
+         all_corr_total = comm.all_gather([self.corr, self.total])
+         corr = sum(x[0] for x in all_corr_total)
+         total = sum(x[1] for x in all_corr_total)
+         return {"accuracy": corr / total}
+
+
+ # --- End of code that could be in a project and be imported
+
+
+ dataloader = OmegaConf.create()
+ dataloader.train = L(build_data_loader)(
+     dataset=L(torchvision.datasets.ImageNet)(
+         root="/path/to/imagenet",
+         split="train",
+         transform=L(T.Compose)(
+             transforms=[
+                 L(T.RandomResizedCrop)(size=224),
+                 L(T.RandomHorizontalFlip)(),
+                 T.ToTensor(),
+                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             ]
+         ),
+     ),
+     batch_size=256 // 8,
+     num_workers=4,
+     training=True,
+ )
+
+ dataloader.test = L(build_data_loader)(
+     dataset=L(torchvision.datasets.ImageNet)(
+         root="${...train.dataset.root}",
+         split="val",
+         transform=L(T.Compose)(
+             transforms=[
+                 L(T.Resize)(size=256),
+                 L(T.CenterCrop)(size=224),
+                 T.ToTensor(),
+                 L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             ]
+         ),
+     ),
+     batch_size=256 // 8,
+     num_workers=4,
+     training=False,
+ )
+
+ dataloader.evaluator = L(ClassificationAcc)()
+
+ model = L(ClassificationNet)(
+     model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+ )
+
+
+ optimizer = L(torch.optim.SGD)(
+     params=L(get_default_optimizer_params)(),
+     lr=0.1,
+     momentum=0.9,
+     weight_decay=1e-4,
+ )
+
+ lr_multiplier = L(WarmupParamScheduler)(
+     scheduler=L(MultiStepParamScheduler)(
+         values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+     ),
+     warmup_length=1 / 100,
+     warmup_factor=0.1,
+ )
+
+
+ train = get_config("common/train.py").train
+ train.init_checkpoint = None
+ train.max_iter = 100 * 1281167 // 256
configs/common/README.md ADDED
@@ -0,0 +1,6 @@
+ This directory provides definitions for a few common models, dataloaders, schedulers,
+ and optimizers that are often used in training.
+ The definitions of these objects are provided in the form of lazy instantiation:
+ their arguments can be edited by users before constructing the objects.
+
+ They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs.
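To make the lazy-instantiation contract concrete, here is a small sketch (an editor's addition, not part of the commit) that loads one of these definitions through `model_zoo.get_config`, edits an argument, and only then builds the object; it assumes a detectron2 installation that ships `detectron2.config.instantiate`:

```python
# Sketch: edit a lazily-defined model before construction.
from detectron2 import model_zoo
from detectron2.config import instantiate

cfg = model_zoo.get_config("common/models/mask_rcnn_fpn.py")  # an omegaconf tree; nothing is built yet
cfg.model.roi_heads.num_classes = 8                           # edit arguments while still a config
model = instantiate(cfg.model)                                # modules are constructed only here
```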
configs/common/coco_schedule.py ADDED
@@ -0,0 +1,47 @@
+ from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+ from detectron2.config import LazyCall as L
+ from detectron2.solver import WarmupParamScheduler
+
+
+ def default_X_scheduler(num_X):
+     """
+     Returns the config for a default multi-step LR scheduler such as "1x", "3x",
+     commonly referred to in papers, where every 1x has the total length of 1440k
+     training images (~12 COCO epochs). LR is decayed twice at the end of training
+     following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+
+     Args:
+         num_X: a positive real number
+
+     Returns:
+         DictConfig: configs that define the multiplier for LR during training
+     """
+     # total number of iterations assuming 16 batch size, using 1440000/16=90000
+     total_steps_16bs = num_X * 90000
+
+     if num_X <= 2:
+         scheduler = L(MultiStepParamScheduler)(
+             values=[1.0, 0.1, 0.01],
+             # note that scheduler is scale-invariant. This is equivalent to
+             # milestones=[6, 8, 9]
+             milestones=[60000, 80000, 90000],
+         )
+     else:
+         scheduler = L(MultiStepParamScheduler)(
+             values=[1.0, 0.1, 0.01],
+             milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
+         )
+     return L(WarmupParamScheduler)(
+         scheduler=scheduler,
+         warmup_length=1000 / total_steps_16bs,
+         warmup_method="linear",
+         warmup_factor=0.001,
+     )
+
+
+ lr_multiplier_1x = default_X_scheduler(1)
+ lr_multiplier_2x = default_X_scheduler(2)
+ lr_multiplier_3x = default_X_scheduler(3)
+ lr_multiplier_6x = default_X_scheduler(6)
+ lr_multiplier_9x = default_X_scheduler(9)
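As a quick cross-check (editor's note), `default_X_scheduler(3)` reproduces exactly the milestones that the yaml configs in this commit spell out as `STEPS: (210000, 250000)` / `MAX_ITER: 270000`:

```python
# The num_X > 2 branch of default_X_scheduler, written out for num_X = 3.
num_X = 3
total_steps_16bs = num_X * 90000  # 270000 iterations at batch size 16
milestones = [total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs]
assert milestones == [210000, 250000, 270000]  # matches STEPS + MAX_ITER in the 3x yaml configs
```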
configs/common/data/coco.py ADDED
@@ -0,0 +1,48 @@
+ from omegaconf import OmegaConf
+
+ import detectron2.data.transforms as T
+ from detectron2.config import LazyCall as L
+ from detectron2.data import (
+     DatasetMapper,
+     build_detection_test_loader,
+     build_detection_train_loader,
+     get_detection_dataset_dicts,
+ )
+ from detectron2.evaluation import COCOEvaluator
+
+ dataloader = OmegaConf.create()
+
+ dataloader.train = L(build_detection_train_loader)(
+     dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+     mapper=L(DatasetMapper)(
+         is_train=True,
+         augmentations=[
+             L(T.ResizeShortestEdge)(
+                 short_edge_length=(640, 672, 704, 736, 768, 800),
+                 sample_style="choice",
+                 max_size=1333,
+             ),
+             L(T.RandomFlip)(horizontal=True),
+         ],
+         image_format="BGR",
+         use_instance_mask=True,
+     ),
+     total_batch_size=16,
+     num_workers=4,
+ )
+
+ dataloader.test = L(build_detection_test_loader)(
+     dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+     mapper=L(DatasetMapper)(
+         is_train=False,
+         augmentations=[
+             L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+         ],
+         image_format="${...train.mapper.image_format}",
+     ),
+     num_workers=4,
+ )
+
+ dataloader.evaluator = L(COCOEvaluator)(
+     dataset_name="${..test.dataset.names}",
+ )
configs/common/data/coco_keypoint.py ADDED
@@ -0,0 +1,13 @@
+ from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+ from .coco import dataloader
+
+ dataloader.train.dataset.min_keypoints = 1
+ dataloader.train.dataset.names = "keypoints_coco_2017_train"
+ dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+ dataloader.train.mapper.update(
+     use_instance_mask=False,
+     use_keypoint=True,
+     keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+ )
configs/common/data/coco_panoptic_separated.py ADDED
@@ -0,0 +1,26 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.evaluation import (
+     COCOEvaluator,
+     COCOPanopticEvaluator,
+     DatasetEvaluators,
+     SemSegEvaluator,
+ )
+
+ from .coco import dataloader
+
+ dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+ dataloader.train.dataset.filter_empty = False
+ dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+ dataloader.evaluator = [
+     L(COCOEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+     L(SemSegEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+     L(COCOPanopticEvaluator)(
+         dataset_name="${...test.dataset.names}",
+     ),
+ ]
configs/common/models/cascade_rcnn.py ADDED
@@ -0,0 +1,36 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
+
+ from .mask_rcnn_fpn import model
+
+ # arguments that don't exist for Cascade R-CNN
+ [model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+ model.roi_heads.update(
+     _target_=CascadeROIHeads,
+     box_heads=[
+         L(FastRCNNConvFCHead)(
+             input_shape=ShapeSpec(channels=256, height=7, width=7),
+             conv_dims=[],
+             fc_dims=[1024, 1024],
+         )
+         for k in range(3)
+     ],
+     box_predictors=[
+         L(FastRCNNOutputLayers)(
+             input_shape=ShapeSpec(channels=1024),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+             cls_agnostic_bbox_reg=True,
+             num_classes="${...num_classes}",
+         )
+         for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+     ],
+     proposal_matchers=[
+         L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+         for th in [0.5, 0.6, 0.7]
+     ],
+ )
configs/common/models/keypoint_rcnn_fpn.py ADDED
@@ -0,0 +1,33 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
+
+ from .mask_rcnn_fpn import model
+
+ [model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
+
+ model.roi_heads.update(
+     num_classes=1,
+     keypoint_in_features=["p2", "p3", "p4", "p5"],
+     keypoint_pooler=L(ROIPooler)(
+         output_size=14,
+         scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+         sampling_ratio=0,
+         pooler_type="ROIAlignV2",
+     ),
+     keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
+         input_shape=ShapeSpec(channels=256, width=14, height=14),
+         num_keypoints=17,
+         conv_dims=[512] * 8,
+         loss_normalizer="visible",
+     ),
+ )
+
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ model.proposal_generator.post_nms_topk = (1500, 1000)
+
+ # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+ model.roi_heads.box_predictor.smooth_l1_beta = 0.5
configs/common/models/mask_rcnn_c4.py ADDED
@@ -0,0 +1,88 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import GeneralizedRCNN
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+ from detectron2.modeling.roi_heads import (
+     FastRCNNOutputLayers,
+     MaskRCNNConvUpsampleHead,
+     Res5ROIHeads,
+ )
+
+ model = L(GeneralizedRCNN)(
+     backbone=L(ResNet)(
+         stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+         stages=L(ResNet.make_default_stages)(
+             depth=50,
+             stride_in_1x1=True,
+             norm="FrozenBN",
+         ),
+         out_features=["res4"],
+     ),
+     proposal_generator=L(RPN)(
+         in_features=["res4"],
+         head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
+         anchor_generator=L(DefaultAnchorGenerator)(
+             sizes=[[32, 64, 128, 256, 512]],
+             aspect_ratios=[0.5, 1.0, 2.0],
+             strides=[16],
+             offset=0.0,
+         ),
+         anchor_matcher=L(Matcher)(
+             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+         ),
+         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+         batch_size_per_image=256,
+         positive_fraction=0.5,
+         pre_nms_topk=(12000, 6000),
+         post_nms_topk=(2000, 1000),
+         nms_thresh=0.7,
+     ),
+     roi_heads=L(Res5ROIHeads)(
+         num_classes=80,
+         batch_size_per_image=512,
+         positive_fraction=0.25,
+         proposal_matcher=L(Matcher)(
+             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+         ),
+         in_features=["res4"],
+         pooler=L(ROIPooler)(
+             output_size=14,
+             scales=(1.0 / 16,),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         res5=L(ResNet.make_stage)(
+             block_class=BottleneckBlock,
+             num_blocks=3,
+             stride_per_block=[2, 1, 1],
+             in_channels=1024,
+             bottleneck_channels=512,
+             out_channels=2048,
+             norm="FrozenBN",
+             stride_in_1x1=True,
+         ),
+         box_predictor=L(FastRCNNOutputLayers)(
+             input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+             num_classes="${..num_classes}",
+         ),
+         mask_head=L(MaskRCNNConvUpsampleHead)(
+             input_shape=L(ShapeSpec)(
+                 channels="${...res5.out_channels}",
+                 width="${...pooler.output_size}",
+                 height="${...pooler.output_size}",
+             ),
+             num_classes="${..num_classes}",
+             conv_dims=[256],
+         ),
+     ),
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
configs/common/models/mask_rcnn_fpn.py ADDED
@@ -0,0 +1,93 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import GeneralizedRCNN
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+ from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.poolers import ROIPooler
+ from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+ from detectron2.modeling.roi_heads import (
+     StandardROIHeads,
+     FastRCNNOutputLayers,
+     MaskRCNNConvUpsampleHead,
+     FastRCNNConvFCHead,
+ )
+
+ model = L(GeneralizedRCNN)(
+     backbone=L(FPN)(
+         bottom_up=L(ResNet)(
+             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+             stages=L(ResNet.make_default_stages)(
+                 depth=50,
+                 stride_in_1x1=True,
+                 norm="FrozenBN",
+             ),
+             out_features=["res2", "res3", "res4", "res5"],
+         ),
+         in_features="${.bottom_up.out_features}",
+         out_channels=256,
+         top_block=L(LastLevelMaxPool)(),
+     ),
+     proposal_generator=L(RPN)(
+         in_features=["p2", "p3", "p4", "p5", "p6"],
+         head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
+         anchor_generator=L(DefaultAnchorGenerator)(
+             sizes=[[32], [64], [128], [256], [512]],
+             aspect_ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64],
+             offset=0.0,
+         ),
+         anchor_matcher=L(Matcher)(
+             thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+         ),
+         box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+         batch_size_per_image=256,
+         positive_fraction=0.5,
+         pre_nms_topk=(2000, 1000),
+         post_nms_topk=(1000, 1000),
+         nms_thresh=0.7,
+     ),
+     roi_heads=L(StandardROIHeads)(
+         num_classes=80,
+         batch_size_per_image=512,
+         positive_fraction=0.25,
+         proposal_matcher=L(Matcher)(
+             thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+         ),
+         box_in_features=["p2", "p3", "p4", "p5"],
+         box_pooler=L(ROIPooler)(
+             output_size=7,
+             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         box_head=L(FastRCNNConvFCHead)(
+             input_shape=ShapeSpec(channels=256, height=7, width=7),
+             conv_dims=[],
+             fc_dims=[1024, 1024],
+         ),
+         box_predictor=L(FastRCNNOutputLayers)(
+             input_shape=ShapeSpec(channels=1024),
+             test_score_thresh=0.05,
+             box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+             num_classes="${..num_classes}",
+         ),
+         mask_in_features=["p2", "p3", "p4", "p5"],
+         mask_pooler=L(ROIPooler)(
+             output_size=14,  # ori is 14
+             scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+             sampling_ratio=0,
+             pooler_type="ROIAlignV2",
+         ),
+         mask_head=L(MaskRCNNConvUpsampleHead)(
+             input_shape=ShapeSpec(channels=256, width=14, height=14),
+             num_classes="${..num_classes}",
+             conv_dims=[256, 256, 256, 256, 256],
+         ),
+     ),
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
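The `"${..num_classes}"` strings above are OmegaConf interpolations: setting `model.roi_heads.num_classes` once is enough for the box predictor and mask head to pick up the new value. A brief sketch (editor's addition; the config path is an assumption about where this file sits in a checkout):

```python
# Sketch: one edit propagates through the "${..num_classes}" interpolations.
from detectron2.config import LazyConfig

cfg = LazyConfig.load("configs/common/models/mask_rcnn_fpn.py")
cfg.model.roi_heads.num_classes = 1                    # e.g., a single-class detector
print(cfg.model.roi_heads.box_predictor.num_classes)   # -> 1, resolved via interpolation
print(cfg.model.roi_heads.mask_head.num_classes)       # -> 1
```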
configs/common/models/panoptic_fpn.py ADDED
@@ -0,0 +1,20 @@
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling import PanopticFPN
+ from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
+
+ from .mask_rcnn_fpn import model
+
+ model._target_ = PanopticFPN
+ model.sem_seg_head = L(SemSegFPNHead)(
+     input_shape={
+         f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
+         for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
+     },
+     ignore_value=255,
+     num_classes=54,  # COCO stuff + 1
+     conv_dims=128,
+     common_stride=4,
+     loss_weight=0.5,
+     norm="GN",
+ )
configs/common/models/retinanet.py ADDED
@@ -0,0 +1,52 @@
+ # -*- coding: utf-8 -*-
+
+ from detectron2.config import LazyCall as L
+ from detectron2.layers import ShapeSpec
+ from detectron2.modeling.meta_arch import RetinaNet
+ from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+ from detectron2.modeling.backbone.fpn import LastLevelP6P7
+ from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+ from detectron2.modeling.box_regression import Box2BoxTransform
+ from detectron2.modeling.matcher import Matcher
+ from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
+
+ model = L(RetinaNet)(
+     backbone=L(FPN)(
+         bottom_up=L(ResNet)(
+             stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+             stages=L(ResNet.make_default_stages)(
+                 depth=50,
+                 stride_in_1x1=True,
+                 norm="FrozenBN",
+             ),
+             out_features=["res3", "res4", "res5"],
+         ),
+         in_features=["res3", "res4", "res5"],
+         out_channels=256,
+         top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
+     ),
+     head=L(RetinaNetHead)(
+         input_shape=[ShapeSpec(channels=256)],
+         num_classes="${..num_classes}",
+         conv_dims=[256, 256, 256, 256],
+         prior_prob=0.01,
+         num_anchors=9,
+     ),
+     anchor_generator=L(DefaultAnchorGenerator)(
+         sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
+         aspect_ratios=[0.5, 1.0, 2.0],
+         strides=[8, 16, 32, 64, 128],
+         offset=0.0,
+     ),
+     box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+     anchor_matcher=L(Matcher)(
+         thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
+     ),
+     num_classes=80,
+     head_in_features=["p3", "p4", "p5", "p6", "p7"],
+     focal_loss_alpha=0.25,
+     focal_loss_gamma=2.0,
+     pixel_mean=[103.530, 116.280, 123.675],
+     pixel_std=[1.0, 1.0, 1.0],
+     input_format="BGR",
+ )
configs/common/optim.py ADDED
@@ -0,0 +1,15 @@
+ import torch
+
+ from detectron2.config import LazyCall as L
+ from detectron2.solver.build import get_default_optimizer_params
+
+ SGD = L(torch.optim.SGD)(
+     params=L(get_default_optimizer_params)(
+         # params.model is meant to be set to the model object, before instantiating
+         # the optimizer.
+         weight_decay_norm=0.0
+     ),
+     lr=0.02,
+     momentum=0.9,
+     weight_decay=1e-4,
+ )
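The comment inside `params` is the key contract here: `get_default_optimizer_params` is itself lazy, and a trainer is expected to attach the model before the optimizer is built. A minimal sketch of that hand-off (editor's addition, with a stand-in module instead of a real detector):

```python
# Sketch: fill the deferred params.model argument, then build the optimizer.
import torch.nn as nn
from detectron2.config import instantiate
from detectron2.model_zoo import get_config

cfg = get_config("common/optim.py")
model = nn.Linear(10, 10)             # stand-in for a built detector
cfg.SGD.params.model = model          # the deferred argument noted in the comment above
optimizer = instantiate(cfg.SGD)      # get_default_optimizer_params(model, ...) runs here
```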
configs/common/train.py ADDED
@@ -0,0 +1,18 @@
+ # Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
+ # You can use your own instead, together with your own train_net.py
+ train = dict(
+     output_dir="./output",
+     init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
+     max_iter=90000,
+     amp=dict(enabled=False),  # options for Automatic Mixed Precision
+     ddp=dict(  # options for DistributedDataParallel
+         broadcast_buffers=False,
+         find_unused_parameters=False,
+         fp16_compression=False,
+     ),
+     checkpointer=dict(period=5000, max_to_keep=100),  # options for PeriodicCheckpointer
+     eval_period=5000,
+     log_period=20,
+     device="cuda",
+     # ...
+ )
configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py ADDED
@@ -0,0 +1,9 @@
+ from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+     dataloader,
+     lr_multiplier,
+     model,
+     optimizer,
+     train,
+ )
+
+ model.backbone.bottom_up.stages.depth = 101
configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py ADDED
@@ -0,0 +1,14 @@
+ from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+     dataloader,
+     lr_multiplier,
+     model,
+     optimizer,
+     train,
+ )
+
+ train.max_iter *= 2  # 100ep -> 200ep
+
+ lr_multiplier.scheduler.milestones = [
+     milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+ ]
+ lr_multiplier.scheduler.num_updates = train.max_iter