diff --git a/README.md b/README.md
index 8e61cf66f29dca840183d4965dd57ebd435eb163..72c85e42a4751f3501d4e36c838a283d500c1120 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
 ---
 title: Transfiner
-emoji: 🌍
-colorFrom: gray
-colorTo: gray
+emoji: 📊
+colorFrom: red
+colorTo: green
 sdk: gradio
-sdk_version: 3.0.20
+sdk_version: 2.9.3
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9a5d4bdaf8a89f05e35346cc34c3159fa3919b7
--- /dev/null
+++ b/app.py
@@ -0,0 +1,84 @@
+#try:
+#    import detectron2
+#except:
+import os 
+os.system('pip install git+https://github.com/SysCV/transfiner.git')
+
+from matplotlib.pyplot import axis
+import gradio as gr
+import requests
+import numpy as np
+from torch import nn
+import requests
+
+import torch
+
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import MetadataCatalog
+
+
+model_name='./configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml'
+
+
+cfg = get_cfg()
+# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+cfg.merge_from_file(model_name)
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
+cfg.VIS_PERIOD = 100
+# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as w ell
+#cfg.MODEL.WEIGHTS = './output_3x_transfiner_r50.pth'
+cfg.MODEL.WEIGHTS = './output_3x_transfiner_r101_deform.pth'
+
+if not torch.cuda.is_available():
+    cfg.MODEL.DEVICE='cpu'
+
+predictor = DefaultPredictor(cfg)
+
+
+def inference(image):
+    width, height = image.size
+    if width > 1300:
+        ratio = float(height) / float(width)
+        width = 1300
+        height = int(ratio * width)
+        image = image.resize((width, height))
+
+    img = np.asarray(image)
+
+    #img = np.array(image)
+    outputs = predictor(img)
+
+    v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
+    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+    
+    return out.get_image()
+
+
+
+title = "Mask Transfiner [CVPR, 2022]"
+description = "Demo for <a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> based on R50-FPN. To use it, simply upload your image, or click one of the examples to load them. Note that it runs in the <b>CPU environment</b> provided by Hugging Face so the processing speed may be slow."
+article = "<p style='text-align: center'><a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> | <a target='_blank' href='https://github.com/SysCV/transfiner'>Mask Transfiner Github Code</a></p>"
+
+gr.Interface(
+    inference, 
+    [gr.inputs.Image(type="pil", label="Input")], 
+    gr.outputs.Image(type="numpy", label="Output"),
+    title=title,
+    description=description,
+    article=article,
+    examples=[
+            ["demo/sample_imgs/000000131444.jpg"],
+            ["demo/sample_imgs/000000157365.jpg"],
+            ["demo/sample_imgs/000000176037.jpg"],
+            ["demo/sample_imgs/000000018737.jpg"],
+            ["demo/sample_imgs/000000224200.jpg"],
+            ["demo/sample_imgs/000000558073.jpg"],
+            ["demo/sample_imgs/000000404922.jpg"],
+            ["demo/sample_imgs/000000252776.jpg"],
+            ["demo/sample_imgs/000000482477.jpg"],
+            ["demo/sample_imgs/000000344909.jpg"]
+        ]).launch()
+
diff --git a/configs/Base-RCNN-C4.yaml b/configs/Base-RCNN-C4.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad
--- /dev/null
+++ b/configs/Base-RCNN-C4.yaml
@@ -0,0 +1,18 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "Res5ROIHeads"
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RCNN-DilatedC5.yaml b/configs/Base-RCNN-DilatedC5.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27
--- /dev/null
+++ b/configs/Base-RCNN-DilatedC5.yaml
@@ -0,0 +1,31 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RESNETS:
+    OUT_FEATURES: ["res5"]
+    RES5_DILATION: 2
+  RPN:
+    IN_FEATURES: ["res5"]
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["res5"]
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..d1c7c791c40dbc5eec884e56c1bf18f422f52f1d
--- /dev/null
+++ b/configs/Base-RCNN-FPN.yaml
@@ -0,0 +1,43 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  #TEST: ("coco_2017_val",)
+  TEST: ("coco_2017_test-dev",)
+SOLVER:
+  IMS_PER_BATCH: 16 #16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RetinaNet.yaml b/configs/Base-RetinaNet.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8
--- /dev/null
+++ b/configs/Base-RetinaNet.yaml
@@ -0,0 +1,25 @@
+MODEL:
+  META_ARCHITECTURE: "RetinaNet"
+  BACKBONE:
+    NAME: "build_retinanet_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+  FPN:
+    IN_FEATURES: ["res3", "res4", "res5"]
+  RETINANET:
+    IOU_THRESHOLDS: [0.4, 0.5]
+    IOU_LABELS: [0, -1, 1]
+    SMOOTH_L1_LOSS_BETA: 0.0
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e
--- /dev/null
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.01
+  STEPS: (18000,)
+  MAX_ITER: 24000
+  IMS_PER_BATCH: 8
+TEST:
+  EVAL_PERIOD: 8000
diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..4b4f2e6545e6920f8d3a84f1c517d79679a848c0
--- /dev/null
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (36000,)
+  MAX_ITER: 48000
+  IMS_PER_BATCH: 4
+TEST:
+  EVAL_PERIOD: 48000
diff --git a/configs/Detectron1-Comparisons/README.md b/configs/Detectron1-Comparisons/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..924fd00af642ddf1a4ff4c4f5947f676134eb7de
--- /dev/null
+++ b/configs/Detectron1-Comparisons/README.md
@@ -0,0 +1,84 @@
+
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+  affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as close as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+<!--
+./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+-->
+
+
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">mask<br/>AP</th>
+<th valign="bottom">kp.<br/>AP</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.219</td>
+<td align="center">0.038</td>
+<td align="center">3.1</td>
+<td align="center">36.9</td>
+<td align="center"></td>
+<td align="center"></td>
+<td align="center">137781054</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+ <tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.313</td>
+<td align="center">0.071</td>
+<td align="center">5.0</td>
+<td align="center">53.1</td>
+<td align="center"></td>
+<td align="center">64.2</td>
+<td align="center">137781195</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+ <tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.273</td>
+<td align="center">0.043</td>
+<td align="center">3.4</td>
+<td align="center">37.8</td>
+<td align="center">34.9</td>
+<td align="center"></td>
+<td align="center">137781281</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+</tr>
+</tbody></table>
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be
+	compensated back by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation.
+  See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
diff --git a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa
--- /dev/null
+++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
diff --git a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808
--- /dev/null
+++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
diff --git a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6
--- /dev/null
+++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_MASK_HEAD:
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..de110d26e773c35504a96d75724545777d2332ee
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "./model_final_824ab5.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 150 #300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c474187bdf2db5c9662c8b7083ba481ded378fbd
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 150  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,23 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,26 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
diff --git a/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
@@ -0,0 +1,36 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  MASK_ON: True
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 152
+    DEFORM_ON_PER_STAGE: [False, True, True, True]
+  ROI_HEADS:
+    NAME: "CascadeROIHeads"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    NUM_CONV: 8
+    NORM: "GN"
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  IMS_PER_BATCH: 128
+  STEPS: (35000, 45000)
+  MAX_ITER: 50000
+  BASE_LR: 0.16
+INPUT:
+  MIN_SIZE_TRAIN: (640, 864)
+  MIN_SIZE_TRAIN_SAMPLING: "range"
+  MAX_SIZE_TRAIN: 1440
+  CROP:
+    ENABLED: True
+TEST:
+  EVAL_PERIOD: 2500
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    CLS_AGNOSTIC_MASK: True
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..699bea11dfa413c0718681752963cd97ab29b52c
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (420000, 500000) # (210000, 250000)
+  MAX_ITER: 540000 # 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "GN"
+    STRIDE_IN_1X1: False
+  FPN:
+    NORM: "GN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+  ROI_MASK_HEAD:
+    NORM: "GN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
@@ -0,0 +1,24 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+    STRIDE_IN_1X1: True
+  FPN:
+    NORM: "SyncBN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "SyncBN"
+  ROI_MASK_HEAD:
+    NORM: "SyncBN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+TEST:
+  PRECISE_BN:
+    ENABLED: True
diff --git a/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
new file mode 100755
index 0000000000000000000000000000000000000000..0f2464be744c083985898a25f9e71d00104f689d
--- /dev/null
+++ b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,151 @@
+# An example config to train a mmdetection model using detectron2.
+
+from ..common.data.coco import dataloader
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+from detectron2.modeling.mmdet_wrapper import MMDetDetector
+from detectron2.config import LazyCall as L
+
+model = L(MMDetDetector)(
+    detector=dict(
+        type="MaskRCNN",
+        pretrained="torchvision://resnet50",
+        backbone=dict(
+            type="ResNet",
+            depth=50,
+            num_stages=4,
+            out_indices=(0, 1, 2, 3),
+            frozen_stages=1,
+            norm_cfg=dict(type="BN", requires_grad=True),
+            norm_eval=True,
+            style="pytorch",
+        ),
+        neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+        rpn_head=dict(
+            type="RPNHead",
+            in_channels=256,
+            feat_channels=256,
+            anchor_generator=dict(
+                type="AnchorGenerator",
+                scales=[8],
+                ratios=[0.5, 1.0, 2.0],
+                strides=[4, 8, 16, 32, 64],
+            ),
+            bbox_coder=dict(
+                type="DeltaXYWHBBoxCoder",
+                target_means=[0.0, 0.0, 0.0, 0.0],
+                target_stds=[1.0, 1.0, 1.0, 1.0],
+            ),
+            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+        ),
+        roi_head=dict(
+            type="StandardRoIHead",
+            bbox_roi_extractor=dict(
+                type="SingleRoIExtractor",
+                roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+                out_channels=256,
+                featmap_strides=[4, 8, 16, 32],
+            ),
+            bbox_head=dict(
+                type="Shared2FCBBoxHead",
+                in_channels=256,
+                fc_out_channels=1024,
+                roi_feat_size=7,
+                num_classes=80,
+                bbox_coder=dict(
+                    type="DeltaXYWHBBoxCoder",
+                    target_means=[0.0, 0.0, 0.0, 0.0],
+                    target_stds=[0.1, 0.1, 0.2, 0.2],
+                ),
+                reg_class_agnostic=False,
+                loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+                loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+            ),
+            mask_roi_extractor=dict(
+                type="SingleRoIExtractor",
+                roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+                out_channels=256,
+                featmap_strides=[4, 8, 16, 32],
+            ),
+            mask_head=dict(
+                type="FCNMaskHead",
+                num_convs=4,
+                in_channels=256,
+                conv_out_channels=256,
+                num_classes=80,
+                loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+            ),
+        ),
+        # model training and testing settings
+        train_cfg=dict(
+            rpn=dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.7,
+                    neg_iou_thr=0.3,
+                    min_pos_iou=0.3,
+                    match_low_quality=True,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=256,
+                    pos_fraction=0.5,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=False,
+                ),
+                allowed_border=-1,
+                pos_weight=-1,
+                debug=False,
+            ),
+            rpn_proposal=dict(
+                nms_pre=2000,
+                max_per_img=1000,
+                nms=dict(type="nms", iou_threshold=0.7),
+                min_bbox_size=0,
+            ),
+            rcnn=dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.5,
+                    neg_iou_thr=0.5,
+                    min_pos_iou=0.5,
+                    match_low_quality=True,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=512,
+                    pos_fraction=0.25,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=True,
+                ),
+                mask_size=28,
+                pos_weight=-1,
+                debug=False,
+            ),
+        ),
+        test_cfg=dict(
+            rpn=dict(
+                nms_pre=1000,
+                max_per_img=1000,
+                nms=dict(type="nms", iou_threshold=0.7),
+                min_bbox_size=0,
+            ),
+            rcnn=dict(
+                score_thr=0.05,
+                nms=dict(type="nms", iou_threshold=0.5),
+                max_per_img=100,
+                mask_thr_binary=0.5,
+            ),
+        ),
+    ),
+    pixel_mean=[123.675, 116.280, 103.530],
+    pixel_std=[58.395, 57.120, 57.375],
+)
+
+dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
+train.init_checkpoint = None  # pretrained model is loaded inside backbone
diff --git a/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b
--- /dev/null
+++ b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
@@ -0,0 +1,26 @@
+# A large PanopticFPN for demo purposes.
+# Use GN on backbone to support semantic seg.
+# Use Cascade + Deform Conv to improve localization.
+_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+  RESNETS:
+    DEPTH: 101
+    NORM: "GN"
+    DEFORM_ON_PER_STAGE: [False, True, True, True]
+    STRIDE_IN_1X1: False
+  FPN:
+    NORM: "GN"
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    NORM: "GN"
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  STEPS: (105000, 125000)
+  MAX_ITER: 135000
+  IMS_PER_BATCH: 32
+  BASE_LR: 0.04
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,13 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+  # Train from random initialization.
+  WEIGHTS: ""
+  # It makes sense to divide by STD when training from scratch
+  # But it seems to make no difference on the results and C2's models didn't do this.
+  # So we keep things consistent with C2.
+  # PIXEL_STD: [57.375, 57.12, 58.395]
+  MASK_ON: True
+  BACKBONE:
+    FREEZE_AT: 0
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+  PIXEL_STD: [57.375, 57.12, 58.395]
+  WEIGHTS: ""
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False
+  BACKBONE:
+    FREEZE_AT: 0
+SOLVER:
+  # 9x schedule
+  IMS_PER_BATCH: 64  # 4x the standard
+  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+  MAX_ITER: 202500   # 90k * 9 / 4
+  BASE_LR: 0.08
+TEST:
+  EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+MODEL:
+  PIXEL_STD: [57.375, 57.12, 58.395]
+  WEIGHTS: ""
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False
+  BACKBONE:
+    FREEZE_AT: 0
+SOLVER:
+  # 9x schedule
+  IMS_PER_BATCH: 64  # 4x the standard
+  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+  MAX_ITER: 202500   # 90k * 9 / 4
+  BASE_LR: 0.08
+TEST:
+  EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/semantic_R_50_FPN_1x.yaml b/configs/Misc/semantic_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5
--- /dev/null
+++ b/configs/Misc/semantic_R_50_FPN_1x.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "SemanticSegmentor"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+DATASETS:
+  TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+  TEST: ("coco_2017_val_panoptic_stuffonly",)
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
diff --git a/configs/Misc/torchvision_imagenet_R_50.py b/configs/Misc/torchvision_imagenet_R_50.py
new file mode 100755
index 0000000000000000000000000000000000000000..0d75305bcf7445b98db84b3d489a1505d2fce5af
--- /dev/null
+++ b/configs/Misc/torchvision_imagenet_R_50.py
@@ -0,0 +1,150 @@
+"""
+An example config file to train a ImageNet classifier with detectron2.
+Model and dataloader both come from torchvision.
+This shows how to use detectron2 as a general engine for any new models and tasks.
+
+To run, use the following command:
+
+python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+    --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+"""
+
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from omegaconf import OmegaConf
+import torchvision
+from torchvision.transforms import transforms as T
+from torchvision.models.resnet import ResNet, Bottleneck
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.solver import WarmupParamScheduler
+from detectron2.solver.build import get_default_optimizer_params
+from detectron2.config import LazyCall as L
+from detectron2.model_zoo import get_config
+from detectron2.data.samplers import TrainingSampler, InferenceSampler
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.utils import comm
+
+
+"""
+Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2.
+Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+In practice, you might want to put code in your project and import them instead.
+"""
+
+
+def build_data_loader(dataset, batch_size, num_workers, training=True):
+    return torch.utils.data.DataLoader(
+        dataset,
+        sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
+        batch_size=batch_size,
+        num_workers=num_workers,
+        pin_memory=True,
+    )
+
+
+class ClassificationNet(nn.Module):
+    def __init__(self, model: nn.Module):
+        super().__init__()
+        self.model = model
+
+    @property
+    def device(self):
+        return list(self.model.parameters())[0].device
+
+    def forward(self, inputs):
+        image, label = inputs
+        pred = self.model(image.to(self.device))
+        if self.training:
+            label = label.to(self.device)
+            return F.cross_entropy(pred, label)
+        else:
+            return pred
+
+
+class ClassificationAcc(DatasetEvaluator):
+    def reset(self):
+        self.corr = self.total = 0
+
+    def process(self, inputs, outputs):
+        image, label = inputs
+        self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+        self.total += len(label)
+
+    def evaluate(self):
+        all_corr_total = comm.all_gather([self.corr, self.total])
+        corr = sum(x[0] for x in all_corr_total)
+        total = sum(x[1] for x in all_corr_total)
+        return {"accuracy": corr / total}
+
+
+# --- End of code that could be in a project and be imported
+
+
+dataloader = OmegaConf.create()
+dataloader.train = L(build_data_loader)(
+    dataset=L(torchvision.datasets.ImageNet)(
+        root="/path/to/imagenet",
+        split="train",
+        transform=L(T.Compose)(
+            transforms=[
+                L(T.RandomResizedCrop)(size=224),
+                L(T.RandomHorizontalFlip)(),
+                T.ToTensor(),
+                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+            ]
+        ),
+    ),
+    batch_size=256 // 8,
+    num_workers=4,
+    training=True,
+)
+
+dataloader.test = L(build_data_loader)(
+    dataset=L(torchvision.datasets.ImageNet)(
+        root="${...train.dataset.root}",
+        split="val",
+        transform=L(T.Compose)(
+            transforms=[
+                L(T.Resize)(size=256),
+                L(T.CenterCrop)(size=224),
+                T.ToTensor(),
+                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+            ]
+        ),
+    ),
+    batch_size=256 // 8,
+    num_workers=4,
+    training=False,
+)
+
+dataloader.evaluator = L(ClassificationAcc)()
+
+model = L(ClassificationNet)(
+    model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+)
+
+
+optimizer = L(torch.optim.SGD)(
+    params=L(get_default_optimizer_params)(),
+    lr=0.1,
+    momentum=0.9,
+    weight_decay=1e-4,
+)
+
+lr_multiplier = L(WarmupParamScheduler)(
+    scheduler=L(MultiStepParamScheduler)(
+        values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+    ),
+    warmup_length=1 / 100,
+    warmup_factor=0.1,
+)
+
+
+train = get_config("common/train.py").train
+train.init_checkpoint = None
+train.max_iter = 100 * 1281167 // 256
diff --git a/configs/common/README.md b/configs/common/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..912cc29927542bfe4258d3208cf52d73cb0ea477
--- /dev/null
+++ b/configs/common/README.md
@@ -0,0 +1,6 @@
+This directory provides definitions for a few common models, dataloaders, scheduler,
+and optimizers that are often used in training.
+The definition of these objects are provided in the form of lazy instantiation:
+their arguments can be edited by users before constructing the objects.
+
+They can be imported, or loaded by `model_zoo.get_config` API in users' own configs.
diff --git a/configs/common/coco_schedule.py b/configs/common/coco_schedule.py
new file mode 100755
index 0000000000000000000000000000000000000000..355e66a1d213cb599a7ffe55089d854089c8ead2
--- /dev/null
+++ b/configs/common/coco_schedule.py
@@ -0,0 +1,47 @@
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+
+
+def default_X_scheduler(num_X):
+    """
+    Returns the config for a default multi-step LR scheduler such as "1x", "3x",
+    commonly referred to in papers, where every 1x has the total length of 1440k
+    training images (~12 COCO epochs). LR is decayed twice at the end of training
+    following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+
+    Args:
+        num_X: a positive real number
+
+    Returns:
+        DictConfig: configs that define the multiplier for LR during training
+    """
+    # total number of iterations assuming 16 batch size, using 1440000/16=90000
+    total_steps_16bs = num_X * 90000
+
+    if num_X <= 2:
+        scheduler = L(MultiStepParamScheduler)(
+            values=[1.0, 0.1, 0.01],
+            # note that scheduler is scale-invariant. This is equivalent to
+            # milestones=[6, 8, 9]
+            milestones=[60000, 80000, 90000],
+        )
+    else:
+        scheduler = L(MultiStepParamScheduler)(
+            values=[1.0, 0.1, 0.01],
+            milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
+        )
+    return L(WarmupParamScheduler)(
+        scheduler=scheduler,
+        warmup_length=1000 / total_steps_16bs,
+        warmup_method="linear",
+        warmup_factor=0.001,
+    )
+
+
+lr_multiplier_1x = default_X_scheduler(1)
+lr_multiplier_2x = default_X_scheduler(2)
+lr_multiplier_3x = default_X_scheduler(3)
+lr_multiplier_6x = default_X_scheduler(6)
+lr_multiplier_9x = default_X_scheduler(9)
diff --git a/configs/common/data/coco.py b/configs/common/data/coco.py
new file mode 100755
index 0000000000000000000000000000000000000000..703c4385c7ddc7eb0759c98d102ab2384d6a9e3e
--- /dev/null
+++ b/configs/common/data/coco.py
@@ -0,0 +1,48 @@
+from omegaconf import OmegaConf
+
+import detectron2.data.transforms as T
+from detectron2.config import LazyCall as L
+from detectron2.data import (
+    DatasetMapper,
+    build_detection_test_loader,
+    build_detection_train_loader,
+    get_detection_dataset_dicts,
+)
+from detectron2.evaluation import COCOEvaluator
+
+dataloader = OmegaConf.create()
+
+dataloader.train = L(build_detection_train_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+    mapper=L(DatasetMapper)(
+        is_train=True,
+        augmentations=[
+            L(T.ResizeShortestEdge)(
+                short_edge_length=(640, 672, 704, 736, 768, 800),
+                sample_style="choice",
+                max_size=1333,
+            ),
+            L(T.RandomFlip)(horizontal=True),
+        ],
+        image_format="BGR",
+        use_instance_mask=True,
+    ),
+    total_batch_size=16,
+    num_workers=4,
+)
+
+dataloader.test = L(build_detection_test_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+    mapper=L(DatasetMapper)(
+        is_train=False,
+        augmentations=[
+            L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+        ],
+        image_format="${...train.mapper.image_format}",
+    ),
+    num_workers=4,
+)
+
+dataloader.evaluator = L(COCOEvaluator)(
+    dataset_name="${..test.dataset.names}",
+)
diff --git a/configs/common/data/coco_keypoint.py b/configs/common/data/coco_keypoint.py
new file mode 100755
index 0000000000000000000000000000000000000000..b4ceb066faf696954244205dc75376b767071217
--- /dev/null
+++ b/configs/common/data/coco_keypoint.py
@@ -0,0 +1,13 @@
+from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+from .coco import dataloader
+
+dataloader.train.dataset.min_keypoints = 1
+dataloader.train.dataset.names = "keypoints_coco_2017_train"
+dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+dataloader.train.mapper.update(
+    use_instance_mask=False,
+    use_keypoint=True,
+    keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+)
diff --git a/configs/common/data/coco_panoptic_separated.py b/configs/common/data/coco_panoptic_separated.py
new file mode 100755
index 0000000000000000000000000000000000000000..5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d
--- /dev/null
+++ b/configs/common/data/coco_panoptic_separated.py
@@ -0,0 +1,26 @@
+from detectron2.config import LazyCall as L
+from detectron2.evaluation import (
+    COCOEvaluator,
+    COCOPanopticEvaluator,
+    DatasetEvaluators,
+    SemSegEvaluator,
+)
+
+from .coco import dataloader
+
+dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+dataloader.train.dataset.filter_empty = False
+dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+dataloader.evaluator = [
+    L(COCOEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+    L(SemSegEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+    L(COCOPanopticEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+]
diff --git a/configs/common/models/cascade_rcnn.py b/configs/common/models/cascade_rcnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..c7372a801dc00d7fec4db8cda8c2612ce281d48a
--- /dev/null
+++ b/configs/common/models/cascade_rcnn.py
@@ -0,0 +1,36 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
+
+from .mask_rcnn_fpn import model
+
+# arguments that don't exist for Cascade R-CNN
+[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+model.roi_heads.update(
+    _target_=CascadeROIHeads,
+    box_heads=[
+        L(FastRCNNConvFCHead)(
+            input_shape=ShapeSpec(channels=256, height=7, width=7),
+            conv_dims=[],
+            fc_dims=[1024, 1024],
+        )
+        for k in range(3)
+    ],
+    box_predictors=[
+        L(FastRCNNOutputLayers)(
+            input_shape=ShapeSpec(channels=1024),
+            test_score_thresh=0.05,
+            box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+            cls_agnostic_bbox_reg=True,
+            num_classes="${...num_classes}",
+        )
+        for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+    ],
+    proposal_matchers=[
+        L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+        for th in [0.5, 0.6, 0.7]
+    ],
+)
diff --git a/configs/common/models/keypoint_rcnn_fpn.py b/configs/common/models/keypoint_rcnn_fpn.py
new file mode 100755
index 0000000000000000000000000000000000000000..56b3994df249884d4816fc9a5c7f553a9ab6f400
--- /dev/null
+++ b/configs/common/models/keypoint_rcnn_fpn.py
@@ -0,0 +1,33 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
+
+from .mask_rcnn_fpn import model
+
+[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
+
+model.roi_heads.update(
+    num_classes=1,
+    keypoint_in_features=["p2", "p3", "p4", "p5"],
+    keypoint_pooler=L(ROIPooler)(
+        output_size=14,
+        scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+        sampling_ratio=0,
+        pooler_type="ROIAlignV2",
+    ),
+    keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
+        input_shape=ShapeSpec(channels=256, width=14, height=14),
+        num_keypoints=17,
+        conv_dims=[512] * 8,
+        loss_normalizer="visible",
+    ),
+)
+
+# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+# 1000 proposals per-image is found to hurt box AP.
+# Therefore we increase it to 1500 per-image.
+model.proposal_generator.post_nms_topk = (1500, 1000)
+
+# Keypoint AP degrades (though box AP improves) when using plain L1 loss
+model.roi_heads.box_predictor.smooth_l1_beta = 0.5
diff --git a/configs/common/models/mask_rcnn_c4.py b/configs/common/models/mask_rcnn_c4.py
new file mode 100755
index 0000000000000000000000000000000000000000..a3dcf8be42a39c6e5f6e76e3ab23adeccb33085d
--- /dev/null
+++ b/configs/common/models/mask_rcnn_c4.py
@@ -0,0 +1,88 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+    FastRCNNOutputLayers,
+    MaskRCNNConvUpsampleHead,
+    Res5ROIHeads,
+)
+
+model = L(GeneralizedRCNN)(
+    backbone=L(ResNet)(
+        stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+        stages=L(ResNet.make_default_stages)(
+            depth=50,
+            stride_in_1x1=True,
+            norm="FrozenBN",
+        ),
+        out_features=["res4"],
+    ),
+    proposal_generator=L(RPN)(
+        in_features=["res4"],
+        head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
+        anchor_generator=L(DefaultAnchorGenerator)(
+            sizes=[[32, 64, 128, 256, 512]],
+            aspect_ratios=[0.5, 1.0, 2.0],
+            strides=[16],
+            offset=0.0,
+        ),
+        anchor_matcher=L(Matcher)(
+            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+        ),
+        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+        batch_size_per_image=256,
+        positive_fraction=0.5,
+        pre_nms_topk=(12000, 6000),
+        post_nms_topk=(2000, 1000),
+        nms_thresh=0.7,
+    ),
+    roi_heads=L(Res5ROIHeads)(
+        num_classes=80,
+        batch_size_per_image=512,
+        positive_fraction=0.25,
+        proposal_matcher=L(Matcher)(
+            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+        ),
+        in_features=["res4"],
+        pooler=L(ROIPooler)(
+            output_size=14,
+            scales=(1.0 / 16,),
+            sampling_ratio=0,
+            pooler_type="ROIAlignV2",
+        ),
+        res5=L(ResNet.make_stage)(
+            block_class=BottleneckBlock,
+            num_blocks=3,
+            stride_per_block=[2, 1, 1],
+            in_channels=1024,
+            bottleneck_channels=512,
+            out_channels=2048,
+            norm="FrozenBN",
+            stride_in_1x1=True,
+        ),
+        box_predictor=L(FastRCNNOutputLayers)(
+            input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
+            test_score_thresh=0.05,
+            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+            num_classes="${..num_classes}",
+        ),
+        mask_head=L(MaskRCNNConvUpsampleHead)(
+            input_shape=L(ShapeSpec)(
+                channels="${...res5.out_channels}",
+                width="${...pooler.output_size}",
+                height="${...pooler.output_size}",
+            ),
+            num_classes="${..num_classes}",
+            conv_dims=[256],
+        ),
+    ),
+    pixel_mean=[103.530, 116.280, 123.675],
+    pixel_std=[1.0, 1.0, 1.0],
+    input_format="BGR",
+)
diff --git a/configs/common/models/mask_rcnn_fpn.py b/configs/common/models/mask_rcnn_fpn.py
new file mode 100755
index 0000000000000000000000000000000000000000..3f87d8da83d93932ddd5e9dc5b38d42786c0cbb4
--- /dev/null
+++ b/configs/common/models/mask_rcnn_fpn.py
@@ -0,0 +1,93 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+    StandardROIHeads,
+    FastRCNNOutputLayers,
+    MaskRCNNConvUpsampleHead,
+    FastRCNNConvFCHead,
+)
+
+model = L(GeneralizedRCNN)(
+    backbone=L(FPN)(
+        bottom_up=L(ResNet)(
+            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+            stages=L(ResNet.make_default_stages)(
+                depth=50,
+                stride_in_1x1=True,
+                norm="FrozenBN",
+            ),
+            out_features=["res2", "res3", "res4", "res5"],
+        ),
+        in_features="${.bottom_up.out_features}",
+        out_channels=256,
+        top_block=L(LastLevelMaxPool)(),
+    ),
+    proposal_generator=L(RPN)(
+        in_features=["p2", "p3", "p4", "p5", "p6"],
+        head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
+        anchor_generator=L(DefaultAnchorGenerator)(
+            sizes=[[32], [64], [128], [256], [512]],
+            aspect_ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64],
+            offset=0.0,
+        ),
+        anchor_matcher=L(Matcher)(
+            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+        ),
+        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+        batch_size_per_image=256,
+        positive_fraction=0.5,
+        pre_nms_topk=(2000, 1000),
+        post_nms_topk=(1000, 1000),
+        nms_thresh=0.7,
+    ),
+    roi_heads=L(StandardROIHeads)(
+        num_classes=80,
+        batch_size_per_image=512,
+        positive_fraction=0.25,
+        proposal_matcher=L(Matcher)(
+            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+        ),
+        box_in_features=["p2", "p3", "p4", "p5"],
+        box_pooler=L(ROIPooler)(
+            output_size=7,
+            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+            sampling_ratio=0,
+            pooler_type="ROIAlignV2",
+        ),
+        box_head=L(FastRCNNConvFCHead)(
+            input_shape=ShapeSpec(channels=256, height=7, width=7),
+            conv_dims=[],
+            fc_dims=[1024, 1024],
+        ),
+        box_predictor=L(FastRCNNOutputLayers)(
+            input_shape=ShapeSpec(channels=1024),
+            test_score_thresh=0.05,
+            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+            num_classes="${..num_classes}",
+        ),
+        mask_in_features=["p2", "p3", "p4", "p5"],
+        mask_pooler=L(ROIPooler)(
+            output_size=14, # ori is 14
+            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+            sampling_ratio=0,
+            pooler_type="ROIAlignV2",
+        ),
+        mask_head=L(MaskRCNNConvUpsampleHead)(
+            input_shape=ShapeSpec(channels=256, width=14, height=14),
+            num_classes="${..num_classes}",
+            conv_dims=[256, 256, 256, 256, 256],
+        ),
+    ),
+    pixel_mean=[103.530, 116.280, 123.675],
+    pixel_std=[1.0, 1.0, 1.0],
+    input_format="BGR",
+)
diff --git a/configs/common/models/panoptic_fpn.py b/configs/common/models/panoptic_fpn.py
new file mode 100755
index 0000000000000000000000000000000000000000..88f55d2ce9db62e61445d6a3700067d9d864ecae
--- /dev/null
+++ b/configs/common/models/panoptic_fpn.py
@@ -0,0 +1,20 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling import PanopticFPN
+from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
+
+from .mask_rcnn_fpn import model
+
+model._target_ = PanopticFPN
+model.sem_seg_head = L(SemSegFPNHead)(
+    input_shape={
+        f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
+        for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
+    },
+    ignore_value=255,
+    num_classes=54,  # COCO stuff + 1
+    conv_dims=128,
+    common_stride=4,
+    loss_weight=0.5,
+    norm="GN",
+)
diff --git a/configs/common/models/retinanet.py b/configs/common/models/retinanet.py
new file mode 100755
index 0000000000000000000000000000000000000000..01d168fe6f054b88933488bdc65516424ce917cd
--- /dev/null
+++ b/configs/common/models/retinanet.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import RetinaNet
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelP6P7
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
+
+model = L(RetinaNet)(
+    backbone=L(FPN)(
+        bottom_up=L(ResNet)(
+            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+            stages=L(ResNet.make_default_stages)(
+                depth=50,
+                stride_in_1x1=True,
+                norm="FrozenBN",
+            ),
+            out_features=["res3", "res4", "res5"],
+        ),
+        in_features=["res3", "res4", "res5"],
+        out_channels=256,
+        top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
+    ),
+    head=L(RetinaNetHead)(
+        input_shape=[ShapeSpec(channels=256)],
+        num_classes="${..num_classes}",
+        conv_dims=[256, 256, 256, 256],
+        prior_prob=0.01,
+        num_anchors=9,
+    ),
+    anchor_generator=L(DefaultAnchorGenerator)(
+        sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
+        aspect_ratios=[0.5, 1.0, 2.0],
+        strides=[8, 16, 32, 64, 128],
+        offset=0.0,
+    ),
+    box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+    anchor_matcher=L(Matcher)(
+        thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
+    ),
+    num_classes=80,
+    head_in_features=["p3", "p4", "p5", "p6", "p7"],
+    focal_loss_alpha=0.25,
+    focal_loss_gamma=2.0,
+    pixel_mean=[103.530, 116.280, 123.675],
+    pixel_std=[1.0, 1.0, 1.0],
+    input_format="BGR",
+)
diff --git a/configs/common/optim.py b/configs/common/optim.py
new file mode 100755
index 0000000000000000000000000000000000000000..d39d3aaa546c17e831d21d1758b69e8c1609415e
--- /dev/null
+++ b/configs/common/optim.py
@@ -0,0 +1,15 @@
+import torch
+
+from detectron2.config import LazyCall as L
+from detectron2.solver.build import get_default_optimizer_params
+
+SGD = L(torch.optim.SGD)(
+    params=L(get_default_optimizer_params)(
+        # params.model is meant to be set to the model object, before instantiating
+        # the optimizer.
+        weight_decay_norm=0.0
+    ),
+    lr=0.02,
+    momentum=0.9,
+    weight_decay=1e-4,
+)
diff --git a/configs/common/train.py b/configs/common/train.py
new file mode 100755
index 0000000000000000000000000000000000000000..7c63bdb073797e48e0b3640e668ecc1d5c137d59
--- /dev/null
+++ b/configs/common/train.py
@@ -0,0 +1,18 @@
+# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
+# You can use your own instead, together with your own train_net.py
+train = dict(
+    output_dir="./output",
+    init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
+    max_iter=90000,
+    amp=dict(enabled=False),  # options for Automatic Mixed Precision
+    ddp=dict(  # options for DistributedDataParallel
+        broadcast_buffers=False,
+        find_unused_parameters=False,
+        fp16_compression=False,
+    ),
+    checkpointer=dict(period=5000, max_to_keep=100),  # options for PeriodicCheckpointer
+    eval_period=5000,
+    log_period=20,
+    device="cuda"
+    # ...
+)
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..3740e9bb08c5f168a9ab3a6d94561678bad1775c
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
@@ -0,0 +1,9 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+model.backbone.bottom_up.stages.depth = 101
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..18e5f0720c568db4ef0c97b59688b5e7866df606
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 2  # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..63c54ee9a5ce2368494b775cc90fada1439feaa5
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 4  # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..df7a2aedf480ed8dc4aa3645e37420e9b893fae4
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
@@ -0,0 +1,72 @@
+import detectron2.data.transforms as T
+from detectron2.config.lazy import LazyCall as L
+from detectron2.layers.batch_norm import NaiveSyncBatchNorm
+from detectron2.solver import WarmupParamScheduler
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+# train from scratch
+train.init_checkpoint = ""
+train.amp.enabled = True
+train.ddp.fp16_compression = True
+model.backbone.bottom_up.freeze_at = 0
+
+# SyncBN
+# fmt: off
+model.backbone.bottom_up.stem.norm = \
+    model.backbone.bottom_up.stages.norm = \
+    model.backbone.norm = "SyncBN"
+
+# Using NaiveSyncBatchNorm becase heads may have empty input. That is not supported by
+# torch.nn.SyncBatchNorm. We can remove this after
+# https://github.com/pytorch/pytorch/issues/36530 is fixed.
+model.roi_heads.box_head.conv_norm = \
+    model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
+                                                                       stats_mode="N")
+# fmt: on
+
+# 2conv in RPN:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97  # noqa: E501, B950
+model.proposal_generator.head.conv_dims = [-1, -1]
+
+# 4conv1fc box head
+model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
+model.roi_heads.box_head.fc_dims = [1024]
+
+# resize_and_crop_image in:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127  # noqa: E501, B950
+image_size = 1024
+dataloader.train.mapper.augmentations = [
+    L(T.ResizeScale)(
+        min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
+    ),
+    L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
+    L(T.RandomFlip)(horizontal=True),
+]
+
+# recompute boxes due to cropping
+dataloader.train.mapper.recompute_boxes = True
+
+# larger batch-size.
+dataloader.train.total_batch_size = 64
+
+# Equivalent to 100 epochs.
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
+train.max_iter = 184375
+
+lr_multiplier = L(WarmupParamScheduler)(
+    scheduler=L(MultiStepParamScheduler)(
+        values=[1.0, 0.1, 0.01],
+        milestones=[163889, 177546],
+        num_updates=train.max_iter,
+    ),
+    warmup_length=500 / train.max_iter,
+    warmup_factor=0.067,
+)
+
+optimizer.lr = 0.1
+optimizer.weight_decay = 4e-5
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..2a7c376da5f9269197c44079f3e0f3b09cdc63fa
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 2  # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..97586b8f5330a9d995a0bffd1f5e7bd5b5656462
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 4  # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..2ca1ede262cf5c37a3a54778458c74aff1479411
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter //= 2  # 100ep -> 50ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone // 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..249387fffeed7c02f592ecc84ee5a295533b1ed7
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,29 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py  # noqa
+model.backbone.bottom_up = L(RegNet)(
+    stem_class=SimpleStem,
+    stem_width=32,
+    block_class=ResBottleneckBlock,
+    depth=23,
+    w_a=38.65,
+    w_0=96,
+    w_m=2.43,
+    group_width=40,
+    norm="SyncBN",
+    out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..731320e74ebed4d8ceec58c07cb906542b8b021b
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 2  # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 4  # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..da94e6f90d823f110e4a2373d7fd16b3d1ab5ac3
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,30 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py  # noqa
+model.backbone.bottom_up = L(RegNet)(
+    stem_class=SimpleStem,
+    stem_width=32,
+    block_class=ResBottleneckBlock,
+    depth=22,
+    w_a=31.41,
+    w_0=96,
+    w_m=2.24,
+    group_width=64,
+    se_ratio=0.25,
+    norm="SyncBN",
+    out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..b867cc865e5ac4d7b70221da141894efd7cbd75c
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 2  # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
new file mode 100755
index 0000000000000000000000000000000000000000..7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+    dataloader,
+    lr_multiplier,
+    model,
+    optimizer,
+    train,
+)
+
+train.max_iter *= 4  # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+    milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/quick_schedules/README.md b/configs/quick_schedules/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f
--- /dev/null
+++ b/configs/quick_schedules/README.md
@@ -0,0 +1,8 @@
+These are quick configs for performance or accuracy regression tracking purposes.
+
+* `*instance_test.yaml`: can train on 2 GPUs. They are used to test whether the training can
+  successfully finish. They are not expected to produce reasonable training results.
+* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify
+  the results are as expected.
+* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy
+  is within the normal range.
diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e
--- /dev/null
+++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP",  43.87, 0.02]]
diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee
--- /dev/null
+++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896
--- /dev/null
+++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c
--- /dev/null
+++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+  TEST: ("coco_2017_val_100",)
+  PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
+DATASETS:
+  TEST: ("keypoints_coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,16 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 1
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_val_100",)
+  TEST: ("keypoints_coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
@@ -0,0 +1,30 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 256
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
+    LOSS_WEIGHT: 4.0
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
+  RPN:
+    SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_val",)
+  TEST: ("keypoints_coco_2017_val",)
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+  WARMUP_FACTOR: 0.33333333
+  WARMUP_ITERS: 100
+  STEPS: (5500, 5800)
+  MAX_ITER: 6000
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,28 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 256
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
+  RPN:
+    SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_val",)
+  TEST: ("keypoints_coco_2017_val",)
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+  WARMUP_FACTOR: 0.33333333
+  WARMUP_ITERS: 100
+  STEPS: (5500, 5800)
+  MAX_ITER: 6000
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.001
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+  CLIP_GRADIENTS:
+    ENABLED: True
+    CLIP_TYPE: "value"
+    CLIP_VALUE: 1.0
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.001
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 256
+  MASK_ON: True
+DATASETS:
+  TRAIN: ("coco_2017_val",)
+  TEST: ("coco_2017_val",)
+INPUT:
+  MIN_SIZE_TRAIN: (600,)
+  MAX_SIZE_TRAIN: 1000
+  MIN_SIZE_TEST: 800
+  MAX_SIZE_TEST: 1000
+SOLVER:
+  IMS_PER_BATCH: 8  # base uses 16
+  WARMUP_FACTOR: 0.33333
+  WARMUP_ITERS: 100
+  STEPS: (11000, 11600)
+  MAX_ITER: 12000
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP",  42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
+  AUG:
+    ENABLED: True
+    MIN_SIZES: (700, 800)  # to save some time
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
@@ -0,0 +1,6 @@
+_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
+MODEL:
+  ROI_BOX_HEAD:
+    TRAIN_ON_PRED_BOXES: True
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 256
+  MASK_ON: True
+DATASETS:
+  TRAIN: ("coco_2017_val",)
+  TEST: ("coco_2017_val",)
+INPUT:
+  MIN_SIZE_TRAIN: (600,)
+  MAX_SIZE_TRAIN: 1000
+  MIN_SIZE_TEST: 800
+  MAX_SIZE_TEST: 1000
+SOLVER:
+  WARMUP_FACTOR: 0.3333333
+  WARMUP_ITERS: 100
+  STEPS: (5500, 5800)
+  MAX_ITER: 6000
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100_panoptic_separated",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "PanopticFPN"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  SEM_SEG_HEAD:
+    LOSS_WEIGHT: 0.5
+DATASETS:
+  TRAIN: ("coco_2017_val_100_panoptic_separated",)
+  TEST: ("coco_2017_val_100_panoptic_separated",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 1
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "PanopticFPN"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  SEM_SEG_HEAD:
+    LOSS_WEIGHT: 0.5
+DATASETS:
+  TRAIN: ("coco_2017_val_panoptic_separated",)
+  TEST: ("coco_2017_val_panoptic_separated",)
+SOLVER:
+  BASE_LR: 0.01
+  WARMUP_FACTOR: 0.001
+  WARMUP_ITERS: 500
+  STEPS: (5500,)
+  MAX_ITER: 7000
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
diff --git a/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8
--- /dev/null
+++ b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
diff --git a/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9
--- /dev/null
+++ b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8
--- /dev/null
+++ b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
+DATASETS:
+  TEST: ("coco_2017_val_100",)
+TEST:
+  EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
diff --git a/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809
--- /dev/null
+++ b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+  TRAIN: ("coco_2017_val_100",)
+  TEST: ("coco_2017_val_100",)
+SOLVER:
+  STEPS: (30,)
+  MAX_ITER: 40
+  BASE_LR: 0.005
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "SemanticSegmentor"
+  WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
+  RESNETS:
+    DEPTH: 50
+DATASETS:
+  TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+TEST:
+  EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
diff --git a/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "SemanticSegmentor"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+DATASETS:
+  TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
+  TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (30,)
+  MAX_ITER: 40
+  IMS_PER_BATCH: 4
+DATALOADER:
+  NUM_WORKERS: 2
diff --git a/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "SemanticSegmentor"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+DATASETS:
+  TRAIN: ("coco_2017_val_panoptic_stuffonly",)
+  TEST: ("coco_2017_val_panoptic_stuffonly",)
+SOLVER:
+  BASE_LR: 0.01
+  WARMUP_FACTOR: 0.001
+  WARMUP_ITERS: 300
+  STEPS: (5500,)
+  MAX_ITER: 7000
+TEST:
+  EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
diff --git a/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml b/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..ad87a098a83e5d670378e7fa451bef3c6cc1f406
--- /dev/null
+++ b/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+OUTPUT_DIR: "./output_101_3x"
diff --git a/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml b/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..157f74d4078260d1261985d68cfde47cca5a80c9
--- /dev/null
+++ b/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+OUTPUT_DIR: "./output_101_3x_deform"
diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..e0cfed630dcce0da04c1461e72197e11bdc820c8
--- /dev/null
+++ b/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+OUTPUT_DIR: "./output_r50_1x"
diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..9f75563b51e1e071d9c70f7254c4495f2085fa12
--- /dev/null
+++ b/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+OUTPUT_DIR: "./output_r50_3x"
diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..2354b3419225809fd660c09779a3ee94841f5cdb
--- /dev/null
+++ b/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+OUTPUT_DIR: "./output_r50_3x_deform"
diff --git a/demo/README.md b/demo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f11ad3eb72953a7bc05d5e333fca4a62ab633b9c
--- /dev/null
+++ b/demo/README.md
@@ -0,0 +1,5 @@
+
+## Mask Transfiner Demo
+
+For visualization demo, please refer to our [visualization script](https://github.com/SysCV/transfiner#visualization).
+
diff --git a/demo/__pycache__/predictor.cpython-38.pyc b/demo/__pycache__/predictor.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5fed9aa41681040202f3708d1122e5240091b8a9
Binary files /dev/null and b/demo/__pycache__/predictor.cpython-38.pyc differ
diff --git a/demo/demo.py b/demo/demo.py
new file mode 100755
index 0000000000000000000000000000000000000000..a14dfb94c998bd3bfb650004a6fe1a23bf17eda3
--- /dev/null
+++ b/demo/demo.py
@@ -0,0 +1,190 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import argparse
+import glob
+import multiprocessing as mp
+import numpy as np
+import os
+import tempfile
+import time
+import warnings
+import cv2
+import tqdm
+
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.utils.logger import setup_logger
+
+from predictor import VisualizationDemo
+
+# constants
+WINDOW_NAME = "COCO detections"
+
+
+def setup_cfg(args):
+    # load config from file and command-line arguments
+    cfg = get_cfg()
+    # To use demo for Panoptic-DeepLab, please uncomment the following two lines.
+    # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config  # noqa
+    # add_panoptic_deeplab_config(cfg)
+    cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+    # Set score_threshold for builtin models
+    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
+    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
+    cfg.freeze()
+    return cfg
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
+    parser.add_argument(
+        "--config-file",
+        default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
+        metavar="FILE",
+        help="path to config file",
+    )
+    parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
+    parser.add_argument("--video-input", help="Path to video file.")
+    parser.add_argument(
+        "--input",
+        nargs="+",
+        help="A list of space separated input images; "
+        "or a single glob pattern such as 'directory/*.jpg'",
+    )
+    parser.add_argument(
+        "--output",
+        help="A file or directory to save output visualizations. "
+        "If not given, will show output in an OpenCV window.",
+    )
+
+    parser.add_argument(
+        "--confidence-threshold",
+        type=float,
+        default=0.5,
+        help="Minimum score for instance predictions to be shown",
+    )
+    parser.add_argument(
+        "--opts",
+        help="Modify config options using the command-line 'KEY VALUE' pairs",
+        default=[],
+        nargs=argparse.REMAINDER,
+    )
+    return parser
+
+
+def test_opencv_video_format(codec, file_ext):
+    with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
+        filename = os.path.join(dir, "test_file" + file_ext)
+        writer = cv2.VideoWriter(
+            filename=filename,
+            fourcc=cv2.VideoWriter_fourcc(*codec),
+            fps=float(30),
+            frameSize=(10, 10),
+            isColor=True,
+        )
+        [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
+        writer.release()
+        if os.path.isfile(filename):
+            return True
+        return False
+
+
+if __name__ == "__main__":
+    mp.set_start_method("spawn", force=True)
+    args = get_parser().parse_args()
+    setup_logger(name="fvcore")
+    logger = setup_logger()
+    logger.info("Arguments: " + str(args))
+
+    cfg = setup_cfg(args)
+
+    demo = VisualizationDemo(cfg)
+
+    if args.input:
+        if len(args.input) == 1:
+            args.input = glob.glob(os.path.expanduser(args.input[0]))
+            assert args.input, "The input path(s) was not found"
+        for path in tqdm.tqdm(args.input, disable=not args.output):
+            # use PIL, to be consistent with evaluation
+            img = read_image(path, format="BGR")
+            start_time = time.time()
+            predictions, visualized_output = demo.run_on_image(img)
+            logger.info(
+                "{}: {} in {:.2f}s".format(
+                    path,
+                    "detected {} instances".format(len(predictions["instances"]))
+                    if "instances" in predictions
+                    else "finished",
+                    time.time() - start_time,
+                )
+            )
+
+            if args.output:
+                if os.path.isdir(args.output):
+                    assert os.path.isdir(args.output), args.output
+                    out_filename = os.path.join(args.output, os.path.basename(path))
+                else:
+                    #assert len(args.input) == 1, "Please specify a directory with args.output"
+                    os.makedirs(args.output)
+                    out_filename = os.path.join(args.output, os.path.basename(path))
+                    #out_filename = args.output
+                visualized_output.save(out_filename)
+            else:
+                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
+                if cv2.waitKey(0) == 27:
+                    break  # esc to quit
+    elif args.webcam:
+        assert args.input is None, "Cannot have both --input and --webcam!"
+        assert args.output is None, "output not yet supported with --webcam!"
+        cam = cv2.VideoCapture(0)
+        for vis in tqdm.tqdm(demo.run_on_video(cam)):
+            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+            cv2.imshow(WINDOW_NAME, vis)
+            if cv2.waitKey(1) == 27:
+                break  # esc to quit
+        cam.release()
+        cv2.destroyAllWindows()
+    elif args.video_input:
+        video = cv2.VideoCapture(args.video_input)
+        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        frames_per_second = video.get(cv2.CAP_PROP_FPS)
+        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        basename = os.path.basename(args.video_input)
+        codec, file_ext = (
+            ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
+        )
+        if codec == ".mp4v":
+            warnings.warn("x264 codec not available, switching to mp4v")
+        if args.output:
+            if os.path.isdir(args.output):
+                output_fname = os.path.join(args.output, basename)
+                output_fname = os.path.splitext(output_fname)[0] + file_ext
+            else:
+                output_fname = args.output
+            assert not os.path.isfile(output_fname), output_fname
+            output_file = cv2.VideoWriter(
+                filename=output_fname,
+                # some installation of opencv may not support x264 (due to its license),
+                # you can try other format (e.g. MPEG)
+                fourcc=cv2.VideoWriter_fourcc(*codec),
+                fps=float(frames_per_second),
+                frameSize=(width, height),
+                isColor=True,
+            )
+        assert os.path.isfile(args.video_input)
+        for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
+            if args.output:
+                output_file.write(vis_frame)
+            else:
+                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
+                cv2.imshow(basename, vis_frame)
+                if cv2.waitKey(1) == 27:
+                    break  # esc to quit
+        video.release()
+        if args.output:
+            output_file.release()
+        else:
+            cv2.destroyAllWindows()
diff --git a/demo/predictor.py b/demo/predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b7ebd3f846850172c1f560f8492d51e5667f76d
--- /dev/null
+++ b/demo/predictor.py
@@ -0,0 +1,220 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import atexit
+import bisect
+import multiprocessing as mp
+from collections import deque
+import cv2
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.utils.video_visualizer import VideoVisualizer
+from detectron2.utils.visualizer import ColorMode, Visualizer
+
+
+class VisualizationDemo(object):
+    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
+        """
+        Args:
+            cfg (CfgNode):
+            instance_mode (ColorMode):
+            parallel (bool): whether to run the model in different processes from visualization.
+                Useful since the visualization logic can be slow.
+        """
+        self.metadata = MetadataCatalog.get(
+            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
+        )
+        self.cpu_device = torch.device("cpu")
+        self.instance_mode = instance_mode
+
+        self.parallel = parallel
+        if parallel:
+            num_gpu = torch.cuda.device_count()
+            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
+        else:
+            self.predictor = DefaultPredictor(cfg)
+
+    def run_on_image(self, image):
+        """
+        Args:
+            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+                This is the format used by OpenCV.
+
+        Returns:
+            predictions (dict): the output of the model.
+            vis_output (VisImage): the visualized image output.
+        """
+        vis_output = None
+        predictions = self.predictor(image)
+        # Convert image from OpenCV BGR format to Matplotlib RGB format.
+        image = image[:, :, ::-1]
+        visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
+        if "panoptic_seg" in predictions:
+            panoptic_seg, segments_info = predictions["panoptic_seg"]
+            vis_output = visualizer.draw_panoptic_seg_predictions(
+                panoptic_seg.to(self.cpu_device), segments_info
+            )
+        else:
+            if "sem_seg" in predictions:
+                vis_output = visualizer.draw_sem_seg(
+                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+                )
+            if "instances" in predictions:
+                instances = predictions["instances"].to(self.cpu_device)
+                vis_output = visualizer.draw_instance_predictions(predictions=instances)
+
+        return predictions, vis_output
+
+    def _frame_from_video(self, video):
+        while video.isOpened():
+            success, frame = video.read()
+            if success:
+                yield frame
+            else:
+                break
+
+    def run_on_video(self, video):
+        """
+        Visualizes predictions on frames of the input video.
+
+        Args:
+            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
+                either a webcam or a video file.
+
+        Yields:
+            ndarray: BGR visualizations of each video frame.
+        """
+        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
+
+        def process_predictions(frame, predictions):
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            if "panoptic_seg" in predictions:
+                panoptic_seg, segments_info = predictions["panoptic_seg"]
+                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
+                    frame, panoptic_seg.to(self.cpu_device), segments_info
+                )
+            elif "instances" in predictions:
+                predictions = predictions["instances"].to(self.cpu_device)
+                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
+            elif "sem_seg" in predictions:
+                vis_frame = video_visualizer.draw_sem_seg(
+                    frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+                )
+
+            # Converts Matplotlib RGB format to OpenCV BGR format
+            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
+            return vis_frame
+
+        frame_gen = self._frame_from_video(video)
+        if self.parallel:
+            buffer_size = self.predictor.default_buffer_size
+
+            frame_data = deque()
+
+            for cnt, frame in enumerate(frame_gen):
+                frame_data.append(frame)
+                self.predictor.put(frame)
+
+                if cnt >= buffer_size:
+                    frame = frame_data.popleft()
+                    predictions = self.predictor.get()
+                    yield process_predictions(frame, predictions)
+
+            while len(frame_data):
+                frame = frame_data.popleft()
+                predictions = self.predictor.get()
+                yield process_predictions(frame, predictions)
+        else:
+            for frame in frame_gen:
+                yield process_predictions(frame, self.predictor(frame))
+
+
+class AsyncPredictor:
+    """
+    A predictor that runs the model asynchronously, possibly on >1 GPUs.
+    Because rendering the visualization takes considerably amount of time,
+    this helps improve throughput a little bit when rendering videos.
+    """
+
+    class _StopToken:
+        pass
+
+    class _PredictWorker(mp.Process):
+        def __init__(self, cfg, task_queue, result_queue):
+            self.cfg = cfg
+            self.task_queue = task_queue
+            self.result_queue = result_queue
+            super().__init__()
+
+        def run(self):
+            predictor = DefaultPredictor(self.cfg)
+
+            while True:
+                task = self.task_queue.get()
+                if isinstance(task, AsyncPredictor._StopToken):
+                    break
+                idx, data = task
+                result = predictor(data)
+                self.result_queue.put((idx, result))
+
+    def __init__(self, cfg, num_gpus: int = 1):
+        """
+        Args:
+            cfg (CfgNode):
+            num_gpus (int): if 0, will run on CPU
+        """
+        num_workers = max(num_gpus, 1)
+        self.task_queue = mp.Queue(maxsize=num_workers * 3)
+        self.result_queue = mp.Queue(maxsize=num_workers * 3)
+        self.procs = []
+        for gpuid in range(max(num_gpus, 1)):
+            cfg = cfg.clone()
+            cfg.defrost()
+            cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
+            self.procs.append(
+                AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
+            )
+
+        self.put_idx = 0
+        self.get_idx = 0
+        self.result_rank = []
+        self.result_data = []
+
+        for p in self.procs:
+            p.start()
+        atexit.register(self.shutdown)
+
+    def put(self, image):
+        self.put_idx += 1
+        self.task_queue.put((self.put_idx, image))
+
+    def get(self):
+        self.get_idx += 1  # the index needed for this request
+        if len(self.result_rank) and self.result_rank[0] == self.get_idx:
+            res = self.result_data[0]
+            del self.result_data[0], self.result_rank[0]
+            return res
+
+        while True:
+            # make sure the results are returned in the correct order
+            idx, res = self.result_queue.get()
+            if idx == self.get_idx:
+                return res
+            insert = bisect.bisect(self.result_rank, idx)
+            self.result_rank.insert(insert, idx)
+            self.result_data.insert(insert, res)
+
+    def __len__(self):
+        return self.put_idx - self.get_idx
+
+    def __call__(self, image):
+        self.put(image)
+        return self.get()
+
+    def shutdown(self):
+        for _ in self.procs:
+            self.task_queue.put(AsyncPredictor._StopToken())
+
+    @property
+    def default_buffer_size(self):
+        return len(self.procs) * 5
diff --git a/demo/sample_imgs/000000008844.jpg b/demo/sample_imgs/000000008844.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d117937ec29e62d694bd6d2dc70eb41d9a92326c
Binary files /dev/null and b/demo/sample_imgs/000000008844.jpg differ
diff --git a/demo/sample_imgs/000000018737.jpg b/demo/sample_imgs/000000018737.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..340c394ff1398a1496c81855ff1128bbf8071842
Binary files /dev/null and b/demo/sample_imgs/000000018737.jpg differ
diff --git a/demo/sample_imgs/000000126137.jpg b/demo/sample_imgs/000000126137.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83c736e918992a085819d8a70103159b80c90998
Binary files /dev/null and b/demo/sample_imgs/000000126137.jpg differ
diff --git a/demo/sample_imgs/000000131444.jpg b/demo/sample_imgs/000000131444.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d4f63ec0b4ce746eb0eba168eceacb0032d1aac3
Binary files /dev/null and b/demo/sample_imgs/000000131444.jpg differ
diff --git a/demo/sample_imgs/000000132408.jpg b/demo/sample_imgs/000000132408.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dac4b04c9fefe52341456fe400d56a3d6ccb367b
Binary files /dev/null and b/demo/sample_imgs/000000132408.jpg differ
diff --git a/demo/sample_imgs/000000157365.jpg b/demo/sample_imgs/000000157365.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..10e719bef58161855d280d7a0034491d12a382f0
Binary files /dev/null and b/demo/sample_imgs/000000157365.jpg differ
diff --git a/demo/sample_imgs/000000176037.jpg b/demo/sample_imgs/000000176037.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0abf887fb73e89869f761c8046c9227bc5bb298a
Binary files /dev/null and b/demo/sample_imgs/000000176037.jpg differ
diff --git a/demo/sample_imgs/000000224200.jpg b/demo/sample_imgs/000000224200.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fc5b3de83c8b3861ec92ecef6263249ad7b11473
Binary files /dev/null and b/demo/sample_imgs/000000224200.jpg differ
diff --git a/demo/sample_imgs/000000244019.jpg b/demo/sample_imgs/000000244019.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..54927eb0e93cf3b5ce55a33aa64f5dd36ebd1008
Binary files /dev/null and b/demo/sample_imgs/000000244019.jpg differ
diff --git a/demo/sample_imgs/000000252776.jpg b/demo/sample_imgs/000000252776.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8b9dd0ef433ad232164a0c92c3414f49dffc6fec
Binary files /dev/null and b/demo/sample_imgs/000000252776.jpg differ
diff --git a/demo/sample_imgs/000000286849.jpg b/demo/sample_imgs/000000286849.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..12d9e147d759e2aeeb4e3903bc129157f71ac642
Binary files /dev/null and b/demo/sample_imgs/000000286849.jpg differ
diff --git a/demo/sample_imgs/000000292997.jpg b/demo/sample_imgs/000000292997.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4d56af9492d02539b68805cb80c075d6efad63e3
Binary files /dev/null and b/demo/sample_imgs/000000292997.jpg differ
diff --git a/demo/sample_imgs/000000321214.jpg b/demo/sample_imgs/000000321214.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..427cdf048ac5bb950bdf808e791e6a52477169b4
Binary files /dev/null and b/demo/sample_imgs/000000321214.jpg differ
diff --git a/demo/sample_imgs/000000344909.jpg b/demo/sample_imgs/000000344909.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6323a743693f7d87c620888e5587edbf545f0f76
Binary files /dev/null and b/demo/sample_imgs/000000344909.jpg differ
diff --git a/demo/sample_imgs/000000360661.jpg b/demo/sample_imgs/000000360661.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c90c058740466131082aed6fee6964cda04a4711
Binary files /dev/null and b/demo/sample_imgs/000000360661.jpg differ
diff --git a/demo/sample_imgs/000000396903.jpg b/demo/sample_imgs/000000396903.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f10456ff9d60df5821d6427e672f9ffe51480d9b
Binary files /dev/null and b/demo/sample_imgs/000000396903.jpg differ
diff --git a/demo/sample_imgs/000000404922.jpg b/demo/sample_imgs/000000404922.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6595f7b259bbfeb5de8d8aa172254db8a0e56645
Binary files /dev/null and b/demo/sample_imgs/000000404922.jpg differ
diff --git a/demo/sample_imgs/000000442836.jpg b/demo/sample_imgs/000000442836.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e24da5924c518e34bc7c56dd7dc1404d58463b3
Binary files /dev/null and b/demo/sample_imgs/000000442836.jpg differ
diff --git a/demo/sample_imgs/000000464144.jpg b/demo/sample_imgs/000000464144.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b59f7e4b7fd684e7d2b47b3ac9036fb592a5457d
Binary files /dev/null and b/demo/sample_imgs/000000464144.jpg differ
diff --git a/demo/sample_imgs/000000482477.jpg b/demo/sample_imgs/000000482477.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..98c5277b190faa54f12e85df99768bef255abfff
Binary files /dev/null and b/demo/sample_imgs/000000482477.jpg differ
diff --git a/demo/sample_imgs/000000495054.jpg b/demo/sample_imgs/000000495054.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..53ae52e0be22fbb426eeec63d14e85c5b2b9fab2
Binary files /dev/null and b/demo/sample_imgs/000000495054.jpg differ
diff --git a/demo/sample_imgs/000000558073.jpg b/demo/sample_imgs/000000558073.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6d0a14a1b5b8765c0df91fde476f6a1d488a6b05
Binary files /dev/null and b/demo/sample_imgs/000000558073.jpg differ
diff --git a/output_3x_transfiner_r101_deform.pth b/output_3x_transfiner_r101_deform.pth
new file mode 100644
index 0000000000000000000000000000000000000000..78a66ab48160fc823ea4daaa93d19261c77f1ec9
--- /dev/null
+++ b/output_3x_transfiner_r101_deform.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d5fefa123e70f379af846c979139e53e89b8c661b7285dd485e640fa86faead
+size 294703133
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed14d209773f90b011767ba0a2ea47f9cfc497d4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+pyyaml==5.1
+torch==1.7.1
+torchvision==0.8.2
+opencv-python-headless
+scikit-image
+kornia==0.5.11
+
+

Name	lr sched	train time (s/iter)	inference time (s/im)	train mem (GB)	box AP	mask AP	kp. AP	model id	download
Faster R-CNN	1x	0.219	0.038	3.1	36.9			137781054	model \| metrics
Keypoint R-CNN	1x	0.313	0.071	5.0	53.1		64.2	137781195	model \| metrics
Mask R-CNN	1x	0.273	0.043	3.4	37.8	34.9		137781281	model \| metrics