diff --git a/README.md b/README.md index 8e61cf66f29dca840183d4965dd57ebd435eb163..72c85e42a4751f3501d4e36c838a283d500c1120 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ --- title: Transfiner -emoji: 🌍 -colorFrom: gray -colorTo: gray +emoji: 📊 +colorFrom: red +colorTo: green sdk: gradio -sdk_version: 3.0.20 +sdk_version: 2.9.3 app_file: app.py pinned: false license: apache-2.0 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a5d4bdaf8a89f05e35346cc34c3159fa3919b7 --- /dev/null +++ b/app.py @@ -0,0 +1,84 @@ +#try: +# import detectron2 +#except: +import os +os.system('pip install git+https://github.com/SysCV/transfiner.git') + +from matplotlib.pyplot import axis +import gradio as gr +import requests +import numpy as np +from torch import nn +import requests + +import torch + +from detectron2 import model_zoo +from detectron2.engine import DefaultPredictor +from detectron2.config import get_cfg +from detectron2.utils.visualizer import Visualizer +from detectron2.data import MetadataCatalog + + +model_name='./configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml' + + +cfg = get_cfg() +# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library +cfg.merge_from_file(model_name) +cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model +cfg.VIS_PERIOD = 100 +# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... 
url as w ell +#cfg.MODEL.WEIGHTS = './output_3x_transfiner_r50.pth' +cfg.MODEL.WEIGHTS = './output_3x_transfiner_r101_deform.pth' + +if not torch.cuda.is_available(): + cfg.MODEL.DEVICE='cpu' + +predictor = DefaultPredictor(cfg) + + +def inference(image): + width, height = image.size + if width > 1300: + ratio = float(height) / float(width) + width = 1300 + height = int(ratio * width) + image = image.resize((width, height)) + + img = np.asarray(image) + + #img = np.array(image) + outputs = predictor(img) + + v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0])) + out = v.draw_instance_predictions(outputs["instances"].to("cpu")) + + return out.get_image() + + + +title = "Mask Transfiner [CVPR, 2022]" +description = "Demo for Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022 based on R50-FPN. To use it, simply upload your image, or click one of the examples to load them. Note that it runs in the CPU environment provided by Hugging Face so the processing speed may be slow." +article = "

Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022 | Mask Transfiner Github Code

" + +gr.Interface( + inference, + [gr.inputs.Image(type="pil", label="Input")], + gr.outputs.Image(type="numpy", label="Output"), + title=title, + description=description, + article=article, + examples=[ + ["demo/sample_imgs/000000131444.jpg"], + ["demo/sample_imgs/000000157365.jpg"], + ["demo/sample_imgs/000000176037.jpg"], + ["demo/sample_imgs/000000018737.jpg"], + ["demo/sample_imgs/000000224200.jpg"], + ["demo/sample_imgs/000000558073.jpg"], + ["demo/sample_imgs/000000404922.jpg"], + ["demo/sample_imgs/000000252776.jpg"], + ["demo/sample_imgs/000000482477.jpg"], + ["demo/sample_imgs/000000344909.jpg"] + ]).launch() + diff --git a/configs/Base-RCNN-C4.yaml b/configs/Base-RCNN-C4.yaml new file mode 100755 index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad --- /dev/null +++ b/configs/Base-RCNN-C4.yaml @@ -0,0 +1,18 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RPN: + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "Res5ROIHeads" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RCNN-DilatedC5.yaml b/configs/Base-RCNN-DilatedC5.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27 --- /dev/null +++ b/configs/Base-RCNN-DilatedC5.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RESNETS: + OUT_FEATURES: ["res5"] + RES5_DILATION: 2 + RPN: + IN_FEATURES: ["res5"] + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["res5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + 
IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml new file mode 100755 index 0000000000000000000000000000000000000000..d1c7c791c40dbc5eec884e56c1bf18f422f52f1d --- /dev/null +++ b/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,43 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
+ POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + #TEST: ("coco_2017_val",) + TEST: ("coco_2017_test-dev",) +SOLVER: + IMS_PER_BATCH: 16 #16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Base-RetinaNet.yaml b/configs/Base-RetinaNet.yaml new file mode 100755 index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8 --- /dev/null +++ b/configs/Base-RetinaNet.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + BACKBONE: + NAME: "build_retinanet_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + RETINANET: + IOU_THRESHOLDS: [0.4, 0.5] + IOU_LABELS: [0, -1, 1] + SMOOTH_L1_LOSS_BETA: 0.0 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml new file mode 100755 index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e --- /dev/null +++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: 
"detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.01 + STEPS: (18000,) + MAX_ITER: 24000 + IMS_PER_BATCH: 8 +TEST: + EVAL_PERIOD: 8000 diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml new file mode 100755 index 0000000000000000000000000000000000000000..4b4f2e6545e6920f8d3a84f1c517d79679a848c0 --- /dev/null +++ b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.005 + STEPS: (36000,) + MAX_ITER: 48000 + IMS_PER_BATCH: 4 +TEST: + EVAL_PERIOD: 48000 diff --git a/configs/Detectron1-Comparisons/README.md 
b/configs/Detectron1-Comparisons/README.md new file mode 100755 index 0000000000000000000000000000000000000000..924fd00af642ddf1a4ff4c4f5947f676134eb7de --- /dev/null +++ b/configs/Detectron1-Comparisons/README.md @@ -0,0 +1,84 @@ + +Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. + +The differences in implementation details are shared in +[Compatibility with Other Libraries](../../docs/notes/compatibility.md). + +The differences in model zoo's experimental settings include: +* Use scale augmentation during training. This improves AP with lower training cost. +* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may + affect other AP. +* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. +* Use `ROIAlignV2`. This does not significantly affect AP. + +In this directory, we provide a few configs that __do not__ have the above changes. +They mimic Detectron's behavior as close as possible, +and provide a fair comparison of accuracy and speed against Detectron. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
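| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
|:---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |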
+ +## Comparisons: + +* Faster R-CNN: Detectron's AP is 36.7, similar to ours. +* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron + [bug](https://github.com/facebookresearch/Detectron/issues/459) led to a drop in box AP, which can be + compensated for by some parameter tuning. +* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation. + See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details. + +For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa --- /dev/null +++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. 
+ RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808 --- /dev/null +++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + RPN: + SMOOTH_L1_BETA: 0.1111 + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. + POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6 --- /dev/null +++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. 
+ RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_MASK_HEAD: + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml new file mode 100755 index 0000000000000000000000000000000000000000..de110d26e773c35504a96d75724545777d2332ee --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "./model_final_824ab5.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 150 #300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git 
a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c474187bdf2db5c9662c8b7083ba481ded378fbd --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 150 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a --- /dev/null +++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100755 index 
0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157 --- /dev/null +++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d --- /dev/null +++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898 --- /dev/null +++ 
b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,26 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git 
a/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml new file mode 100755 index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25 --- /dev/null +++ b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml @@ -0,0 +1,36 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 152 + DEFORM_ON_PER_STAGE: [False, True, True, True] + ROI_HEADS: + NAME: "CascadeROIHeads" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NUM_CONV: 8 + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + IMS_PER_BATCH: 128 + STEPS: (35000, 45000) + MAX_ITER: 50000 + BASE_LR: 0.16 +INPUT: + MIN_SIZE_TRAIN: (640, 864) + MIN_SIZE_TRAIN_SAMPLING: "range" + MAX_SIZE_TRAIN: 1440 + CROP: + ENABLED: True +TEST: + EVAL_PERIOD: 2500 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml new file mode 100755 index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + CLS_AGNOSTIC_MASK: True diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml new file mode 100755 index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + 
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml new file mode 100755 index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml new file mode 100755 index 0000000000000000000000000000000000000000..699bea11dfa413c0718681752963cd97ab29b52c --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN-4gpu.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (420000, 500000) # (210000, 250000) + MAX_ITER: 540000 # 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100755 index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3 --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "GN" + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + 
NUM_FC: 1 + NORM: "GN" + ROI_MASK_HEAD: + NORM: "GN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml new file mode 100755 index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b --- /dev/null +++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml @@ -0,0 +1,24 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "SyncBN" + STRIDE_IN_1X1: True + FPN: + NORM: "SyncBN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "SyncBN" + ROI_MASK_HEAD: + NORM: "SyncBN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 +TEST: + PRECISE_BN: + ENABLED: True diff --git a/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py new file mode 100755 index 0000000000000000000000000000000000000000..0f2464be744c083985898a25f9e71d00104f689d --- /dev/null +++ b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py @@ -0,0 +1,151 @@ +# An example config to train a mmdetection model using detectron2. 
+ +from ..common.data.coco import dataloader +from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier +from ..common.optim import SGD as optimizer +from ..common.train import train + +from detectron2.modeling.mmdet_wrapper import MMDetDetector +from detectron2.config import LazyCall as L + +model = L(MMDetDetector)( + detector=dict( + type="MaskRCNN", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + roi_head=dict( + type="StandardRoIHead", + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=False, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + 
type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5, + ), + ), + ), + pixel_mean=[123.675, 116.280, 103.530], + pixel_std=[58.395, 57.120, 57.375], +) + +dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model +train.init_checkpoint = None # pretrained model is loaded inside backbone diff --git a/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b --- /dev/null +++ b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml @@ -0,0 +1,26 @@ +# A large PanopticFPN for demo purposes. +# Use GN on backbone to support semantic seg. 
+# Use Cascade + Deform Conv to improve localization. +_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" + RESNETS: + DEPTH: 101 + NORM: "GN" + DEFORM_ON_PER_STAGE: [False, True, True, True] + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (105000, 125000) + MAX_ITER: 135000 + IMS_PER_BATCH: 32 + BASE_LR: 0.04 diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5 --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,13 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + # Train from random initialization. + WEIGHTS: "" + # It makes sense to divide by STD when training from scratch + # But it seems to make no difference on the results and C2's models didn't do this. + # So we keep things consistent with C2. + # PIXEL_STD: [57.375, 57.12, 58.395] + MASK_ON: True + BACKBONE: + FREEZE_AT: 0 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml new file mode 100755 index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176 --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml new file mode 100755 index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a --- /dev/null +++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
 diff --git a/configs/Misc/semantic_R_50_FPN_1x.yaml b/configs/Misc/semantic_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5 --- /dev/null +++ b/configs/Misc/semantic_R_50_FPN_1x.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/configs/Misc/torchvision_imagenet_R_50.py b/configs/Misc/torchvision_imagenet_R_50.py new file mode 100755 index 0000000000000000000000000000000000000000..0d75305bcf7445b98db84b3d489a1505d2fce5af --- /dev/null +++ b/configs/Misc/torchvision_imagenet_R_50.py @@ -0,0 +1,150 @@ +""" +An example config file to train an ImageNet classifier with detectron2. +Model and dataloader both come from torchvision. +This shows how to use detectron2 as a general engine for any new models and tasks. 
+ +To run, use the following command: + +python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \ + --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/ + +""" + + +import torch +from torch import nn +from torch.nn import functional as F +from omegaconf import OmegaConf +import torchvision +from torchvision.transforms import transforms as T +from torchvision.models.resnet import ResNet, Bottleneck +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from detectron2.solver import WarmupParamScheduler +from detectron2.solver.build import get_default_optimizer_params +from detectron2.config import LazyCall as L +from detectron2.model_zoo import get_config +from detectron2.data.samplers import TrainingSampler, InferenceSampler +from detectron2.evaluation import DatasetEvaluator +from detectron2.utils import comm + + +""" +Note: Here we put reusable code (models, evaluation, data) together with configs just as a +proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2. +Writing code in configs offers extreme flexibility but is often not a good engineering practice. +In practice, you might want to put code in your project and import them instead. 
+""" + + +def build_data_loader(dataset, batch_size, num_workers, training=True): + return torch.utils.data.DataLoader( + dataset, + sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)), + batch_size=batch_size, + num_workers=num_workers, + pin_memory=True, + ) + + +class ClassificationNet(nn.Module): + def __init__(self, model: nn.Module): + super().__init__() + self.model = model + + @property + def device(self): + return list(self.model.parameters())[0].device + + def forward(self, inputs): + image, label = inputs + pred = self.model(image.to(self.device)) + if self.training: + label = label.to(self.device) + return F.cross_entropy(pred, label) + else: + return pred + + +class ClassificationAcc(DatasetEvaluator): + def reset(self): + self.corr = self.total = 0 + + def process(self, inputs, outputs): + image, label = inputs + self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item() + self.total += len(label) + + def evaluate(self): + all_corr_total = comm.all_gather([self.corr, self.total]) + corr = sum(x[0] for x in all_corr_total) + total = sum(x[1] for x in all_corr_total) + return {"accuracy": corr / total} + + +# --- End of code that could be in a project and be imported + + +dataloader = OmegaConf.create() +dataloader.train = L(build_data_loader)( + dataset=L(torchvision.datasets.ImageNet)( + root="/path/to/imagenet", + split="train", + transform=L(T.Compose)( + transforms=[ + L(T.RandomResizedCrop)(size=224), + L(T.RandomHorizontalFlip)(), + T.ToTensor(), + L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + ), + ), + batch_size=256 // 8, + num_workers=4, + training=True, +) + +dataloader.test = L(build_data_loader)( + dataset=L(torchvision.datasets.ImageNet)( + root="${...train.dataset.root}", + split="val", + transform=L(T.Compose)( + transforms=[ + L(T.Resize)(size=256), + L(T.CenterCrop)(size=224), + T.ToTensor(), + L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + 
 ), + ), + batch_size=256 // 8, + num_workers=4, + training=False, +) + +dataloader.evaluator = L(ClassificationAcc)() + +model = L(ClassificationNet)( + model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True) +) + + +optimizer = L(torch.optim.SGD)( + params=L(get_default_optimizer_params)(), + lr=0.1, + momentum=0.9, + weight_decay=1e-4, +) + +lr_multiplier = L(WarmupParamScheduler)( + scheduler=L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100] + ), + warmup_length=1 / 100, + warmup_factor=0.1, +) + + +train = get_config("common/train.py").train +train.init_checkpoint = None +train.max_iter = 100 * 1281167 // 256 diff --git a/configs/common/README.md b/configs/common/README.md new file mode 100755 index 0000000000000000000000000000000000000000..912cc29927542bfe4258d3208cf52d73cb0ea477 --- /dev/null +++ b/configs/common/README.md @@ -0,0 +1,6 @@ +This directory provides definitions for a few common models, dataloaders, schedulers, +and optimizers that are often used in training. +The definitions of these objects are provided in the form of lazy instantiation: +their arguments can be edited by users before constructing the objects. + +They can be imported, or loaded by the `model_zoo.get_config` API in users' own configs. diff --git a/configs/common/coco_schedule.py b/configs/common/coco_schedule.py new file mode 100755 index 0000000000000000000000000000000000000000..355e66a1d213cb599a7ffe55089d854089c8ead2 --- /dev/null +++ b/configs/common/coco_schedule.py @@ -0,0 +1,47 @@ +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from detectron2.config import LazyCall as L +from detectron2.solver import WarmupParamScheduler + + +def default_X_scheduler(num_X): + """ + Returns the config for a default multi-step LR scheduler such as "1x", "3x", + commonly referred to in papers, where every 1x has the total length of 1440k + training images (~12 COCO epochs). 
LR is decayed twice at the end of training + following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4. + + Args: + num_X: a positive real number + + Returns: + DictConfig: configs that define the multiplier for LR during training + """ + # total number of iterations assuming 16 batch size, using 1440000/16=90000 + total_steps_16bs = num_X * 90000 + + if num_X <= 2: + scheduler = L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + # note that scheduler is scale-invariant. This is equivalent to + # milestones=[6, 8, 9] + milestones=[60000, 80000, 90000], + ) + else: + scheduler = L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs], + ) + return L(WarmupParamScheduler)( + scheduler=scheduler, + warmup_length=1000 / total_steps_16bs, + warmup_method="linear", + warmup_factor=0.001, + ) + + +lr_multiplier_1x = default_X_scheduler(1) +lr_multiplier_2x = default_X_scheduler(2) +lr_multiplier_3x = default_X_scheduler(3) +lr_multiplier_6x = default_X_scheduler(6) +lr_multiplier_9x = default_X_scheduler(9) diff --git a/configs/common/data/coco.py b/configs/common/data/coco.py new file mode 100755 index 0000000000000000000000000000000000000000..703c4385c7ddc7eb0759c98d102ab2384d6a9e3e --- /dev/null +++ b/configs/common/data/coco.py @@ -0,0 +1,48 @@ +from omegaconf import OmegaConf + +import detectron2.data.transforms as T +from detectron2.config import LazyCall as L +from detectron2.data import ( + DatasetMapper, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, +) +from detectron2.evaluation import COCOEvaluator + +dataloader = OmegaConf.create() + +dataloader.train = L(build_detection_train_loader)( + dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), + mapper=L(DatasetMapper)( + is_train=True, + augmentations=[ + L(T.ResizeShortestEdge)( + short_edge_length=(640, 672, 704, 736, 768, 800), + 
sample_style="choice", + max_size=1333, + ), + L(T.RandomFlip)(horizontal=True), + ], + image_format="BGR", + use_instance_mask=True, + ), + total_batch_size=16, + num_workers=4, +) + +dataloader.test = L(build_detection_test_loader)( + dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), + mapper=L(DatasetMapper)( + is_train=False, + augmentations=[ + L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), + ], + image_format="${...train.mapper.image_format}", + ), + num_workers=4, +) + +dataloader.evaluator = L(COCOEvaluator)( + dataset_name="${..test.dataset.names}", +) diff --git a/configs/common/data/coco_keypoint.py b/configs/common/data/coco_keypoint.py new file mode 100755 index 0000000000000000000000000000000000000000..b4ceb066faf696954244205dc75376b767071217 --- /dev/null +++ b/configs/common/data/coco_keypoint.py @@ -0,0 +1,13 @@ +from detectron2.data.detection_utils import create_keypoint_hflip_indices + +from .coco import dataloader + +dataloader.train.dataset.min_keypoints = 1 +dataloader.train.dataset.names = "keypoints_coco_2017_train" +dataloader.test.dataset.names = "keypoints_coco_2017_val" + +dataloader.train.mapper.update( + use_instance_mask=False, + use_keypoint=True, + keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), +) diff --git a/configs/common/data/coco_panoptic_separated.py b/configs/common/data/coco_panoptic_separated.py new file mode 100755 index 0000000000000000000000000000000000000000..5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d --- /dev/null +++ b/configs/common/data/coco_panoptic_separated.py @@ -0,0 +1,26 @@ +from detectron2.config import LazyCall as L +from detectron2.evaluation import ( + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + SemSegEvaluator, +) + +from .coco import dataloader + +dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" +dataloader.train.dataset.filter_empty = False +dataloader.test.dataset.names = 
"coco_2017_val_panoptic_separated" + + +dataloader.evaluator = [ + L(COCOEvaluator)( + dataset_name="${...test.dataset.names}", + ), + L(SemSegEvaluator)( + dataset_name="${...test.dataset.names}", + ), + L(COCOPanopticEvaluator)( + dataset_name="${...test.dataset.names}", + ), +] diff --git a/configs/common/models/cascade_rcnn.py b/configs/common/models/cascade_rcnn.py new file mode 100755 index 0000000000000000000000000000000000000000..c7372a801dc00d7fec4db8cda8c2612ce281d48a --- /dev/null +++ b/configs/common/models/cascade_rcnn.py @@ -0,0 +1,36 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads + +from .mask_rcnn_fpn import model + +# arguments that don't exist for Cascade R-CNN +[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] + +model.roi_heads.update( + _target_=CascadeROIHeads, + box_heads=[ + L(FastRCNNConvFCHead)( + input_shape=ShapeSpec(channels=256, height=7, width=7), + conv_dims=[], + fc_dims=[1024, 1024], + ) + for k in range(3) + ], + box_predictors=[ + L(FastRCNNOutputLayers)( + input_shape=ShapeSpec(channels=1024), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), + cls_agnostic_bbox_reg=True, + num_classes="${...num_classes}", + ) + for (w1, w2) in [(10, 5), (20, 10), (30, 15)] + ], + proposal_matchers=[ + L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) + for th in [0.5, 0.6, 0.7] + ], +) diff --git a/configs/common/models/keypoint_rcnn_fpn.py b/configs/common/models/keypoint_rcnn_fpn.py new file mode 100755 index 0000000000000000000000000000000000000000..56b3994df249884d4816fc9a5c7f553a9ab6f400 --- /dev/null +++ b/configs/common/models/keypoint_rcnn_fpn.py @@ -0,0 +1,33 @@ +from 
detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead + +from .mask_rcnn_fpn import model + +[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]] + +model.roi_heads.update( + num_classes=1, + keypoint_in_features=["p2", "p3", "p4", "p5"], + keypoint_pooler=L(ROIPooler)( + output_size=14, + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + keypoint_head=L(KRCNNConvDeconvUpsampleHead)( + input_shape=ShapeSpec(channels=256, width=14, height=14), + num_keypoints=17, + conv_dims=[512] * 8, + loss_normalizer="visible", + ), +) + +# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. +# 1000 proposals per-image is found to hurt box AP. +# Therefore we increase it to 1500 per-image. +model.proposal_generator.post_nms_topk = (1500, 1000) + +# Keypoint AP degrades (though box AP improves) when using plain L1 loss +model.roi_heads.box_predictor.smooth_l1_beta = 0.5 diff --git a/configs/common/models/mask_rcnn_c4.py b/configs/common/models/mask_rcnn_c4.py new file mode 100755 index 0000000000000000000000000000000000000000..a3dcf8be42a39c6e5f6e76e3ab23adeccb33085d --- /dev/null +++ b/configs/common/models/mask_rcnn_c4.py @@ -0,0 +1,88 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import GeneralizedRCNN +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.proposal_generator import RPN, StandardRPNHead +from detectron2.modeling.roi_heads import ( + 
FastRCNNOutputLayers, + MaskRCNNConvUpsampleHead, + Res5ROIHeads, +) + +model = L(GeneralizedRCNN)( + backbone=L(ResNet)( + stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res4"], + ), + proposal_generator=L(RPN)( + in_features=["res4"], + head=L(StandardRPNHead)(in_channels=1024, num_anchors=15), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[32, 64, 128, 256, 512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[16], + offset=0.0, + ), + anchor_matcher=L(Matcher)( + thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + batch_size_per_image=256, + positive_fraction=0.5, + pre_nms_topk=(12000, 6000), + post_nms_topk=(2000, 1000), + nms_thresh=0.7, + ), + roi_heads=L(Res5ROIHeads)( + num_classes=80, + batch_size_per_image=512, + positive_fraction=0.25, + proposal_matcher=L(Matcher)( + thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False + ), + in_features=["res4"], + pooler=L(ROIPooler)( + output_size=14, + scales=(1.0 / 16,), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + res5=L(ResNet.make_stage)( + block_class=BottleneckBlock, + num_blocks=3, + stride_per_block=[2, 1, 1], + in_channels=1024, + bottleneck_channels=512, + out_channels=2048, + norm="FrozenBN", + stride_in_1x1=True, + ), + box_predictor=L(FastRCNNOutputLayers)( + input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), + num_classes="${..num_classes}", + ), + mask_head=L(MaskRCNNConvUpsampleHead)( + input_shape=L(ShapeSpec)( + channels="${...res5.out_channels}", + width="${...pooler.output_size}", + height="${...pooler.output_size}", + ), + num_classes="${..num_classes}", + conv_dims=[256], + ), + ), + pixel_mean=[103.530, 116.280, 
123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/models/mask_rcnn_fpn.py b/configs/common/models/mask_rcnn_fpn.py new file mode 100755 index 0000000000000000000000000000000000000000..3f87d8da83d93932ddd5e9dc5b38d42786c0cbb4 --- /dev/null +++ b/configs/common/models/mask_rcnn_fpn.py @@ -0,0 +1,93 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import GeneralizedRCNN +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone.fpn import LastLevelMaxPool +from detectron2.modeling.backbone import BasicStem, FPN, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.proposal_generator import RPN, StandardRPNHead +from detectron2.modeling.roi_heads import ( + StandardROIHeads, + FastRCNNOutputLayers, + MaskRCNNConvUpsampleHead, + FastRCNNConvFCHead, +) + +model = L(GeneralizedRCNN)( + backbone=L(FPN)( + bottom_up=L(ResNet)( + stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res2", "res3", "res4", "res5"], + ), + in_features="${.bottom_up.out_features}", + out_channels=256, + top_block=L(LastLevelMaxPool)(), + ), + proposal_generator=L(RPN)( + in_features=["p2", "p3", "p4", "p5", "p6"], + head=L(StandardRPNHead)(in_channels=256, num_anchors=3), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[32], [64], [128], [256], [512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + offset=0.0, + ), + anchor_matcher=L(Matcher)( + thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + batch_size_per_image=256, + 
positive_fraction=0.5, + pre_nms_topk=(2000, 1000), + post_nms_topk=(1000, 1000), + nms_thresh=0.7, + ), + roi_heads=L(StandardROIHeads)( + num_classes=80, + batch_size_per_image=512, + positive_fraction=0.25, + proposal_matcher=L(Matcher)( + thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False + ), + box_in_features=["p2", "p3", "p4", "p5"], + box_pooler=L(ROIPooler)( + output_size=7, + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + box_head=L(FastRCNNConvFCHead)( + input_shape=ShapeSpec(channels=256, height=7, width=7), + conv_dims=[], + fc_dims=[1024, 1024], + ), + box_predictor=L(FastRCNNOutputLayers)( + input_shape=ShapeSpec(channels=1024), + test_score_thresh=0.05, + box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), + num_classes="${..num_classes}", + ), + mask_in_features=["p2", "p3", "p4", "p5"], + mask_pooler=L(ROIPooler)( + output_size=14, # ori is 14 + scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), + sampling_ratio=0, + pooler_type="ROIAlignV2", + ), + mask_head=L(MaskRCNNConvUpsampleHead)( + input_shape=ShapeSpec(channels=256, width=14, height=14), + num_classes="${..num_classes}", + conv_dims=[256, 256, 256, 256, 256], + ), + ), + pixel_mean=[103.530, 116.280, 123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/models/panoptic_fpn.py b/configs/common/models/panoptic_fpn.py new file mode 100755 index 0000000000000000000000000000000000000000..88f55d2ce9db62e61445d6a3700067d9d864ecae --- /dev/null +++ b/configs/common/models/panoptic_fpn.py @@ -0,0 +1,20 @@ +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling import PanopticFPN +from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead + +from .mask_rcnn_fpn import model + +model._target_ = PanopticFPN +model.sem_seg_head = L(SemSegFPNHead)( + input_shape={ + f: L(ShapeSpec)(stride=s, 
channels="${....backbone.out_channels}") + for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32]) + }, + ignore_value=255, + num_classes=54, # COCO stuff + 1 + conv_dims=128, + common_stride=4, + loss_weight=0.5, + norm="GN", +) diff --git a/configs/common/models/retinanet.py b/configs/common/models/retinanet.py new file mode 100755 index 0000000000000000000000000000000000000000..01d168fe6f054b88933488bdc65516424ce917cd --- /dev/null +++ b/configs/common/models/retinanet.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +from detectron2.config import LazyCall as L +from detectron2.layers import ShapeSpec +from detectron2.modeling.meta_arch import RetinaNet +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator +from detectron2.modeling.backbone.fpn import LastLevelP6P7 +from detectron2.modeling.backbone import BasicStem, FPN, ResNet +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.meta_arch.retinanet import RetinaNetHead + +model = L(RetinaNet)( + backbone=L(FPN)( + bottom_up=L(ResNet)( + stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), + stages=L(ResNet.make_default_stages)( + depth=50, + stride_in_1x1=True, + norm="FrozenBN", + ), + out_features=["res3", "res4", "res5"], + ), + in_features=["res3", "res4", "res5"], + out_channels=256, + top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"), + ), + head=L(RetinaNetHead)( + input_shape=[ShapeSpec(channels=256)], + num_classes="${..num_classes}", + conv_dims=[256, 256, 256, 256], + prior_prob=0.01, + num_anchors=9, + ), + anchor_generator=L(DefaultAnchorGenerator)( + sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]], + aspect_ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128], + offset=0.0, + ), + box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), + anchor_matcher=L(Matcher)( + thresholds=[0.4, 0.5], 
labels=[0, -1, 1], allow_low_quality_matches=True + ), + num_classes=80, + head_in_features=["p3", "p4", "p5", "p6", "p7"], + focal_loss_alpha=0.25, + focal_loss_gamma=2.0, + pixel_mean=[103.530, 116.280, 123.675], + pixel_std=[1.0, 1.0, 1.0], + input_format="BGR", +) diff --git a/configs/common/optim.py b/configs/common/optim.py new file mode 100755 index 0000000000000000000000000000000000000000..d39d3aaa546c17e831d21d1758b69e8c1609415e --- /dev/null +++ b/configs/common/optim.py @@ -0,0 +1,15 @@ +import torch + +from detectron2.config import LazyCall as L +from detectron2.solver.build import get_default_optimizer_params + +SGD = L(torch.optim.SGD)( + params=L(get_default_optimizer_params)( + # params.model is meant to be set to the model object, before instantiating + # the optimizer. + weight_decay_norm=0.0 + ), + lr=0.02, + momentum=0.9, + weight_decay=1e-4, +) diff --git a/configs/common/train.py b/configs/common/train.py new file mode 100755 index 0000000000000000000000000000000000000000..7c63bdb073797e48e0b3640e668ecc1d5c137d59 --- /dev/null +++ b/configs/common/train.py @@ -0,0 +1,18 @@ +# Common training-related configs that are designed for "tools/lazyconfig_train_net.py" +# You can use your own instead, together with your own train_net.py +train = dict( + output_dir="./output", + init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl", + max_iter=90000, + amp=dict(enabled=False), # options for Automatic Mixed Precision + ddp=dict( # options for DistributedDataParallel + broadcast_buffers=False, + find_unused_parameters=False, + fp16_compression=False, + ), + checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer + eval_period=5000, + log_period=20, + device="cuda" + # ... 
+) diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..3740e9bb08c5f168a9ab3a6d94561678bad1775c --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py @@ -0,0 +1,9 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +model.backbone.bottom_up.stages.depth = 101 diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..18e5f0720c568db4ef0c97b59688b5e7866df606 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_101_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..63c54ee9a5ce2368494b775cc90fada1439feaa5 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_101_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py new file mode 100755 index 
 0000000000000000000000000000000000000000..df7a2aedf480ed8dc4aa3645e37420e9b893fae4 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py @@ -0,0 +1,72 @@ +import detectron2.data.transforms as T +from detectron2.config.lazy import LazyCall as L +from detectron2.layers.batch_norm import NaiveSyncBatchNorm +from detectron2.solver import WarmupParamScheduler +from fvcore.common.param_scheduler import MultiStepParamScheduler + +from ..common.data.coco import dataloader +from ..common.models.mask_rcnn_fpn import model +from ..common.optim import SGD as optimizer +from ..common.train import train + +# train from scratch +train.init_checkpoint = "" +train.amp.enabled = True +train.ddp.fp16_compression = True +model.backbone.bottom_up.freeze_at = 0 + +# SyncBN +# fmt: off +model.backbone.bottom_up.stem.norm = \ + model.backbone.bottom_up.stages.norm = \ + model.backbone.norm = "SyncBN" + +# Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by +# torch.nn.SyncBatchNorm. We can remove this after +# https://github.com/pytorch/pytorch/issues/36530 is fixed. 
+model.roi_heads.box_head.conv_norm = \ + model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, + stats_mode="N") +# fmt: on + +# 2conv in RPN: +# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 +model.proposal_generator.head.conv_dims = [-1, -1] + +# 4conv1fc box head +model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] +model.roi_heads.box_head.fc_dims = [1024] + +# resize_and_crop_image in: +# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950 +image_size = 1024 +dataloader.train.mapper.augmentations = [ + L(T.ResizeScale)( + min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size + ), + L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), + L(T.RandomFlip)(horizontal=True), +] + +# recompute boxes due to cropping +dataloader.train.mapper.recompute_boxes = True + +# larger batch-size. +dataloader.train.total_batch_size = 64 + +# Equivalent to 100 epochs. 
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep +train.max_iter = 184375 + +lr_multiplier = L(WarmupParamScheduler)( + scheduler=L(MultiStepParamScheduler)( + values=[1.0, 0.1, 0.01], + milestones=[163889, 177546], + num_updates=train.max_iter, + ), + warmup_length=500 / train.max_iter, + warmup_factor=0.067, +) + +optimizer.lr = 0.1 +optimizer.weight_decay = 4e-5 diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..2a7c376da5f9269197c44079f3e0f3b09cdc63fa --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..97586b8f5330a9d995a0bffd1f5e7bd5b5656462 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..2ca1ede262cf5c37a3a54778458c74aff1479411 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py @@ -0,0 +1,14 @@ +from 
.mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter //= 2 # 100ep -> 50ep + +lr_multiplier.scheduler.milestones = [ + milestone // 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..249387fffeed7c02f592ecc84ee5a295533b1ed7 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py @@ -0,0 +1,29 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + +# Config source: +# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=23, + w_a=38.65, + w_0=96, + w_m=2.43, + group_width=40, + norm="SyncBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +# RegNets benefit from enabling cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..731320e74ebed4d8ceec58c07cb906542b8b021b --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 
200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..da94e6f90d823f110e4a2373d7fd16b3d1ab5ac3 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py @@ -0,0 +1,30 @@ +from .mask_rcnn_R_50_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) +from detectron2.config import LazyCall as L +from detectron2.modeling.backbone import RegNet +from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock + +# Config source: +# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa +model.backbone.bottom_up = L(RegNet)( + stem_class=SimpleStem, + stem_width=32, + block_class=ResBottleneckBlock, + depth=22, + w_a=31.41, + w_0=96, + w_m=2.24, + group_width=64, + se_ratio=0.25, + norm="SyncBN", + out_features=["s1", "s2", "s3", "s4"], +) +model.pixel_std = [57.375, 57.120, 58.395] + +# RegNets benefit from enabling 
cudnn benchmark mode +train.cudnn_benchmark = True diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..b867cc865e5ac4d7b70221da141894efd7cbd75c --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 2 # 100ep -> 200ep + +lr_multiplier.scheduler.milestones = [ + milestone * 2 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py new file mode 100755 index 0000000000000000000000000000000000000000..7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34 --- /dev/null +++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py @@ -0,0 +1,14 @@ +from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( + dataloader, + lr_multiplier, + model, + optimizer, + train, +) + +train.max_iter *= 4 # 100ep -> 400ep + +lr_multiplier.scheduler.milestones = [ + milestone * 4 for milestone in lr_multiplier.scheduler.milestones +] +lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/configs/quick_schedules/README.md b/configs/quick_schedules/README.md new file mode 100755 index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f --- /dev/null +++ b/configs/quick_schedules/README.md @@ -0,0 +1,8 @@ +These are quick configs for performance or accuracy regression tracking purposes. + +* `*instant_test.yaml`: can train on 2 GPUs. They are used to test whether the training can + successfully finish. They are not expected to produce reasonable training results. 
+* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify + the results are as expected. +* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy + is within the normal range. diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e --- /dev/null +++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee --- /dev/null +++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896 --- /dev/null +++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: 
"../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c --- /dev/null +++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,15 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) + TEST: ("coco_2017_val_100",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" +DATASETS: + TEST: ("keypoints_coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml new file mode 100755 index 
0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,16 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 +DATASETS: + TRAIN: ("keypoints_coco_2017_val_100",) + TEST: ("keypoints_coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml @@ -0,0 +1,30 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False + LOSS_WEIGHT: 4.0 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100755 index 
0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0 --- /dev/null +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,28 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "value" + CLIP_VALUE: 1.0 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3 --- /dev/null +++ 
b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + IMS_PER_BATCH: 8 # base uses 16 + WARMUP_FACTOR: 0.33333 + WARMUP_ITERS: 100 + STEPS: (11000, 11600) + MAX_ITER: 12000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml 
b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] + AUG: + ENABLED: True + MIN_SIZES: (700, 800) # to save some time diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + 
IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml @@ -0,0 +1,6 @@ +_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" +MODEL: + ROI_BOX_HEAD: + TRAIN_ON_PRED_BOXES: True +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936 --- /dev/null +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + WARMUP_FACTOR: 0.3333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42 --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" +MODEL: + WEIGHTS: 
"detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" +DATASETS: + TEST: ("coco_2017_val_100_panoptic_separated",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_separated",) + TEST: ("coco_2017_val_100_panoptic_separated",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 1 diff --git a/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f --- /dev/null +++ b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 500 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 
0.8]] diff --git a/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8 --- /dev/null +++ b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9 --- /dev/null +++ b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8 --- /dev/null +++ b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml 
b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809 --- /dev/null +++ b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + STEPS: (30,) + MAX_ITER: 40 + BASE_LR: 0.005 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) 
+ MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml new file mode 100755 index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61 --- /dev/null +++ b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 300 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml b/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..ad87a098a83e5d670378e7fa451bef3c6cc1f406 --- /dev/null +++ b/configs/transfiner/mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "./output_101_3x" diff --git a/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml b/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml new file mode 100755 index 0000000000000000000000000000000000000000..157f74d4078260d1261985d68cfde47cca5a80c9 --- /dev/null +++ b/configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on 
Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "./output_101_3x_deform" diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e0cfed630dcce0da04c1461e72197e11bdc820c8 --- /dev/null +++ b/configs/transfiner/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +OUTPUT_DIR: "./output_r50_1x" diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml new file mode 100755 index 0000000000000000000000000000000000000000..9f75563b51e1e071d9c70f7254c4495f2085fa12 --- /dev/null +++ b/configs/transfiner/mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "./output_r50_3x" diff --git a/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml b/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml new file mode 100755 index 0000000000000000000000000000000000000000..2354b3419225809fd660c09779a3ee94841f5cdb --- /dev/null +++ b/configs/transfiner/mask_rcnn_R_50_FPN_3x_deform.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "./output_r50_3x_deform" diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f11ad3eb72953a7bc05d5e333fca4a62ab633b9c --- /dev/null +++ b/demo/README.md @@ -0,0 +1,5 @@ + +## Mask 
Transfiner Demo + +For visualization demo, please refer to our [visualization script](https://github.com/SysCV/transfiner#visualization). + diff --git a/demo/__pycache__/predictor.cpython-38.pyc b/demo/__pycache__/predictor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fed9aa41681040202f3708d1122e5240091b8a9 Binary files /dev/null and b/demo/__pycache__/predictor.cpython-38.pyc differ diff --git a/demo/demo.py b/demo/demo.py new file mode 100755 index 0000000000000000000000000000000000000000..a14dfb94c998bd3bfb650004a6fe1a23bf17eda3 --- /dev/null +++ b/demo/demo.py @@ -0,0 +1,190 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import argparse +import glob +import multiprocessing as mp +import numpy as np +import os +import tempfile +import time +import warnings +import cv2 +import tqdm + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.utils.logger import setup_logger + +from predictor import VisualizationDemo + +# constants +WINDOW_NAME = "COCO detections" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + # To use demo for Panoptic-DeepLab, please uncomment the following two lines. 
+ # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa + # add_panoptic_deeplab_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") + parser.add_argument( + "--config-file", + default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. 
" + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +def test_opencv_video_format(codec, file_ext): + with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: + filename = os.path.join(dir, "test_file" + file_ext) + writer = cv2.VideoWriter( + filename=filename, + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(30), + frameSize=(10, 10), + isColor=True, + ) + [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] + writer.release() + if os.path.isfile(filename): + return True + return False + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + + if args.input: + if len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img) + logger.info( + "{}: {} in {:.2f}s".format( + path, + "detected {} instances".format(len(predictions["instances"])) + if "instances" in predictions + else "finished", + time.time() - start_time, + ) + ) + + if args.output: + if os.path.isdir(args.output): + assert os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + #assert len(args.input) == 1, "Please specify a directory with args.output" + 
os.makedirs(args.output) + out_filename = os.path.join(args.output, os.path.basename(path)) + #out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + assert args.output is None, "output not yet supported with --webcam!" + cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cam.release() + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + codec, file_ext = ( + ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") + ) + if codec == ".mp4v": + warnings.warn("x264 codec not available, switching to mp4v") + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + file_ext + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. 
MPEG) + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(frames_per_second), + frameSize=(width, height), + isColor=True, + ) + assert os.path.isfile(args.video_input) + for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): + if args.output: + output_file.write(vis_frame) + else: + cv2.namedWindow(basename, cv2.WINDOW_NORMAL) + cv2.imshow(basename, vis_frame) + if cv2.waitKey(1) == 27: + break # esc to quit + video.release() + if args.output: + output_file.release() + else: + cv2.destroyAllWindows() diff --git a/demo/predictor.py b/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..7b7ebd3f846850172c1f560f8492d51e5667f76d --- /dev/null +++ b/demo/predictor.py @@ -0,0 +1,220 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import atexit +import bisect +import multiprocessing as mp +from collections import deque +import cv2 +import torch + +from detectron2.data import MetadataCatalog +from detectron2.engine.defaults import DefaultPredictor +from detectron2.utils.video_visualizer import VideoVisualizer +from detectron2.utils.visualizer import ColorMode, Visualizer + + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. + """ + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + + def run_on_image(self, image): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. 
+ + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + predictions = self.predictor(image) + # Convert image from OpenCV BGR format to Matplotlib RGB format. + image = image[:, :, ::-1] + visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg_predictions( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + vis_output = visualizer.draw_instance_predictions(predictions=instances) + + return predictions, vis_output + + def _frame_from_video(self, video): + while video.isOpened(): + success, frame = video.read() + if success: + yield frame + else: + break + + def run_on_video(self, video): + """ + Visualizes predictions on frames of the input video. + + Args: + video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be + either a webcam or a video file. + + Yields: + ndarray: BGR visualizations of each video frame. 
+ """ + video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) + + def process_predictions(frame, predictions): + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_frame = video_visualizer.draw_panoptic_seg_predictions( + frame, panoptic_seg.to(self.cpu_device), segments_info + ) + elif "instances" in predictions: + predictions = predictions["instances"].to(self.cpu_device) + vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) + elif "sem_seg" in predictions: + vis_frame = video_visualizer.draw_sem_seg( + frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + + # Converts Matplotlib RGB format to OpenCV BGR format + vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) + return vis_frame + + frame_gen = self._frame_from_video(video) + if self.parallel: + buffer_size = self.predictor.default_buffer_size + + frame_data = deque() + + for cnt, frame in enumerate(frame_gen): + frame_data.append(frame) + self.predictor.put(frame) + + if cnt >= buffer_size: + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + + while len(frame_data): + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + else: + for frame in frame_gen: + yield process_predictions(frame, self.predictor(frame)) + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. + Because rendering the visualization takes considerably amount of time, + this helps improve throughput a little bit when rendering videos. 
+ """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + 
+ @property + def default_buffer_size(self): + return len(self.procs) * 5 diff --git a/demo/sample_imgs/000000008844.jpg b/demo/sample_imgs/000000008844.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d117937ec29e62d694bd6d2dc70eb41d9a92326c Binary files /dev/null and b/demo/sample_imgs/000000008844.jpg differ diff --git a/demo/sample_imgs/000000018737.jpg b/demo/sample_imgs/000000018737.jpg new file mode 100644 index 0000000000000000000000000000000000000000..340c394ff1398a1496c81855ff1128bbf8071842 Binary files /dev/null and b/demo/sample_imgs/000000018737.jpg differ diff --git a/demo/sample_imgs/000000126137.jpg b/demo/sample_imgs/000000126137.jpg new file mode 100644 index 0000000000000000000000000000000000000000..83c736e918992a085819d8a70103159b80c90998 Binary files /dev/null and b/demo/sample_imgs/000000126137.jpg differ diff --git a/demo/sample_imgs/000000131444.jpg b/demo/sample_imgs/000000131444.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d4f63ec0b4ce746eb0eba168eceacb0032d1aac3 Binary files /dev/null and b/demo/sample_imgs/000000131444.jpg differ diff --git a/demo/sample_imgs/000000132408.jpg b/demo/sample_imgs/000000132408.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dac4b04c9fefe52341456fe400d56a3d6ccb367b Binary files /dev/null and b/demo/sample_imgs/000000132408.jpg differ diff --git a/demo/sample_imgs/000000157365.jpg b/demo/sample_imgs/000000157365.jpg new file mode 100644 index 0000000000000000000000000000000000000000..10e719bef58161855d280d7a0034491d12a382f0 Binary files /dev/null and b/demo/sample_imgs/000000157365.jpg differ diff --git a/demo/sample_imgs/000000176037.jpg b/demo/sample_imgs/000000176037.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0abf887fb73e89869f761c8046c9227bc5bb298a Binary files /dev/null and b/demo/sample_imgs/000000176037.jpg differ diff --git a/demo/sample_imgs/000000224200.jpg b/demo/sample_imgs/000000224200.jpg 
new file mode 100644 index 0000000000000000000000000000000000000000..fc5b3de83c8b3861ec92ecef6263249ad7b11473 Binary files /dev/null and b/demo/sample_imgs/000000224200.jpg differ diff --git a/demo/sample_imgs/000000244019.jpg b/demo/sample_imgs/000000244019.jpg new file mode 100644 index 0000000000000000000000000000000000000000..54927eb0e93cf3b5ce55a33aa64f5dd36ebd1008 Binary files /dev/null and b/demo/sample_imgs/000000244019.jpg differ diff --git a/demo/sample_imgs/000000252776.jpg b/demo/sample_imgs/000000252776.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8b9dd0ef433ad232164a0c92c3414f49dffc6fec Binary files /dev/null and b/demo/sample_imgs/000000252776.jpg differ diff --git a/demo/sample_imgs/000000286849.jpg b/demo/sample_imgs/000000286849.jpg new file mode 100644 index 0000000000000000000000000000000000000000..12d9e147d759e2aeeb4e3903bc129157f71ac642 Binary files /dev/null and b/demo/sample_imgs/000000286849.jpg differ diff --git a/demo/sample_imgs/000000292997.jpg b/demo/sample_imgs/000000292997.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4d56af9492d02539b68805cb80c075d6efad63e3 Binary files /dev/null and b/demo/sample_imgs/000000292997.jpg differ diff --git a/demo/sample_imgs/000000321214.jpg b/demo/sample_imgs/000000321214.jpg new file mode 100644 index 0000000000000000000000000000000000000000..427cdf048ac5bb950bdf808e791e6a52477169b4 Binary files /dev/null and b/demo/sample_imgs/000000321214.jpg differ diff --git a/demo/sample_imgs/000000344909.jpg b/demo/sample_imgs/000000344909.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6323a743693f7d87c620888e5587edbf545f0f76 Binary files /dev/null and b/demo/sample_imgs/000000344909.jpg differ diff --git a/demo/sample_imgs/000000360661.jpg b/demo/sample_imgs/000000360661.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c90c058740466131082aed6fee6964cda04a4711 Binary files /dev/null and 
b/demo/sample_imgs/000000360661.jpg differ diff --git a/demo/sample_imgs/000000396903.jpg b/demo/sample_imgs/000000396903.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f10456ff9d60df5821d6427e672f9ffe51480d9b Binary files /dev/null and b/demo/sample_imgs/000000396903.jpg differ diff --git a/demo/sample_imgs/000000404922.jpg b/demo/sample_imgs/000000404922.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6595f7b259bbfeb5de8d8aa172254db8a0e56645 Binary files /dev/null and b/demo/sample_imgs/000000404922.jpg differ diff --git a/demo/sample_imgs/000000442836.jpg b/demo/sample_imgs/000000442836.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3e24da5924c518e34bc7c56dd7dc1404d58463b3 Binary files /dev/null and b/demo/sample_imgs/000000442836.jpg differ diff --git a/demo/sample_imgs/000000464144.jpg b/demo/sample_imgs/000000464144.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b59f7e4b7fd684e7d2b47b3ac9036fb592a5457d Binary files /dev/null and b/demo/sample_imgs/000000464144.jpg differ diff --git a/demo/sample_imgs/000000482477.jpg b/demo/sample_imgs/000000482477.jpg new file mode 100644 index 0000000000000000000000000000000000000000..98c5277b190faa54f12e85df99768bef255abfff Binary files /dev/null and b/demo/sample_imgs/000000482477.jpg differ diff --git a/demo/sample_imgs/000000495054.jpg b/demo/sample_imgs/000000495054.jpg new file mode 100644 index 0000000000000000000000000000000000000000..53ae52e0be22fbb426eeec63d14e85c5b2b9fab2 Binary files /dev/null and b/demo/sample_imgs/000000495054.jpg differ diff --git a/demo/sample_imgs/000000558073.jpg b/demo/sample_imgs/000000558073.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6d0a14a1b5b8765c0df91fde476f6a1d488a6b05 Binary files /dev/null and b/demo/sample_imgs/000000558073.jpg differ diff --git a/output_3x_transfiner_r101_deform.pth b/output_3x_transfiner_r101_deform.pth new file mode 100644 index 
0000000000000000000000000000000000000000..78a66ab48160fc823ea4daaa93d19261c77f1ec9 --- /dev/null +++ b/output_3x_transfiner_r101_deform.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5fefa123e70f379af846c979139e53e89b8c661b7285dd485e640fa86faead +size 294703133 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed14d209773f90b011767ba0a2ea47f9cfc497d4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +pyyaml==5.1 +torch==1.7.1 +torchvision==0.8.2 +opencv-python-headless +scikit-image +kornia==0.5.11 + +