File size: 3,767 Bytes
a06fad0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
from detectron2.config import CfgNode as CN


def add_kmax_deeplab_config(cfg):
    """
    Install the kMaX-DeepLab default options on ``cfg`` in place.

    New keys are written under the existing ``cfg.INPUT``, ``cfg.SOLVER`` and
    ``cfg.MODEL`` nodes, and two new sub-nodes, ``cfg.MODEL.KMAX_DEEPLAB`` and
    ``cfg.MODEL.CONVNEXT``, are created. Nothing is returned.
    """
    # ------------------------------------------------------------------
    # Data pipeline (NOTE: carried over from the original maskformer config)
    # ------------------------------------------------------------------
    data = cfg.INPUT
    # Name of the dataset mapper to use.
    data.DATASET_MAPPER_NAME = "coco_panoptic_kmaxdeeplab"
    # Pad image and segmentation GT in dataset mapper.
    data.SIZE_DIVISIBILITY = -1

    # ------------------------------------------------------------------
    # Solver
    # ------------------------------------------------------------------
    solver = cfg.SOLVER
    # Weight decay on embedding.
    solver.WEIGHT_DECAY_EMBED = 0.05
    # Optimizer selection and backbone multiplier.
    solver.OPTIMIZER = "ADAMW"
    solver.BACKBONE_MULTIPLIER = 0.1

    # ------------------------------------------------------------------
    # kMaX-DeepLab model
    # ------------------------------------------------------------------
    cfg.MODEL.KMAX_DEEPLAB = CN()
    kmax = cfg.MODEL.KMAX_DEEPLAB

    # Whether to share matching results.
    kmax.SHARE_FINAL_MATCHING = True

    # Visualization.
    kmax.SAVE_VIS_NUM = 0

    # Loss switches and weights.
    kmax.DEEP_SUPERVISION = True
    kmax.SKIP_CONN_INIT_VALUE = 0.0
    kmax.NO_OBJECT_WEIGHT = 1e-5
    kmax.CLASS_WEIGHT = 3.0
    kmax.DICE_WEIGHT = 3.0
    kmax.MASK_WEIGHT = 0.3
    kmax.INSDIS_WEIGHT = 1.0
    kmax.AUX_SEMANTIC_WEIGHT = 1.0

    # Temperature / sample-count settings for the two auxiliary losses.
    kmax.PIXEL_INSDIS_TEMPERATURE = 1.5
    kmax.PIXEL_INSDIS_SAMPLE_K = 4096
    kmax.AUX_SEMANTIC_TEMPERATURE = 2.0
    # NOTE(review): this key looks like a truncated "AUX_SEMANTIC_SAMPLE_K".
    # It is kept verbatim because code or YAML files outside this function may
    # look it up under this exact spelling -- confirm before renaming.
    kmax.UX_SEMANTIC_SAMPLE_K = 4096

    # Pixel decoder.
    kmax.PIXEL_DEC = CN()
    pixel_dec = kmax.PIXEL_DEC
    pixel_dec.NAME = "kMaXPixelDecoder"
    pixel_dec.IN_FEATURES = ["res2", "res3", "res4", "res5"]
    pixel_dec.DEC_LAYERS = [1, 5, 1, 1]
    pixel_dec.LAYER_TYPES = ["axial", "axial", "bottleneck", "bottleneck"]
    pixel_dec.DEC_CHANNELS = [512, 256, 128, 64]
    pixel_dec.DROP_PATH_PROB = 0.0

    # Transformer decoder.
    kmax.TRANS_DEC = CN()
    trans_dec = kmax.TRANS_DEC
    trans_dec.NAME = "kMaXTransformerDecoder"
    trans_dec.DEC_LAYERS = [2, 2, 2]
    trans_dec.NUM_OBJECT_QUERIES = 128
    trans_dec.IN_CHANNELS = [2048, 1024, 512]
    trans_dec.DROP_PATH_PROB = 0.0

    # Inference.
    kmax.TEST = CN()
    inference = kmax.TEST
    inference.SEMANTIC_ON = False
    inference.INSTANCE_ON = False
    inference.PANOPTIC_ON = True
    inference.OBJECT_MASK_THRESHOLD = 0.4
    inference.CLASS_THRESHOLD_THING = 0.7
    inference.CLASS_THRESHOLD_STUFF = 0.5
    inference.REORDER_CLASS_WEIGHT = 1.0
    inference.REORDER_MASK_WEIGHT = 1.0
    inference.OVERLAP_THRESHOLD = 0.8
    inference.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False

    # Sometimes `backbone.size_divisibility` is set to 0 for some backbone
    # (e.g. ResNet); this option can be used to override it.
    kmax.SIZE_DIVISIBILITY = -1

    # ------------------------------------------------------------------
    # ConvNeXt backbone
    # https://github.com/SHI-Labs/OneFormer/blob/main/oneformer/config.py#L197
    # ------------------------------------------------------------------
    cfg.MODEL.CONVNEXT = CN()
    convnext = cfg.MODEL.CONVNEXT
    convnext.IN_CHANNELS = 3
    convnext.DEPTHS = [3, 3, 27, 3]
    convnext.DIMS = [192, 384, 768, 1536]
    convnext.DROP_PATH_RATE = 0.6
    # NOTE(review): presumably the layer-scale init value -- confirm against
    # the backbone implementation.
    convnext.LSIT = 1e-6
    convnext.OUT_INDICES = [0, 1, 2, 3]
    convnext.OUT_FEATURES = ["res2", "res3", "res4", "res5"]

    # ------------------------------------------------------------------
    # Input image size and scale range
    # ------------------------------------------------------------------
    data.IMAGE_SIZE = [1281, 1281]
    data.MIN_SCALE = 0.2
    data.MAX_SCALE = 2.0