MaureenZOU committed on
Commit
e972e1f
·
1 Parent(s): a74dbcb
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -32,3 +32,88 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ images/animals.png filter=lfs diff=lfs merge=lfs -text
36
+ images/region_retrieval.png filter=lfs diff=lfs merge=lfs -text
37
+ xdecoder_focalt_last_novg.pt filter=lfs diff=lfs merge=lfs -text
38
+ xdecoder_focalt_last.pt filter=lfs diff=lfs merge=lfs -text
39
+ v_emb.da filter=lfs diff=lfs merge=lfs -text
40
+ images/coco/077.jpg filter=lfs diff=lfs merge=lfs -text
41
+ images/coco/071.jpg filter=lfs diff=lfs merge=lfs -text
42
+ images/coco/022.jpg filter=lfs diff=lfs merge=lfs -text
43
+ images/coco/026.jpg filter=lfs diff=lfs merge=lfs -text
44
+ images/coco/036.jpg filter=lfs diff=lfs merge=lfs -text
45
+ images/coco/039.jpg filter=lfs diff=lfs merge=lfs -text
46
+ images/coco/052.jpg filter=lfs diff=lfs merge=lfs -text
47
+ images/coco/057.jpg filter=lfs diff=lfs merge=lfs -text
48
+ images/coco/061.jpg filter=lfs diff=lfs merge=lfs -text
49
+ images/coco/017.jpg filter=lfs diff=lfs merge=lfs -text
50
+ images/coco/021.jpg filter=lfs diff=lfs merge=lfs -text
51
+ images/coco/030.jpg filter=lfs diff=lfs merge=lfs -text
52
+ images/coco/056.jpg filter=lfs diff=lfs merge=lfs -text
53
+ images/coco/064.jpg filter=lfs diff=lfs merge=lfs -text
54
+ images/coco/072.jpg filter=lfs diff=lfs merge=lfs -text
55
+ images/coco/014.jpg filter=lfs diff=lfs merge=lfs -text
56
+ images/coco/025.jpg filter=lfs diff=lfs merge=lfs -text
57
+ images/coco/027.jpg filter=lfs diff=lfs merge=lfs -text
58
+ images/coco/038.jpg filter=lfs diff=lfs merge=lfs -text
59
+ images/coco/044.jpg filter=lfs diff=lfs merge=lfs -text
60
+ images/coco/049.jpg filter=lfs diff=lfs merge=lfs -text
61
+ images/coco/053.jpg filter=lfs diff=lfs merge=lfs -text
62
+ images/coco/078.jpg filter=lfs diff=lfs merge=lfs -text
63
+ images/coco/002.jpg filter=lfs diff=lfs merge=lfs -text
64
+ images/coco/005.jpg filter=lfs diff=lfs merge=lfs -text
65
+ images/coco/007.jpg filter=lfs diff=lfs merge=lfs -text
66
+ images/coco/008.jpg filter=lfs diff=lfs merge=lfs -text
67
+ images/coco/011.jpg filter=lfs diff=lfs merge=lfs -text
68
+ images/coco/013.jpg filter=lfs diff=lfs merge=lfs -text
69
+ images/coco/020.jpg filter=lfs diff=lfs merge=lfs -text
70
+ images/coco/034.jpg filter=lfs diff=lfs merge=lfs -text
71
+ images/coco/000.jpg filter=lfs diff=lfs merge=lfs -text
72
+ images/coco/066.jpg filter=lfs diff=lfs merge=lfs -text
73
+ images/coco/074.jpg filter=lfs diff=lfs merge=lfs -text
74
+ images/coco/065.jpg filter=lfs diff=lfs merge=lfs -text
75
+ images/coco/023.jpg filter=lfs diff=lfs merge=lfs -text
76
+ images/coco/024.jpg filter=lfs diff=lfs merge=lfs -text
77
+ images/coco/033.jpg filter=lfs diff=lfs merge=lfs -text
78
+ images/coco/040.jpg filter=lfs diff=lfs merge=lfs -text
79
+ images/coco/041.jpg filter=lfs diff=lfs merge=lfs -text
80
+ images/coco/046.jpg filter=lfs diff=lfs merge=lfs -text
81
+ images/coco/060.jpg filter=lfs diff=lfs merge=lfs -text
82
+ images/coco/003.jpg filter=lfs diff=lfs merge=lfs -text
83
+ images/coco/058.jpg filter=lfs diff=lfs merge=lfs -text
84
+ images/coco/073.jpg filter=lfs diff=lfs merge=lfs -text
85
+ images/coco/042.jpg filter=lfs diff=lfs merge=lfs -text
86
+ images/coco/015.jpg filter=lfs diff=lfs merge=lfs -text
87
+ images/coco/016.jpg filter=lfs diff=lfs merge=lfs -text
88
+ images/coco/018.jpg filter=lfs diff=lfs merge=lfs -text
89
+ images/coco/051.jpg filter=lfs diff=lfs merge=lfs -text
90
+ images/coco/054.jpg filter=lfs diff=lfs merge=lfs -text
91
+ images/coco/063.jpg filter=lfs diff=lfs merge=lfs -text
92
+ images/coco/010.jpg filter=lfs diff=lfs merge=lfs -text
93
+ images/coco/050.jpg filter=lfs diff=lfs merge=lfs -text
94
+ images/coco/070.jpg filter=lfs diff=lfs merge=lfs -text
95
+ images/coco/037.jpg filter=lfs diff=lfs merge=lfs -text
96
+ images/coco/031.jpg filter=lfs diff=lfs merge=lfs -text
97
+ images/coco/062.jpg filter=lfs diff=lfs merge=lfs -text
98
+ images/coco/067.jpg filter=lfs diff=lfs merge=lfs -text
99
+ images/coco/069.jpg filter=lfs diff=lfs merge=lfs -text
100
+ images/coco/029.jpg filter=lfs diff=lfs merge=lfs -text
101
+ images/coco/012.jpg filter=lfs diff=lfs merge=lfs -text
102
+ images/coco/068.jpg filter=lfs diff=lfs merge=lfs -text
103
+ images/coco/075.jpg filter=lfs diff=lfs merge=lfs -text
104
+ images/coco/006.jpg filter=lfs diff=lfs merge=lfs -text
105
+ images/coco/035.jpg filter=lfs diff=lfs merge=lfs -text
106
+ images/coco/001.jpg filter=lfs diff=lfs merge=lfs -text
107
+ images/coco/055.jpg filter=lfs diff=lfs merge=lfs -text
108
+ images/coco/048.jpg filter=lfs diff=lfs merge=lfs -text
109
+ images/coco/019.jpg filter=lfs diff=lfs merge=lfs -text
110
+ images/coco/028.jpg filter=lfs diff=lfs merge=lfs -text
111
+ images/coco/043.jpg filter=lfs diff=lfs merge=lfs -text
112
+ images/coco/059.jpg filter=lfs diff=lfs merge=lfs -text
113
+ images/coco/079.jpg filter=lfs diff=lfs merge=lfs -text
114
+ images/coco/004.jpg filter=lfs diff=lfs merge=lfs -text
115
+ images/coco/032.jpg filter=lfs diff=lfs merge=lfs -text
116
+ images/coco/045.jpg filter=lfs diff=lfs merge=lfs -text
117
+ images/coco/047.jpg filter=lfs diff=lfs merge=lfs -text
118
+ images/coco/076.jpg filter=lfs diff=lfs merge=lfs -text
119
+ images/coco/009.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IntelliJ project files
2
+ .idea
3
+ *.iml
4
+ out
5
+ gen
6
+
7
+ ### Vim template
8
+ [._]*.s[a-w][a-z]
9
+ [._]s[a-w][a-z]
10
+ *.un~
11
+ Session.vim
12
+ .netrwhist
13
+ *~
14
+
15
+ ### IPythonNotebook template
16
+ # Temporary data
17
+ .ipynb_checkpoints/
18
+
19
+ ### Python template
20
+ # Byte-compiled / optimized / DLL files
21
+ __pycache__/
22
+ *.py[cod]
23
+ *$py.class
24
+
25
+ # C extensions
26
+ *.so
27
+
28
+ # Distribution / packaging
29
+ .Python
30
+ env/
31
+ build/
32
+ develop-eggs/
33
+ dist/
34
+ downloads/
35
+ eggs/
36
+ .eggs/
37
+ #lib/
38
+ #lib64/
39
+ parts/
40
+ sdist/
41
+ var/
42
+ *.egg-info/
43
+ .installed.cfg
44
+ *.egg
45
+
46
+ # PyInstaller
47
+ # Usually these files are written by a python script from a template
48
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
49
+ *.manifest
50
+ *.spec
51
+
52
+ # Installer logs
53
+ pip-log.txt
54
+ pip-delete-this-directory.txt
55
+
56
+ # Unit test / coverage reports
57
+ htmlcov/
58
+ .tox/
59
+ .coverage
60
+ .coverage.*
61
+ .cache
62
+ nosetests.xml
63
+ coverage.xml
64
+ *,cover
65
+
66
+ # Translations
67
+ *.mo
68
+ *.pot
69
+
70
+ # Django stuff:
71
+ *.log
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ target/
78
+
79
+ *.ipynb
80
+ *.params
81
+ # *.json
82
+ .vscode/
83
+ *.code-workspace/
84
+
85
+ lib/pycocotools/_mask.c
86
+ lib/nms/cpu_nms.c
87
+
88
+ OUTPUT
89
+ OUTPUT/*
90
+ models/*
91
+ DATASET
92
+ DATASET/*
93
+ external/
94
+ MODELS
95
+ MODELS/*
96
+ gradio_cached_examples/*
97
+
98
+ kill.sh
99
+
100
+ draws/
101
+ plot/
102
+
103
+ *venv/*
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Demo
3
- emoji: 🐢
4
  colorFrom: purple
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.15.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: X Decoder
3
+ emoji: 📈
4
  colorFrom: purple
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 3.14.0
8
  app_file: app.py
9
  pinned: false
10
+ license: afl-3.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------
2
+ # X-Decoder -- Generalized Decoding for Pixel, Image, and Language
3
+ # Copyright (c) 2022 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Xueyan Zou ([email protected]), Jianwei Yang ([email protected])
6
+ # --------------------------------------------------------
7
+
8
+ import os
9
+ os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
10
+
11
+ import gradio as gr
12
+ import torch
13
+ import argparse
14
+
15
+ from xdecoder.BaseModel import BaseModel
16
+ from xdecoder import build_model
17
+ from utils.distributed import init_distributed
18
+ from utils.arguments import load_opt_from_config_files
19
+
20
+ from tasks import *
21
+
22
+ def parse_option():
23
+ parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
24
+ parser.add_argument('--conf_files', default="configs/xdecoder/svlp_focalt_lang.yaml", metavar="FILE", help='path to config file', )
25
+ args = parser.parse_args()
26
+
27
+ return args
28
+
29
+ '''
30
+ build args
31
+ '''
32
+ args = parse_option()
33
+ opt = load_opt_from_config_files(args.conf_files)
34
+ opt = init_distributed(opt)
35
+
36
+ # META DATA
37
+ pretrained_pth_last = os.path.join("xdecoder_focalt_last.pt")
38
+ pretrained_pth_novg = os.path.join("xdecoder_focalt_last_novg.pt")
39
+
40
+ if not os.path.exists(pretrained_pth_last):
41
+ os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt"))
42
+
43
+ if not os.path.exists(pretrained_pth_novg):
44
+ os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt"))
45
+
46
+
47
+ '''
48
+ build model
49
+ '''
50
+ model_last = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth_last).eval().cuda()
51
+ model_cap = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth_novg).eval().cuda()
52
+
53
+ with torch.no_grad():
54
+ model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
55
+ model_cap.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
56
+
57
+ '''
58
+ inference model
59
+ '''
60
+
61
+ @torch.no_grad()
62
+ def inference(image, task, *args, **kwargs):
63
+ image = image.convert("RGB")
64
+ with torch.autocast(device_type='cuda', dtype=torch.float16):
65
+ if task == 'Referring Inpainting':
66
+ return referring_inpainting(model_last, image, *args, **kwargs)
67
+ elif task == 'Referring Segmentation':
68
+ return referring_segmentation(model_last, image, *args, **kwargs)
69
+ elif task == 'Open Vocabulary Semantic Segmentation':
70
+ return open_semseg(model_last, image, *args, **kwargs)
71
+ elif task == 'Open Vocabulary Panoptic Segmentation':
72
+ return open_panoseg(model_last, image, *args, **kwargs)
73
+ elif task == 'Open Vocabulary Instance Segmentation':
74
+ return open_instseg(model_last, image, *args, **kwargs)
75
+ elif task == 'Image Captioning':
76
+ return image_captioning(model_cap, image, *args, **kwargs)
77
+ elif task == 'Referring Captioning (Beta)':
78
+ return referring_captioning([model_last, model_cap], image, *args, **kwargs)
79
+ elif task == 'Text Retrieval':
80
+ return text_retrieval(model_cap, image, *args, **kwargs)
81
+ elif task == 'Image/Region Retrieval (Only Support Exampled 80 images)':
82
+ return region_retrieval([model_cap, model_last], image, *args, **kwargs)
83
+
84
+ '''
85
+ launch app
86
+ '''
87
+ title = "X-Decoder All-in-One Demo"
88
+ description = "<p style='text-align: center'> <a href='' target='_blank'>Project Page</a> | <a href='' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='' target='_blank'>Video</a> </p>"
89
+ article = "The Demo is Run on X-Decoder (Focal-T)."
90
+
91
+ inputs = [gr.inputs.Image(type='pil'), gr.inputs.Radio(choices=["Referring Segmentation", 'Open Vocabulary Semantic Segmentation','Open Vocabulary Instance Segmentation', "Open Vocabulary Panoptic Segmentation", "Image Captioning", "Text Retrieval", "Referring Inpainting", "Referring Captioning (Beta)", "Image/Region Retrieval (Only Support Exampled 80 images)"], type="value", default="OpenVocab Semantic Segmentation", label="Task"), gr.Textbox(label="xdecoder_text"), gr.Textbox(label="inpainting_text"), gr.Textbox(label="task_description")]
92
+ gr.Interface(
93
+ fn=inference,
94
+ inputs=inputs,
95
+ outputs=[
96
+ gr.outputs.Image(
97
+ type="pil",
98
+ label="segmentation results"),
99
+ gr.Textbox(label="text restuls"),
100
+ gr.outputs.Image(
101
+ type="pil",
102
+ label="inpainting results"),
103
+ ],
104
+ examples=[
105
+ ["./images/fruit.jpg", "Referring Segmentation", "The larger watermelon.,The front white flower.,White tea pot.,Flower bunch.,white vase.,The peach on the left.,The brown knife.", '', 'Format: s,s,s'],
106
+ ["./images/animals.png", "Open Vocabulary Semantic Segmentation", "zebra,antelope,giraffe,ostrich,sky,water,grass,sand,tree", '', 'Format: x,x,x'],
107
+ ["./images/street.jpg", "Open Vocabulary Panoptic Segmentation", "stuff:building,sky,street,tree,rock,sidewalk;thing:car,person,traffic light", '', 'Format: stuff:x,x,x;thing:y,y,y'],
108
+ ["./images/owls.jpeg", "Open Vocabulary Instance Segmentation", "owl", '', 'Format: y,y,y'],
109
+ ["./images/mountain.jpeg", "Image Captioning", "", '', ''],
110
+ ["./images/rose.webp", "Text Retrieval", "lily,rose,peoney,tulip", '', 'Format: s,s,s'],
111
+ ["./images/region_retrieval.png", "Image/Region Retrieval (Only Support Exampled 80 images)", "The tangerine on the plate.", '', 'Please describe the object in a detailed way.'],
112
+ ["./images/landscape.jpg", "Referring Captioning (Beta)", "cloud", '', 'Please fill in a noun/noun phrase. (may start with a/the)'],
113
+ ["./images/apples.jpg", "Referring Inpainting", "a yellow apple", 'a pear', 'x-decoder + ldm (inference takes ~40s.)'],
114
+ ],
115
+ title=title,
116
+ description=description,
117
+ article=article,
118
+ allow_flagging='never',
119
+ cache_examples=True,
120
+ ).launch(share=True)
configs/xdecoder/svlp_focalt_lang.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------
2
+ # X-Decoder -- Generalized Decoding for Pixel, Image, and Language
3
+ # Copyright (c) 2022 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Xueyan Zou ([email protected])
6
+ # --------------------------------------------------------
7
+
8
+ ##################
9
+ # Task settings
10
+ ##################
11
+ VERBOSE: true
12
+ MODEL:
13
+ NAME: xdecoder_model
14
+ HEAD: xdecoder_head
15
+ DIM_PROJ: 512
16
+ BACKBONE_DIM: 768
17
+ TEXT:
18
+ ARCH: vlpencoder
19
+ NAME: transformer
20
+ TOKENIZER: clip
21
+ CONTEXT_LENGTH: 77 # 77
22
+ WIDTH: 512
23
+ HEADS: 8
24
+ LAYERS: 12 # 6
25
+ AUTOGRESSIVE: True
26
+ BACKBONE:
27
+ NAME: focal_dw
28
+ PRETRAINED: ''
29
+ LOAD_PRETRAINED: false
30
+ FOCAL:
31
+ PRETRAIN_IMG_SIZE: 224
32
+ PATCH_SIZE: 4
33
+ EMBED_DIM: 96
34
+ DEPTHS: [2, 2, 6, 2]
35
+ FOCAL_LEVELS: [3, 3, 3, 3]
36
+ FOCAL_WINDOWS: [3, 3, 3, 3]
37
+ DROP_PATH_RATE: 0.3
38
+ MLP_RATIO: 4.0
39
+ DROP_RATE: 0.0
40
+ PATCH_NORM: True
41
+ USE_CONV_EMBED: True
42
+ SCALING_MODULATOR: True
43
+ USE_CHECKPOINT: False
44
+ USE_POSTLN: true
45
+ USE_POSTLN_IN_MODULATION: false
46
+ USE_LAYERSCALE: True
47
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
48
+ OUT_INDICES: [0, 1, 2, 3]
49
+ ENCODER:
50
+ NAME: transformer_encoder_fpn
51
+ IGNORE_VALUE: 255
52
+ NUM_CLASSES: 133
53
+ LOSS_WEIGHT: 1.0
54
+ CONVS_DIM: 512
55
+ MASK_DIM: 512
56
+ NORM: "GN"
57
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
58
+ DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"]
59
+ COMMON_STRIDE: 4
60
+ TRANSFORMER_ENC_LAYERS: 6
61
+ DECODER:
62
+ NAME: xdecoder
63
+ TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
64
+ MASK: True
65
+ GROUNDING:
66
+ ENABLED: True
67
+ MAX_LEN: 5
68
+ TEXT_WEIGHT: 2.0
69
+ CLASS_WEIGHT: 0.5
70
+ DETECTION: False
71
+ CAPTION:
72
+ ENABLED: True
73
+ PHRASE_PROB: 0.0
74
+ SIM_THRES: 0.95
75
+ CAPTIONING:
76
+ ENABLED: True
77
+ STEP: 50
78
+ RETRIEVAL:
79
+ ENABLED: True
80
+ DIM_IMG: 768
81
+ ENSEMBLE: True
82
+ HIDDEN_DIM: 512
83
+ NUM_OBJECT_QUERIES: 101
84
+ NHEADS: 8
85
+ DROPOUT: 0.0
86
+ DIM_FEEDFORWARD: 2048
87
+ PRE_NORM: False
88
+ ENFORCE_INPUT_PROJ: False
89
+ SIZE_DIVISIBILITY: 32
90
+ TRAIN_NUM_POINTS: 12544
91
+ OVERSAMPLE_RATIO: 3.0
92
+ IMPORTANCE_SAMPLE_RATIO: 0.75
93
+ DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query
94
+ TOP_GROUNDING_LAYERS: 3
95
+ TOP_CAPTION_LAYERS: 3
96
+ TOP_CAPTIONING_LAYERS: 3
97
+ TOP_RETRIEVAL_LAYERS: 3
98
+ TOP_OPENIMAGE_LAYERS: 10
99
+ TEST:
100
+ SEMANTIC_ON: True
101
+ INSTANCE_ON: True
102
+ PANOPTIC_ON: True
103
+ OVERLAP_THRESHOLD: 0.8
104
+ OBJECT_MASK_THRESHOLD: 0.4
105
+ SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
106
+ DETECTIONS_PER_IMAGE: 100
107
+
108
+ INPUT:
109
+ PIXEL_MEAN: [123.675, 116.280, 103.530]
110
+ PIXEL_STD: [58.395, 57.120, 57.375]
images/apples.jpg ADDED
images/coco/000.jpg ADDED

Git LFS Details

  • SHA256: d0fc594a75f8407f6595501b86bd98bd8c3a245a124c3dcfb17fc77e6b2e5aef
  • Pointer size: 131 Bytes
  • Size of remote file: 192 kB
images/coco/001.jpg ADDED

Git LFS Details

  • SHA256: 380992fb43f611e1e4c7abb763f6c3178dd16ba647f20d428da8e9334ecfe4ad
  • Pointer size: 131 Bytes
  • Size of remote file: 178 kB
images/coco/002.jpg ADDED

Git LFS Details

  • SHA256: 9d79b9739c0ebb2089e3475b9a664ebd982a1b6ed2d93739590a5bbc1f5bbd5e
  • Pointer size: 131 Bytes
  • Size of remote file: 175 kB
images/coco/003.jpg ADDED

Git LFS Details

  • SHA256: d0ed66b03146c8c001b308d9ba601731ef6a2684b33452fac162b5160ad424c5
  • Pointer size: 130 Bytes
  • Size of remote file: 67.1 kB
images/coco/004.jpg ADDED

Git LFS Details

  • SHA256: f5e6d63081ba8fbb2a59770144d7c2350de6a1a8288e15565df7dd1bd87d85bf
  • Pointer size: 131 Bytes
  • Size of remote file: 327 kB
images/coco/005.jpg ADDED

Git LFS Details

  • SHA256: 74411e93658f68edd930e9e665a8cfabb542425dd63016303e7e23f4f895aaca
  • Pointer size: 131 Bytes
  • Size of remote file: 139 kB
images/coco/006.jpg ADDED

Git LFS Details

  • SHA256: 8c1e01146df6e51a43b12bedd7913834d5ed8f91031924a964e915a74cc9f72d
  • Pointer size: 131 Bytes
  • Size of remote file: 152 kB
images/coco/007.jpg ADDED

Git LFS Details

  • SHA256: e55b86a1cda874117ba1bd855e38dbc9b5427b840c362d72330019676ec47a44
  • Pointer size: 131 Bytes
  • Size of remote file: 153 kB
images/coco/008.jpg ADDED

Git LFS Details

  • SHA256: 3b3ed6a27e07126c2175d168405a9bffe60b0f03555d0ac4b33ec0788bd745f2
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
images/coco/009.jpg ADDED

Git LFS Details

  • SHA256: d90d4573a2b3d32f7ce3d704cb81b816f42a6326c59e935c6fa471dccc636718
  • Pointer size: 131 Bytes
  • Size of remote file: 247 kB
images/coco/010.jpg ADDED

Git LFS Details

  • SHA256: 23acc13dba72469607a7e61b6c2fd6e0e2c73dfa33fff340424bd6c805ac8cfc
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
images/coco/011.jpg ADDED

Git LFS Details

  • SHA256: 74e9d13ddccbc52d2fd9da5984279a6218b250668e1ae1c6450be9d1ca2bc41a
  • Pointer size: 131 Bytes
  • Size of remote file: 240 kB
images/coco/012.jpg ADDED

Git LFS Details

  • SHA256: bf4c36daca41208fe9b7c3ae1cd37aade17f6aaf76b716a4b240171990458d74
  • Pointer size: 131 Bytes
  • Size of remote file: 284 kB
images/coco/013.jpg ADDED

Git LFS Details

  • SHA256: 1febd4177cfc04323204378f72499a03760127e3b524e09f30cf3c4b5f1b5f81
  • Pointer size: 131 Bytes
  • Size of remote file: 151 kB
images/coco/014.jpg ADDED

Git LFS Details

  • SHA256: 63129a14da61b57cbcd9b3403bc76a73d091e1940e1b416ae9f5c70b7a1dbb54
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
images/coco/015.jpg ADDED

Git LFS Details

  • SHA256: b896f9d8d31a98765a0e15dfec38f7f036ba71b2af8330b4f7a600fb5294aa98
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
images/coco/016.jpg ADDED

Git LFS Details

  • SHA256: cb4972816eb01ebee8f3bde47a4993972951671e990dca797ad0fa9e0f385c11
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
images/coco/017.jpg ADDED

Git LFS Details

  • SHA256: 8c57b1f9042b59d09a82cfea238c0f11bca7ed54ad57a52c5a24b7b59c7b556e
  • Pointer size: 131 Bytes
  • Size of remote file: 213 kB
images/coco/018.jpg ADDED

Git LFS Details

  • SHA256: 40e469d78ea8f94bdb8d8ed4c04351b49e46593e2c7b6db0433a72e6b1b2bd2f
  • Pointer size: 131 Bytes
  • Size of remote file: 115 kB
images/coco/019.jpg ADDED

Git LFS Details

  • SHA256: 93aab01b240687e18da883e5ec325e4bbcd166519a557369f0eb59c07e346610
  • Pointer size: 131 Bytes
  • Size of remote file: 127 kB
images/coco/020.jpg ADDED

Git LFS Details

  • SHA256: 068ee6be7441d2a84dbd61eceedd8baff651c4058704b504072f18c755203bdc
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB
images/coco/021.jpg ADDED

Git LFS Details

  • SHA256: acd7784c7a33da47014e5c55118789be664617e8028a8d89ada6b59b9212db25
  • Pointer size: 131 Bytes
  • Size of remote file: 240 kB
images/coco/022.jpg ADDED

Git LFS Details

  • SHA256: bf59212be5221a26ae41a0e3e05adc902986b029d00b89d0c68760991a322053
  • Pointer size: 131 Bytes
  • Size of remote file: 191 kB
images/coco/023.jpg ADDED

Git LFS Details

  • SHA256: 741146235c81e978c8dae22c59159c0762a9d75fad9dbbe3511d3ec62196adca
  • Pointer size: 131 Bytes
  • Size of remote file: 136 kB
images/coco/024.jpg ADDED

Git LFS Details

  • SHA256: 5f68d0c7c06169c086a71966090c325af62cd9f13b8e60967b5559f0fc24268c
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
images/coco/025.jpg ADDED

Git LFS Details

  • SHA256: 27781696bf9427875689f4a8f5e7a517a04824fe935d3698552e8f57099b75a4
  • Pointer size: 131 Bytes
  • Size of remote file: 170 kB
images/coco/026.jpg ADDED

Git LFS Details

  • SHA256: c15957d6f41d5a02ecf30c6aa10c146ce6982abf540ef8493523336f0f26b659
  • Pointer size: 131 Bytes
  • Size of remote file: 170 kB
images/coco/027.jpg ADDED

Git LFS Details

  • SHA256: 0a80f5f2ab54c11477641ffa731a8afdcb5309d70bf2eb18c4c834b2bca90971
  • Pointer size: 131 Bytes
  • Size of remote file: 101 kB
images/coco/028.jpg ADDED

Git LFS Details

  • SHA256: 24fdb66de0ab05eab30ceeceba3b4725084e075b8e40d91bb9d28201cd70b484
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
images/coco/029.jpg ADDED

Git LFS Details

  • SHA256: 74b8025dfee410ebeb229b7bc355745eb0013b3c199d87ba70c369d3abed4b2c
  • Pointer size: 130 Bytes
  • Size of remote file: 66.3 kB
images/coco/030.jpg ADDED

Git LFS Details

  • SHA256: 05f27bd1f910d75402f951863cd5a0743ae462c917dc7d0e1611db5a98b3fef5
  • Pointer size: 130 Bytes
  • Size of remote file: 92 kB
images/coco/031.jpg ADDED

Git LFS Details

  • SHA256: 4bb0d5850695a09964d7f111ba8bf70233fd4798332aae8add1648ef736a1de3
  • Pointer size: 131 Bytes
  • Size of remote file: 133 kB
images/coco/032.jpg ADDED

Git LFS Details

  • SHA256: 3c00136461e4776e5464555c425ae9f2e6cabaf8de8df74b3b46df82b4eb56d6
  • Pointer size: 130 Bytes
  • Size of remote file: 67.9 kB
images/coco/033.jpg ADDED

Git LFS Details

  • SHA256: ce4fc4045c9dbe6d5c321248c3d8af821036e2403c6d159d3745c75f6f6bc9e1
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB
images/coco/034.jpg ADDED

Git LFS Details

  • SHA256: 56fa45f1e2d627d0bf4f4f0e6c133659783d22b0a56979227f803bfd3056b153
  • Pointer size: 131 Bytes
  • Size of remote file: 109 kB
images/coco/035.jpg ADDED

Git LFS Details

  • SHA256: c03edd46968d1ffc56d0402853f072e3e02798a01b549d345e4875974348c05e
  • Pointer size: 131 Bytes
  • Size of remote file: 169 kB
images/coco/036.jpg ADDED

Git LFS Details

  • SHA256: 9a6d9379fd7feb77987a43d1ce0e304e2eb5dd7f28c2a818fd8baec5ed4cb700
  • Pointer size: 130 Bytes
  • Size of remote file: 67.2 kB
images/coco/037.jpg ADDED

Git LFS Details

  • SHA256: e5c2df473a26427ae57950acec86d1e4d3a49cdf1a18d427cd1a354465408f00
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB
images/coco/038.jpg ADDED

Git LFS Details

  • SHA256: 261ba1b81c768f47b8452cd8fc7866a354a69c336a927032486d129b8684eb44
  • Pointer size: 130 Bytes
  • Size of remote file: 98.9 kB
images/coco/039.jpg ADDED

Git LFS Details

  • SHA256: 9b9b4956016c3e25d300d53681a25117bb093d031944f0bbdad6d2d98613a88e
  • Pointer size: 131 Bytes
  • Size of remote file: 113 kB
images/coco/040.jpg ADDED

Git LFS Details

  • SHA256: 41dd62010a54964537df22befdf97aab83979ffc406c89909f8eac8075841497
  • Pointer size: 131 Bytes
  • Size of remote file: 257 kB
images/coco/041.jpg ADDED

Git LFS Details

  • SHA256: f35471cf97958cfcdab03f925210a9fb37636d9508174e4077a767c628d45cfa
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB
images/coco/042.jpg ADDED

Git LFS Details

  • SHA256: 871e678893ac7fcdaafe3870900985bbf75707579ab3966a2c7690543d2f68b0
  • Pointer size: 131 Bytes
  • Size of remote file: 219 kB