File size: 3,672 Bytes
914926a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
{
  "data": {
    "augmentations": {
      "affine_p": 0.0,
      "blur_p": 0.5,
      "cut_p": 0.0,
      "flip_p": 0.5,
      "gamma_p": 1.0,
      "grayscale_p": 0.2,
      "invert_p": 0.0,
      "jitter_p": 1.0,
      "noise_pad": 1.0,
      "random_blur": 2.0,
      "random_gamma": 0.5,
      "random_jitter": 0.5,
      "random_rotation": 0.0,
      "random_scale": 4.0,
      "random_shear": 0.0,
      "random_translate_x": 0.04,
      "random_translate_y": 0.01,
      "rotation_p": 0.0,
      "scale_p": 0.0,
      "shape_mult": 14,
      "test_context": 1.0,
      "translate_p": 0.0
    },
    "crop": "garg",
    "data_root": "datasets",
    "image_shape": [
      518,
      518
    ],
    "mini": 1.0,
    "normalization": "imagenet",
    "num_frames": 1,
    "pair": 1,
    "resize_method": "contextcrop",
    "sampling": {
      "KITTI": 1.0
    },
    "shape_constraints": {
      "height_min": 15,
      "pixels_max": 600000.0,
      "pixels_min": 200000.0,
      "ratio_bounds": [
        0.5,
        2.5
      ],
      "sample": true,
      "shape_mult": 14,
      "width_min": 15
    },
    "train_datasets": [
      "KITTI"
    ],
    "val_datasets": [
      "KITTI"
    ]
  },
  "eps": 1e-06,
  "generic": {
    "deterministic": true,
    "name_page": "ufish",
    "seed": 42
  },
  "model": {
    "camera": {
      "augment": true,
      "tau": 50000,
      "weak_ratio": 0.9
    },
    "expansion": 4,
    "layer_scale": 0.0001,
    "name": "UniK3D",
    "num_heads": 8,
    "num_steps": 100000,
    "pixel_decoder": {
      "depths": [
        2,
        2,
        2
      ],
      "detach": 0.1,
      "dropout": 0.0,
      "hidden_dim": 256,
      "kernel_size": 3,
      "name": "Decoder",
      "num_prompt_blocks": 1,
      "out_dim": 32,
      "use_norm": false
    },
    "pixel_encoder": {
      "cls_token_embed_dims": [
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384
      ],
      "depths": [
        3,
        6,
        9,
        12
      ],
      "embed_dim": 384,
      "embed_dims": [
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384,
        384
      ],
      "freeze_norm": true,
      "frozen_stages": 0,
      "lr": 3e-06,
      "name": "dinov2_vits14",
      "num_register_tokens": 0,
      "output_idx": [
        3,
        6,
        9,
        12
      ],
      "pretrained": null,
      "stacking_fn": "last",
      "use_norm": true,
      "wd": 0.1
    }
  },
  "training": {
    "batch_size": 8,
    "clipping": 1.0,
    "cycle_beta": true,
    "drop_path": 0.0,
    "ema": 0.9995,
    "f16": "f16",
    "ld": 1.0,
    "losses": {
      "camera": {
        "alpha": 1.0,
        "dims": [
          1,
          2
        ],
        "fn": "l1",
        "gamma": 1.0,
        "input_fn": "linear",
        "name": "PolarRegression",
        "output_fn": "sqrt",
        "polar_asym": 0.7,
        "polar_weight": 3.0,
        "weight": 1.0
      },
      "confidence": {
        "input_fn": "log",
        "name": "Confidence",
        "output_fn": "sqrt",
        "weight": 0.1
      },
      "scale": {
        "alpha": 1.0,
        "fn": "l1",
        "gamma": 1.0,
        "input_fn": "log",
        "name": "Scale",
        "output_fn": "sqrt",
        "weight": 1.0
      }
    },
    "lr": 5e-05,
    "lr_final": 1e-06,
    "lr_warmup": 1.0,
    "n_iters": 250000,
    "nsteps_accumulation_gradient": 4,
    "validation_interval": 2500,
    "warmup_iters": 75000,
    "wd": 0.1,
    "wd_final": 0.1
  }
}