unik3d-vits / config.json
lpiccinelli's picture
Push model using huggingface_hub.
914926a verified
{
"data": {
"augmentations": {
"affine_p": 0.0,
"blur_p": 0.5,
"cut_p": 0.0,
"flip_p": 0.5,
"gamma_p": 1.0,
"grayscale_p": 0.2,
"invert_p": 0.0,
"jitter_p": 1.0,
"noise_pad": 1.0,
"random_blur": 2.0,
"random_gamma": 0.5,
"random_jitter": 0.5,
"random_rotation": 0.0,
"random_scale": 4.0,
"random_shear": 0.0,
"random_translate_x": 0.04,
"random_translate_y": 0.01,
"rotation_p": 0.0,
"scale_p": 0.0,
"shape_mult": 14,
"test_context": 1.0,
"translate_p": 0.0
},
"crop": "garg",
"data_root": "datasets",
"image_shape": [
518,
518
],
"mini": 1.0,
"normalization": "imagenet",
"num_frames": 1,
"pair": 1,
"resize_method": "contextcrop",
"sampling": {
"KITTI": 1.0
},
"shape_constraints": {
"height_min": 15,
"pixels_max": 600000.0,
"pixels_min": 200000.0,
"ratio_bounds": [
0.5,
2.5
],
"sample": true,
"shape_mult": 14,
"width_min": 15
},
"train_datasets": [
"KITTI"
],
"val_datasets": [
"KITTI"
]
},
"eps": 1e-06,
"generic": {
"deterministic": true,
"name_page": "ufish",
"seed": 42
},
"model": {
"camera": {
"augment": true,
"tau": 50000,
"weak_ratio": 0.9
},
"expansion": 4,
"layer_scale": 0.0001,
"name": "UniK3D",
"num_heads": 8,
"num_steps": 100000,
"pixel_decoder": {
"depths": [
2,
2,
2
],
"detach": 0.1,
"dropout": 0.0,
"hidden_dim": 256,
"kernel_size": 3,
"name": "Decoder",
"num_prompt_blocks": 1,
"out_dim": 32,
"use_norm": false
},
"pixel_encoder": {
"cls_token_embed_dims": [
384,
384,
384,
384,
384,
384,
384,
384,
384,
384,
384,
384
],
"depths": [
3,
6,
9,
12
],
"embed_dim": 384,
"embed_dims": [
384,
384,
384,
384,
384,
384,
384,
384,
384,
384,
384,
384
],
"freeze_norm": true,
"frozen_stages": 0,
"lr": 3e-06,
"name": "dinov2_vits14",
"num_register_tokens": 0,
"output_idx": [
3,
6,
9,
12
],
"pretrained": null,
"stacking_fn": "last",
"use_norm": true,
"wd": 0.1
}
},
"training": {
"batch_size": 8,
"clipping": 1.0,
"cycle_beta": true,
"drop_path": 0.0,
"ema": 0.9995,
"f16": "f16",
"ld": 1.0,
"losses": {
"camera": {
"alpha": 1.0,
"dims": [
1,
2
],
"fn": "l1",
"gamma": 1.0,
"input_fn": "linear",
"name": "PolarRegression",
"output_fn": "sqrt",
"polar_asym": 0.7,
"polar_weight": 3.0,
"weight": 1.0
},
"confidence": {
"input_fn": "log",
"name": "Confidence",
"output_fn": "sqrt",
"weight": 0.1
},
"scale": {
"alpha": 1.0,
"fn": "l1",
"gamma": 1.0,
"input_fn": "log",
"name": "Scale",
"output_fn": "sqrt",
"weight": 1.0
}
},
"lr": 5e-05,
"lr_final": 1e-06,
"lr_warmup": 1.0,
"n_iters": 250000,
"nsteps_accumulation_gradient": 4,
"validation_interval": 2500,
"warmup_iters": 75000,
"wd": 0.1,
"wd_final": 0.1
}
}