# Model configuration (source commit dcc8c59)
# Normalization constants (ImageNet mean/std) and core feature dimensions.
pixel_mean: [0.485, 0.456, 0.406]
pixel_std: [0.229, 0.224, 0.225]
pixel_dim: 256
key_dim: 64
value_dim: 256
sensory_dim: 256
embed_dim: 256

pixel_encoder:
  type: resnet50
  # Multi-scale feature channel counts, highest stride first.
  ms_dims: [1024, 512, 256, 64, 3] # f16, f8, f4, f2, f1

mask_encoder:
  type: resnet18
  final_dim: 256

# Positional-encoding parameters for pixel features.
pixel_pe_scale: 32
pixel_pe_temperature: 128

object_transformer:
  embed_dim: ${model.embed_dim}
  ff_dim: 2048
  num_heads: 8
  num_blocks: 3
  num_queries: 16
  # Each read_from_* / *_self_attention block configures one cross/self
  # attention; add_pe_to_qkv toggles positional encoding on [q, k, v].
  read_from_pixel:
    input_norm: false
    input_add_pe: false
    add_pe_to_qkv: [true, true, false]
  read_from_past:
    add_pe_to_qkv: [true, true, false]
  read_from_memory:
    add_pe_to_qkv: [true, true, false]
  read_from_query:
    add_pe_to_qkv: [true, true, false]
    output_norm: false
  query_self_attention:
    add_pe_to_qkv: [true, true, false]
  pixel_self_attention:
    add_pe_to_qkv: [true, true, false]

object_summarizer:
  # Tied to the object transformer via interpolation so the dimensions
  # and query/summary counts cannot drift apart.
  embed_dim: ${model.object_transformer.embed_dim}
  num_summaries: ${model.object_transformer.num_queries}
  add_pe: true

aux_loss:
  sensory:
    enabled: true
    weight: 0.01
  query:
    enabled: true
    weight: 0.01

mask_decoder:
  # first value must equal embed_dim
  up_dims: [256, 128, 128, 64, 16]