# Spaces: Running on L4
# Model configuration.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. It is derived from (a) the interpolation
# paths used in this file (${model.embed_dim},
# ${model.object_transformer.embed_dim}, ${model.object_transformer.num_queries})
# and (b) keys that repeat and therefore cannot share one mapping
# (add_pe_to_qkv, enabled, weight, embed_dim). The placement of `output_norm`
# and of `pixel_pe_scale` / `pixel_pe_temperature` is the most plausible
# reading but is not forced by the text -- confirm against the upstream file.
#
# The ${...} references assume this mapping is mounted under the key `model`
# in the composed configuration.

# Per-channel normalization constants (three values each).
# These match the commonly used ImageNet RGB mean / std -- TODO confirm intent.
pixel_mean: [0.485, 0.456, 0.406]
pixel_std: [0.229, 0.224, 0.225]

# Feature dimensions shared across the model.
pixel_dim: 256
key_dim: 64
value_dim: 256
sensory_dim: 256
embed_dim: 256

pixel_encoder:
  type: resnet50
  ms_dims: [1024, 512, 256, 64, 3]  # f16, f8, f4, f2, f1

mask_encoder:
  type: resnet18
  final_dim: 256

# Positional-encoding parameters for pixel features.
pixel_pe_scale: 32
pixel_pe_temperature: 128

object_transformer:
  # Resolves to the top-level embed_dim above.
  embed_dim: ${model.embed_dim}
  ff_dim: 2048
  num_heads: 8
  num_blocks: 3
  num_queries: 16

  # Each add_pe_to_qkv list has three flags; by name they presumably toggle
  # positional encoding for query, key and value respectively -- TODO confirm.
  read_from_pixel:
    input_norm: False
    input_add_pe: False
    add_pe_to_qkv: [True, True, False]
  read_from_past:
    add_pe_to_qkv: [True, True, False]
  read_from_memory:
    add_pe_to_qkv: [True, True, False]
  read_from_query:
    add_pe_to_qkv: [True, True, False]
    output_norm: False
  query_self_attention:
    add_pe_to_qkv: [True, True, False]
  pixel_self_attention:
    add_pe_to_qkv: [True, True, False]

object_summarizer:
  # Both values are tied to the object transformer via interpolation, so
  # changing them there propagates here.
  embed_dim: ${model.object_transformer.embed_dim}
  num_summaries: ${model.object_transformer.num_queries}
  add_pe: True

# Auxiliary losses; each entry has an on/off switch and a weight.
aux_loss:
  sensory:
    enabled: True
    weight: 0.01
  query:
    enabled: True
    weight: 0.01

mask_decoder:
  # first value must equal embed_dim
  up_dims: [256, 128, 128, 64, 16]