jxie's picture
Upload SMAForSSL
e44c65c verified
{
"_name_or_path": null,
"architectures": [
"SMAForSSL"
],
"attention_dropout_prob": 0.0,
"cross_attention_widening_factor": 1,
"cross_eval_noising_args": null,
"cross_train_noising_args": [
[
"RandomlySelectedCrossAttentionMasking",
{
"exclude_seen_reconstruction": true,
"masking_ratio": 0.15,
"num_per_query": 4,
"varying_length": true
}
]
],
"decoder_attention_channels": 512,
"decoder_heads": 8,
"decoder_latent_channels": 512,
"decoder_type": "cross_attention",
"dense_use_bias": true,
"drop_path_rate": 0.0,
"embedded_channels": 512,
"encoder_cross_attention_channels": 256,
"encoder_type": "cross_attention",
"final_project": true,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"initializer_range": 0.02,
"input_channels": 3,
"input_type": "discrete",
"latent_channels": 1024,
"layer_norm_eps": 1e-12,
"layernorm_eps": 1e-12,
"loss_fn": "mse",
"max_position_embeddings": 1024,
"model_type": "sma",
"num_blocks": 1,
"num_cross_attention_heads": 8,
"num_discrete_tokens": 262,
"num_latents": 256,
"num_outputs": 1024,
"num_self_attends_per_block": 16,
"num_self_attention_heads": 8,
"output_channels": 262,
"pe_initializer_range": 0.02,
"post_decoder_layers": null,
"project_after_concat": true,
"qk_channels": 256,
"self_attention_widening_factor": 1,
"share_decoder_queries": true,
"share_embedding_weights": true,
"teacher_args": {
"auxiliary_loss_fn": "mse",
"auxiliary_loss_weight": 1.0,
"ema_args": {
"ema_decay_end": 0.0,
"ema_decay_start": 0.0
},
"eval_transform_args": null,
"mask_replace": 3,
"num_layer_target_avg": null,
"reconstruction_decoder_args": {
"num_heads": 1,
"num_outputs": 1024,
"output_channels": 262,
"qk_channels": 256,
"query_num_channels": 512,
"share_decoder_queries": true,
"share_embedding_weights": true,
"use_query_residual": true,
"v_channels": 512
},
"reconstruction_loss_fn": "crossentropy",
"reconstruction_loss_weight": 1.0,
"reconstruction_weighted_loss": false,
"target_normalization_fn": "layernorm",
"train_transform_args": null
},
"teacher_name": "ReconstructionTeacher",
"torch_dtype": "float32",
"transformers_version": "4.26.0.dev0",
"use_decoder": false,
"use_position_embeddings": true,
"use_query_residual": true,
"v_channels": 1024
}