jxie
/

sma-physics-pretrained

Transformers

PyTorch

sma

Inference Endpoints

Model card Files and versions Community

jxie committed on Feb 29

Commit

6710be9

•

1 Parent(s): 064d0b6

Upload SMAForSSL

Browse files

Files changed (2) hide show

config.json +106 -0
pytorch_model.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "_name_or_path": "/iris/u/jwxie/workspace/releases/domain-agnostic-pretraining/examples/saved_models/physics_pretrained/higgs_guided_self_rand_select_masking_recon_small_noise_mask_self_random_mix-normalized-adamw_torch-lr1e-4-wd0.01-ws10000-masking_schedule_length0.25-mr0.2",
+  "architectures": [
+    "SMAForSSL"
+  ],
+  "attention_dropout_prob": 0.0,
+  "cross_attention_widening_factor": 1,
+  "cross_eval_noising_args": null,
+  "cross_train_noising_args": [
+    [
+      "RandomlySelectedCrossAttentionMasking",
+      {
+        "exclude_seen_reconstruction": true,
+        "head_aggregation": "random_mix",
+        "mask_self": true,
+        "masking_ratio": 0.2,
+        "num_per_query": 3,
+        "select_initial_ratio": 1.0,
+        "varying_length": true
+      }
+    ]
+  ],
+  "decoder_attention_channels": 128,
+  "decoder_heads": 1,
+  "decoder_latent_channels": 128,
+  "decoder_type": "cross_attention",
+  "dense_use_bias": true,
+  "drop_path_rate": 0.0,
+  "embedded_channels": 128,
+  "encoder_cross_attention_channels": 128,
+  "encoder_type": "self_attention",
+  "final_project": true,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "initializer_range": 0.02,
+  "input_channels": 1,
+  "input_type": "continuous",
+  "latent_channels": 128,
+  "layer_norm_eps": 1e-12,
+  "layernorm_eps": 1e-12,
+  "loss_fn": "mse",
+  "max_position_embeddings": 28,
+  "model_type": "perceiver_sma",
+  "num_blocks": 1,
+  "num_cross_attention_heads": 8,
+  "num_discrete_tokens": 262,
+  "num_latents": 128,
+  "num_outputs": 2048,
+  "num_self_attends_per_block": 4,
+  "num_self_attention_heads": 8,
+  "output_channels": 262,
+  "pe_initializer_range": 0.02,
+  "post_decoder_layers": null,
+  "project_after_concat": true,
+  "qk_channels": 128,
+  "self_attention_widening_factor": 1,
+  "share_decoder_queries": true,
+  "share_embedding_weights": true,
+  "teacher_args": {
+    "auxiliary_loss_fn": "mse",
+    "auxiliary_loss_weight": 1.0,
+    "ema_args": {
+      "ema_decay_end": 0.0,
+      "ema_decay_start": 0.0
+    },
+    "eval_transform_args": [
+      [
+        "RandomlySelectedCrossAttentionMasking",
+        {
+          "exclude_seen_reconstruction": true,
+          "head_aggregation": "random_mix",
+          "mask_self": true,
+          "masking_ratio": 0.2,
+          "num_per_query": 3,
+          "select_initial_ratio": 1.0,
+          "varying_length": true
+        }
+      ]
+    ],
+    "mask_replace": 3,
+    "num_layer_target_avg": null,
+    "reconstruction_decoder_args": {
+      "num_heads": 8,
+      "num_outputs": 28,
+      "output_channels": 1,
+      "qk_channels": 128,
+      "query_num_channels": 128,
+      "share_decoder_queries": true,
+      "share_embedding_weights": true,
+      "use_query_residual": true,
+      "v_channels": 128
+    },
+    "reconstruction_loss_fn": "mse",
+    "reconstruction_loss_weight": 1.0,
+    "reconstruction_weighted_loss": false,
+    "target_normalization_fn": "layernorm",
+    "train_transform_args": null
+  },
+  "teacher_name": "ReconstructionTeacher",
+  "torch_dtype": "float32",
+  "transformers_version": "4.26.0.dev0",
+  "use_decoder": false,
+  "use_position_embeddings": true,
+  "use_query_residual": true,
+  "v_channels": 128
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c96639bcf32c123c23acbff5e7bf1b91415d95d2e7c6117c3975bdd6978e65f0
+size 2458681