mjschock
/

mamba-130m

Feature Extraction

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

mjschock committed on Feb 19, 2024

Commit

0b66f62

·

verified ·

1 Parent(s): 470c524

Upload model

Files changed (3) hide show

config.json +6 -1
model.safetensors +2 -2
modeling_mamba.py +4 -2

config.json CHANGED Viewed

@@ -1,6 +1,10 @@
 {
   "auto_map": {
-    "AutoConfig": "configuration_mamba.MambaConfig"
   },
   "bias": false,
   "conv_bias": true,
@@ -14,6 +18,7 @@
   "model_type": "mamba",
   "n_layer": 24,
   "pad_vocab_size_multiple": 8,
   "transformers_version": "4.37.2",
   "vocab_size": 50280
 }

 {
+  "architectures": [
+    "MambaModelForCausalLM"
+  ],
   "auto_map": {
+    "AutoConfig": "configuration_mamba.MambaConfig",
+    "AutoModelForCausalLM": "modeling_mamba.MambaModelForCausalLM"
   },
   "bias": false,
   "conv_bias": true,
   "model_type": "mamba",
   "n_layer": 24,
   "pad_vocab_size_multiple": 8,
+  "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "vocab_size": 50280
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bd3ca62665de4bfabff9d443f87a11090a10e505c0ccb56e6f9ca495b6e05bd
-size 671027808

 version https://git-lfs.github.com/spec/v1
+oid sha256:699ed6f59fb948186f449c5031e0dc659d504c90d7e018302aa1e190cdb40220
+size 516567560

modeling_mamba.py CHANGED Viewed

@@ -312,8 +312,7 @@ class MambaModel(MambaPreTrainedModel):
 class MambaModelForCausalLM(MambaPreTrainedModel):
     _tied_weights_keys = [
-        "lm_head.weight",
-        "backbone.embedding.weight",
     ]
     def __init__(self, config, **kwargs):
@@ -342,6 +341,9 @@ class MambaModelForCausalLM(MambaPreTrainedModel):
         # self.lm_head.weight = self.backbone.embedding.weight
         self.post_init()
     # def get_input_embeddings(self):
     #     return self.model.embedding

 class MambaModelForCausalLM(MambaPreTrainedModel):
     _tied_weights_keys = [
+        "lm_head.weight", # will remove this since it's a duplicate of backbone.embedding.weight
     ]
     def __init__(self, config, **kwargs):
         # self.lm_head.weight = self.backbone.embedding.weight
         self.post_init()
+    def _tie_weights(self):
+        self.lm_head.weight = self.backbone.embedding.weight
     # def get_input_embeddings(self):
     #     return self.model.embedding