mjschock committed
Commit e7a8087 · verified · Parent: b409485

Upload model

Files changed (2):
  1. config.json (+6 -1)
  2. modeling_mamba.py (+6 -5)
config.json CHANGED
@@ -1,6 +1,10 @@
 {
+  "architectures": [
+    "MambaModelForCausalLM"
+  ],
   "auto_map": {
-    "AutoConfig": "configuration_mamba.MambaConfig"
+    "AutoConfig": "configuration_mamba.MambaConfig",
+    "AutoModelForCausalLM": "modeling_mamba.MambaModelForCausalLM"
   },
   "bias": false,
   "conv_bias": true,
@@ -14,6 +18,7 @@
   "model_type": "mamba",
   "n_layer": 24,
   "pad_vocab_size_multiple": 8,
+  "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "vocab_size": 50280
 }
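
With the new "architectures" and "auto_map" entries, the checkpoint can be loaded through the transformers auto classes: trust_remote_code=True tells the library to follow auto_map and import MambaConfig and MambaModelForCausalLM from the repository's own Python files. A minimal loading sketch; the repo id below is a placeholder, not confirmed by this commit:

from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder repo id; substitute the repository this commit belongs to.
repo_id = "mjschock/mamba-370m"

# trust_remote_code=True makes transformers follow the auto_map entries and
# load MambaConfig / MambaModelForCausalLM from the repo's own modules.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)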
modeling_mamba.py CHANGED
@@ -236,16 +236,16 @@ class MambaModel(MambaPreTrainedModel):
         self.gradient_checkpointing = False
         self.post_init()
 
-    def get_input_embeddings(self):
-        return self.embedding
+    # def get_input_embeddings(self):
+    #     return self.embedding
 
-    def set_input_embeddings(self, value):
-        self.embedding = value
+    # def set_input_embeddings(self, value):
+    #     self.embedding = value
 
     def forward(
         self,
         input_ids: torch.LongTensor = None,
-        return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[Tuple, BaseModelOutputWithPast]:
         x = self.embedding(input_ids)
         all_hidden_states = list()
@@ -297,6 +297,7 @@ class MambaModelForCausalLM(MambaPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[Tuple, CausalLMOutputWithPast]:
         outputs = self.backbone(
             input_ids=input_ids,
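
The **kwargs catch-alls make both forward signatures tolerant of extra keyword arguments that generate() and Trainer commonly pass (for example attention_mask or use_cache), which this backbone does not use. A minimal sketch of the pattern, using a hypothetical module rather than this repo's classes:

import torch
import torch.nn as nn

class TolerantModule(nn.Module):
    # Hypothetical stand-in illustrating the **kwargs pattern from the diff.
    def __init__(self, vocab_size: int = 50280, d_model: int = 8):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)

    def forward(self, input_ids: torch.LongTensor = None, **kwargs):
        # Extra arguments (e.g. attention_mask from generate()) are accepted
        # and silently ignored instead of raising a TypeError.
        return self.embedding(input_ids)

m = TolerantModule()
ids = torch.tensor([[1, 2, 3]])
out = m(input_ids=ids, attention_mask=torch.ones_like(ids))  # no TypeError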