mjschock committed on
Commit
44ea34e
·
verified ·
1 Parent(s): d7bba61

Upload model

Browse files
Files changed (2) hide show
  1. config.json +6 -1
  2. modeling_mamba.py +1 -0
config.json CHANGED
@@ -1,6 +1,10 @@
1
  {
 
 
 
2
  "auto_map": {
3
- "AutoConfig": "configuration_mamba.MambaConfig"
 
4
  },
5
  "d_model": 768,
6
  "fused_add_norm": true,
@@ -10,6 +14,7 @@
10
  "residual_in_fp32": true,
11
  "rms_norm": true,
12
  "ssm_cfg": {},
 
13
  "transformers_version": "4.37.2",
14
  "vocab_size": 50277
15
  }
 
1
  {
2
+ "architectures": [
3
+ "MambaModel"
4
+ ],
5
  "auto_map": {
6
+ "AutoConfig": "configuration_mamba.MambaConfig",
7
+ "AutoModel": "modeling_mamba.MambaModel"
8
  },
9
  "d_model": 768,
10
  "fused_add_norm": true,
 
14
  "residual_in_fp32": true,
15
  "rms_norm": true,
16
  "ssm_cfg": {},
17
+ "torch_dtype": "float16",
18
  "transformers_version": "4.37.2",
19
  "vocab_size": 50277
20
  }
modeling_mamba.py CHANGED
@@ -1,4 +1,5 @@
1
  from typing import Optional
 
2
  from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
3
  from transformers import GenerationMixin, PreTrainedModel
4
  from transformers.generation import TextStreamer
 
1
  from typing import Optional
2
+
3
  from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
4
  from transformers import GenerationMixin, PreTrainedModel
5
  from transformers.generation import TextStreamer