mjschock committed on
Commit
2b74da7
·
verified ·
1 Parent(s): 845dd1c

Upload config

Browse files
Files changed (2) hide show
  1. config.json +2 -7
  2. configuration_mamba.py +2 -0
config.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
- "architectures": [
3
- "MambaModel"
4
- ],
5
  "auto_map": {
6
- "AutoConfig": "configuration_mamba.MambaConfig",
7
- "AutoModel": "modeling_mamba.MambaModel",
8
- "AutoModelForCausalLM": "modeling_mamba.MambaModelForCausalLM"
9
  },
10
  "bias": false,
11
  "conv_bias": true,
@@ -15,11 +10,11 @@
15
  "d_state": 16,
16
  "dt_rank": 48,
17
  "expand": 2,
 
18
  "initializer_range": 0.02,
19
  "model_type": "mamba",
20
  "n_layer": 24,
21
  "pad_vocab_size_multiple": 8,
22
- "torch_dtype": "float32",
23
  "transformers_version": "4.37.2",
24
  "vocab_size": 50280
25
  }
 
1
  {
 
 
 
2
  "auto_map": {
3
+ "AutoConfig": "configuration_mamba.MambaConfig"
 
 
4
  },
5
  "bias": false,
6
  "conv_bias": true,
 
10
  "d_state": 16,
11
  "dt_rank": 48,
12
  "expand": 2,
13
+ "hidden_size": 768,
14
  "initializer_range": 0.02,
15
  "model_type": "mamba",
16
  "n_layer": 24,
17
  "pad_vocab_size_multiple": 8,
 
18
  "transformers_version": "4.37.2",
19
  "vocab_size": 50280
20
  }
configuration_mamba.py CHANGED
@@ -35,6 +35,8 @@ class MambaConfig(PretrainedConfig):
35
  self.initializer_range = initializer_range
36
  self.bias = bias
37
 
 
 
38
  if self.dt_rank == "auto":
39
  self.dt_rank = math.ceil(self.d_model / 16)
40
 
 
35
  self.initializer_range = initializer_range
36
  self.bias = bias
37
 
38
+ self.hidden_size = self.d_model
39
+
40
  if self.dt_rank == "auto":
41
  self.dt_rank = math.ceil(self.d_model / 16)
42