mjschock committed on
Commit
5585048
·
verified ·
1 Parent(s): 5e40b6f

Upload config

Browse files
Files changed (2) hide show
  1. config.json +0 -5
  2. configuration_mamba.py +0 -10
config.json CHANGED
@@ -10,15 +10,10 @@
10
  "d_state": 16,
11
  "dt_rank": 48,
12
  "expand": 2,
13
- "fused_add_norm": true,
14
  "initializer_range": 0.02,
15
  "model_type": "mamba",
16
  "n_layer": 24,
17
- "norm_epsilon": 1e-05,
18
  "pad_vocab_size_multiple": 8,
19
- "residual_in_fp32": true,
20
- "rms_norm": true,
21
- "ssm_cfg": {},
22
  "transformers_version": "4.37.2",
23
  "vocab_size": 50280
24
  }
 
10
  "d_state": 16,
11
  "dt_rank": 48,
12
  "expand": 2,
 
13
  "initializer_range": 0.02,
14
  "model_type": "mamba",
15
  "n_layer": 24,
 
16
  "pad_vocab_size_multiple": 8,
 
 
 
17
  "transformers_version": "4.37.2",
18
  "vocab_size": 50280
19
  }
configuration_mamba.py CHANGED
@@ -17,14 +17,9 @@ class MambaConfig(PretrainedConfig):
17
  conv_bias=True,
18
  bias=False,
19
  n_layer=64,
20
- norm_epsilon=1e-5,
21
  dt_rank: Union[int, str] = "auto",
22
  pad_vocab_size_multiple=8,
23
  initializer_range=0.02,
24
- rms_norm: bool = True,
25
- fused_add_norm: bool = True,
26
- ssm_cfg={},
27
- residual_in_fp32: bool = True,
28
  **kwargs,
29
  ):
30
  self.vocab_size = vocab_size
@@ -39,11 +34,6 @@ class MambaConfig(PretrainedConfig):
39
  self.dt_rank = dt_rank
40
  self.initializer_range = initializer_range
41
  self.bias = bias
42
- self.ssm_cfg = ssm_cfg
43
- self.norm_epsilon = norm_epsilon
44
- self.rms_norm = rms_norm
45
- self.residual_in_fp32 = residual_in_fp32
46
- self.fused_add_norm = fused_add_norm
47
 
48
  if self.dt_rank == "auto":
49
  self.dt_rank = math.ceil(self.d_model / 16)
 
17
  conv_bias=True,
18
  bias=False,
19
  n_layer=64,
 
20
  dt_rank: Union[int, str] = "auto",
21
  pad_vocab_size_multiple=8,
22
  initializer_range=0.02,
 
 
 
 
23
  **kwargs,
24
  ):
25
  self.vocab_size = vocab_size
 
34
  self.dt_rank = dt_rank
35
  self.initializer_range = initializer_range
36
  self.bias = bias
 
 
 
 
 
37
 
38
  if self.dt_rank == "auto":
39
  self.dt_rank = math.ceil(self.d_model / 16)