Upload config
Browse files
- config.json +0 -5
- configuration_mamba.py +0 -10
config.json
CHANGED
@@ -10,15 +10,10 @@
   "d_state": 16,
   "dt_rank": 48,
   "expand": 2,
-  "fused_add_norm": true,
   "initializer_range": 0.02,
   "model_type": "mamba",
   "n_layer": 24,
-  "norm_epsilon": 1e-05,
   "pad_vocab_size_multiple": 8,
-  "residual_in_fp32": true,
-  "rms_norm": true,
-  "ssm_cfg": {},
   "transformers_version": "4.37.2",
   "vocab_size": 50280
 }
configuration_mamba.py
CHANGED
@@ -17,14 +17,9 @@ class MambaConfig(PretrainedConfig):
         conv_bias=True,
         bias=False,
         n_layer=64,
-        norm_epsilon=1e-5,
         dt_rank: Union[int, str] = "auto",
         pad_vocab_size_multiple=8,
         initializer_range=0.02,
-        rms_norm: bool = True,
-        fused_add_norm: bool = True,
-        ssm_cfg={},
-        residual_in_fp32: bool = True,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -39,11 +34,6 @@ class MambaConfig(PretrainedConfig):
         self.dt_rank = dt_rank
         self.initializer_range = initializer_range
         self.bias = bias
-        self.ssm_cfg = ssm_cfg
-        self.norm_epsilon = norm_epsilon
-        self.rms_norm = rms_norm
-        self.residual_in_fp32 = residual_in_fp32
-        self.fused_add_norm = fused_add_norm
 
         if self.dt_rank == "auto":
             self.dt_rank = math.ceil(self.d_model / 16)