{
  "d_model": 2560,
  "ssm_cfg": {
    "expand": 1,
    "ngroups": 32,
    "d_state": 80
  },
  "layer_norm_eps": 1e-05,
  "vocab_size": null,
  "d_inner": 2560,
  "d_xb": 2560,
  "intermediate_size": 10240,
  "hidden_act": "gelu_new",
  "n_layer": 32,
  "attn_layers": [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, 21, 22, 23, 25, 26, 27, 29, 30, 31],
  "resid_pdrop": 0.1,
  "bidirectional": false,
  "is_bias": false
}
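
A minimal sketch of how a config like this might be consumed, assuming it is saved as `config.json` (the filename and the "layers not in attn_layers are SSM layers" interpretation are assumptions, not stated in the file itself). It also checks two relations the values imply: `d_inner = expand * d_model` and `intermediate_size = 4 * d_model`.

```python
import json

# Load the hybrid SSM/attention config (assumed saved as config.json).
with open("config.json") as f:
    cfg = json.load(f)

# attn_layers lists the attention layers; the remaining layer indices
# are presumably the SSM (state-space) layers.
attn = set(cfg["attn_layers"])
ssm = [i for i in range(cfg["n_layer"]) if i not in attn]
print("SSM layers:      ", ssm)           # [0, 4, 8, 12, 16, 20, 24, 28]
print("attention layers:", sorted(attn))  # the other 24 of the 32 layers

# Sanity checks implied by the values themselves.
assert cfg["d_inner"] == cfg["ssm_cfg"]["expand"] * cfg["d_model"]  # 1 * 2560
assert cfg["intermediate_size"] == 4 * cfg["d_model"]               # 10240
```

Note the regular pattern in `attn_layers`: every fourth layer (0, 4, 8, ...) is omitted, i.e. one SSM layer per block of four, with the remaining three using attention.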