{
    "model_type": "llamba",
    "vocab_size": 128256,
    "tie_embeddings": false,
    "pad_vocab_size_multiple": 8,
    "lm_head_bias": false,
    "d_model": 4096,
    "n_layer": 32,
    "resid_dropout": 0.0,
    "norm_epsilon": 1e-5,
    "mlp_cfg": {
        "intermediate_size": 14336,
        "bias": false,
        "act_fn": "silu"
    },
    "ssm_cfg": {
        "d_state": 64,
        "n_v_heads": 32,
        "n_qk_heads": 32,
        "expand": 1,
        "chunk_size": 128,
        "activation": "identity",
        "bias": false
    }
}