{
"_name_or_path": "mamba-1.4b-hf-supernet/last",
"architecture": {
"layer_1": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_10": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_11": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_12": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_13": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_14": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_15": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_16": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_17": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_18": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_19": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_2": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_20": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_21": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_22": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_23": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_24": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_25": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_26": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_27": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_28": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_29": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_3": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_30": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_31": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_32": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_33": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_34": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_35": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_36": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_37": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_38": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_39": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_4": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_40": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_41": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_42": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_43": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_44": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_45": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_46": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_47": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_48": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_5": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_6": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_7": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_8": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
},
"layer_9": {
"atten_out": "None",
"inter_hidden": 1280,
"residual_hidden": "None"
}
},
"architectures": [
"MambaForCausalLM"
],
"bos_token_id": 0,
"conv_kernel": 4,
"d_model": 2048,
"elastic_config": {
"atten_out_space": [
"None"
],
"inter_hidden_space": [
4096,
1280
],
"residual_hidden_space": [
"None"
]
},
"eos_token_id": 0,
"expand": 2,
"fused_add_norm": true,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.1,
"intermediate_size": 4096,
"layer_norm_epsilon": 1e-05,
"model_type": "mamba",
"n_layer": 48,
"num_hidden_layers": 48,
"pad_token_id": 0,
"pad_vocab_size_multiple": 8,
"rescale_prenorm_residual": false,
"residual_in_fp32": true,
"rms_norm": true,
"ssm_cfg": {},
"state_size": 16,
"time_step_floor": 0.0001,
"time_step_init_scheme": "random",
"time_step_max": 0.1,
"time_step_min": 0.001,
"time_step_rank": 128,
"time_step_scale": 1.0,
"torch_dtype": "float32",
"transformers_version": "4.40.0.dev0",
"use_bias": false,
"use_cache": true,
"use_conv_bias": true,
"vocab_size": 50280
}
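
Notes: every per-layer entry above selects inter_hidden = 1280 out of the elastic search space [4096, 1280], so this config pins a single sub-network of the supernet; the string values "None" (rather than JSON null) are likely Python's str(None) from the export step. Below is a minimal sketch of loading and inspecting this file with transformers. The local path is a placeholder (the hub repo id is not given in this file), and treating the non-standard "architecture" / "elastic_config" keys as plain config attributes is an assumption about default PretrainedConfig behavior, not something this checkpoint documents.

```python
# Minimal sketch: load this config.json and inspect the supernet keys.
# "path/to/mamba-supernet" is a placeholder for wherever the checkpoint
# lives; the actual repo id does not appear in this file.
from transformers import MambaConfig, MambaForCausalLM

config = MambaConfig.from_pretrained("path/to/mamba-supernet")

# Standard Mamba fields are parsed into typed attributes.
print(config.num_hidden_layers)   # 48
print(config.hidden_size)         # 2048
print(config.intermediate_size)   # 4096 == expand * hidden_size

# Non-standard keys ("architecture", "elastic_config") are not part of
# MambaConfig's schema; by default, transformers keeps unknown config
# entries as plain attributes (assumption about PretrainedConfig).
print(config.elastic_config["inter_hidden_space"])     # [4096, 1280]
print(config.architecture["layer_1"]["inter_hidden"])  # 1280

# Loading the weights then works like any causal-LM checkpoint.
model = MambaForCausalLM.from_pretrained("path/to/mamba-supernet")
```

MambaConfig and MambaForCausalLM have shipped in transformers since v4.39, consistent with the "transformers_version": "4.40.0.dev0" recorded above.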