msr2000's picture
Release DeepSeek-V3
cc85cae
raw
history blame contribute delete
455 Bytes
{
"vocab_size": 102400,
"dim": 5120,
"inter_dim": 12288,
"moe_inter_dim": 1536,
"n_layers": 60,
"n_dense_layers": 1,
"n_heads": 128,
"n_routed_experts": 160,
"n_shared_experts": 2,
"n_activated_experts": 6,
"n_expert_groups": 8,
"n_limited_groups": 3,
"route_scale": 16.0,
"q_lora_rank": 1536,
"kv_lora_rank": 512,
"qk_nope_head_dim": 128,
"qk_rope_head_dim": 64,
"v_head_dim": 128
}