qwen2-2x1.5B / mergekit_moe_config.yml
base_model: cognitivecomputations/dolphin-2.9.3-qwen2-1.5b
gate_mode: hidden      # initialize router weights from hidden-state representations of each expert's positive prompts
architecture: qwen     # emit a Qwen MoE-style output model
dtype: bfloat16
experts_per_token: 2   # number of experts activated for each token
experts:
  - source_model: cognitivecomputations/dolphin-2.9.3-qwen2-1.5b
    positive_prompts:
      - "chat"
      - "assistant"
      - "explain"
      - "describe"
      - "define"
      - "what is"
      - "tell me"
      - "help me"
      - "show me"
      - "can you"
  - source_model: macadeliccc/Samantha-Qwen2-1.5B
    positive_prompts:
      - "characters"
      - "scene"
      - "roleplay"
      - "writing"
      - "creative"
      - "you are"
      - "act as"
shared_experts:
  - source_model: cognitivecomputations/dolphin-2.9.3-qwen2-1.5b
    positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
      - "chat"
      - "assistant"
    # (optional, but recommended:)
    residual_scale: 0.1
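
# A minimal usage sketch, assuming mergekit is installed (pip install mergekit):
# the config above is passed to the mergekit-moe entry point, which builds the
# merged MoE and writes it to the given output directory. The output path below
# is a placeholder.
#
#   mergekit-moe mergekit_moe_config.yml ./qwen2-2x1.5B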