Qwen1.5-4x0.5B-MoE / mergekit_moe_config.yml
nopainkiller's picture
Upload 10 files
164395d verified
raw
history blame contribute delete
777 Bytes
# Top-level merge settings for this mixture-of-experts build.

# Model named as the base of the merge.
base_model: Qwen1.5-0.5B-Chat

# Gate initialisation mode; accepted values listed here are
# "hidden", "cheap_embed", and "random".
gate_mode: hidden

# Output dtype; accepted values listed here are float32, float16,
# and bfloat16.
dtype: bfloat16

# Optional setting, left disabled:
# experts_per_token: 2
# Expert list: four entries, all built from the same source model and
# distinguished only by their positive prompts. Every expert is a mapping
# with `source_model` and `positive_prompts`, so both keys must be nested
# under the same list item.
experts:
  # Expert 1: general chat / assistant-style requests.
  - source_model: Qwen1.5-0.5B-Chat
    positive_prompts:
      - "chat"
      - "assistant"
      - "tell me"
      - "explain"
  # Expert 2: programming and code.
  - source_model: Qwen1.5-0.5B-Chat
    positive_prompts:
      - "code"
      - "python"
      - "javascript"
      - "programming"
      - "algorithm"
  # Expert 3: creative and story writing.
  - source_model: Qwen1.5-0.5B-Chat
    positive_prompts:
      - "storywriting"
      - "write"
      - "scene"
      - "story"
      - "character"
  # Expert 4: reasoning and mathematics.
  - source_model: Qwen1.5-0.5B-Chat
    positive_prompts:
      - "reason"
      - "math"
      - "mathematics"
      - "solve"
      - "count"