# Model-merge recipe (the key layout matches a mergekit-style configuration).
# Three FuseO1 32B preview checkpoints are merged onto the base model below.

# Source checkpoints and their per-model merge parameters.
# NOTE(review): confirm that the `sce` method actually consumes per-model
# `weight` / `density`; if it does not, these values are no-ops.
models:
  - model: FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview
    parameters:
      weight: 1.2  # Slightly favor this model
      density: 0.9  # Sparsified a bit to reduce noise
  - model: FuseAI/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview
    parameters:
      weight: 1
      density: 0.9
  - model: FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-32B-Preview
    parameters:
      weight: 1
      density: 0.9

merge_method: sce  # SCE for adaptive weighting
base_model: Rombo-Org/Rombo-LLM-V3.1-QWQ-32b

# Global parameters for the merge method.
parameters:
  normalize: true
  int8_mask: true
  select_topk: 0.1  # Retain the top 10% high-variance elements

tokenizer_source: union  # Union to combine vocabularies
dtype: bfloat16