# mergekit merge configuration.
#
# Builds one output model by stacking nine layer slices that alternate
# between two source models:
#   - nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
#   - cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
#
# Every slice lists exactly one source, so no slice mixes weights from
# both models; the slices are concatenated in the order written below.
#
# NOTE(review): from the second slice of each model onward, the start
# index is one greater than the end index of that model's previous slice
# (16 -> 17, 24 -> 25, 32 -> 33, ...). If the consuming tool treats
# layer_range as half-open [start, end), layers 16/32/48/64 of the first
# model and 24/40/56 of the second are never used. Confirm this is
# intended before changing any range.
merge_method: linear
parameters:
  # Single global weight; each slice has only one source model.
  weight: 1.0
slices:
  - sources:
      - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
        layer_range: [0, 16]
  - sources:
      - model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
        layer_range: [8, 24]
  - sources:
      - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
        layer_range: [17, 32]
  - sources:
      - model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
        layer_range: [25, 40]
  - sources:
      - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
        layer_range: [33, 48]
  - sources:
      - model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
        layer_range: [41, 56]
  - sources:
      - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
        layer_range: [49, 64]
  - sources:
      - model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
        layer_range: [57, 72]
  - sources:
      - model: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
        layer_range: [65, 80]
# Data type for the merged output tensors.
dtype: float16
# Tokenizer is taken from the model that also supplies the first and
# last slices.
tokenizer_source: nvidia/Llama-3.1-Nemotron-70B-Instruct-HF