# Model-merge configuration: combines two MISCHIEVOUS-12B-Mix checkpoints
# over layers 0-40 using the `slerp` merge method.
# NOTE(review): structure reconstructed from a copy whose indentation was
# lost and whose lines carried stray `|` characters; nesting follows the
# usual mergekit layout -- confirm against the original file.

slices:
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.1v
        layer_range: [0, 40]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v
        layer_range: [0, 40]

parameters:
  # Interpolation factor `t`, given per filter. A list value is a gradient
  # of anchor points rather than a single number.
  # NOTE(review): presumably t=0 keeps base_model and t=1 takes the other
  # model, with gradients spread across the layer range -- confirm with the
  # mergekit docs.
  t:
    - filter: self_attn
      value: [0.1, 0.3, 0.7, 0.9, 1.0]
    - filter: mlp
      value: [1.0, 0.7, 0.4, 0.1, 0.0]
    # 40 entries: ten at 0.7 followed by thirty at 0.3.
    # NOTE(review): verify that `layer_norm` actually matches tensor names
    # in these checkpoints (many architectures use `layernorm` without an
    # underscore); if it matches nothing, the fallback below applies.
    - filter: layer_norm
      value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
    # Entry without a filter: the fallback value for everything else.
    - value: 0.9

merge_method: slerp

base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v

dtype: bfloat16

# NOTE(review): `regularization` and `postprocessing` are not keys the
# reviewer recognises from the mergekit configuration schema; confirm the
# consuming tool reads them, otherwise they may be ignored or rejected.
regularization:
  - method: l2_norm
    scale: 0.005

postprocessing:
  - operation: smoothing
    kernel_size: 5
  - operation: normalize