# File size: 1,339 Bytes
# Commits shown in the file viewer's blame column: 441d9e7, e4e3ffd
# mergekit configuration: SLERP merge of two MISCHIEVOUS-12B mixes over
# layer_range [0, 40] of each model.
#
# Per the mergekit SLERP documentation, `t` is the interpolation factor:
# t = 0 returns `base_model`, t = 1 returns the other model. A list value is
# a gradient that is spread across the layer stack.
slices:
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.1v
        layer_range: [0, 40]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v
        layer_range: [0, 40]

parameters:
  t:
    # Attention tensors: ramp steeply from 0.1 up to 1.0 across the layers.
    - filter: self_attn
      value: [0.1, 0.3, 0.7, 0.9, 1.0]
    # MLP tensors: the mirror image, a sharp decline from 1.0 down to 0.0.
    - filter: mlp
      value: [1.0, 0.7, 0.4, 0.1, 0.0]
    # One explicit value per layer (40 entries).
    # NOTE(review): filters appear to be matched as substrings of tensor
    # names; if the norm tensors are named `input_layernorm` /
    # `post_attention_layernorm`, `layer_norm` matches nothing and this entry
    # is dead (the 0.9 default below applies instead) -- confirm before
    # relying on it.
    - filter: layer_norm
      value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,  # First 10 layers
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,  # The remaining 30 layers
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
              0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
    # Fallback for every tensor not matched by a filter above: a high ratio.
    - value: 0.9

merge_method: slerp  # Keep spherical linear interpolation
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v  # Model returned at t = 0
dtype: bfloat16  # Data type chosen for fast merges

# Additional options
# NOTE(review): `regularization` and `postprocessing` are not among the keys
# documented for mergekit configurations; they may be silently ignored --
# verify against the mergekit version in use.
regularization:
  - method: l2_norm  # Stabilise after merging with L2 normalisation
    scale: 0.005  # Reduced normalisation strength to allow for variation
postprocessing:
  - operation: smoothing  # Smooth weights after merging
    kernel_size: 5  # Larger kernel size smooths over a wider range
  - operation: normalize  # Normalise after merge