# MISCHIEVOUS-12B-Mix_0.3v / mergekit_config.yml
# bamec66557's picture
# Update mergekit_config.yml
# e4e3ffd verified
# One slice built from two source models, each contributing the same
# layer range [0, 40].
slices:
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.1v
        layer_range: [0, 40]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v
        layer_range: [0, 40]
# Interpolation factor `t` for the merge. Entries with a `filter` apply to
# tensors whose name contains that string; the entry without a filter is
# the fallback. A list value is a gradient spread across the layer range.
parameters:
  t:
    # Attention tensors: rises steeply from 0.1 to 1.0 across the layers.
    - filter: self_attn
      value: [0.1, 0.3, 0.7, 0.9, 1.0]
    # MLP tensors: the opposite trend, a sharp decline from 1.0 to 0.0.
    - filter: mlp
      value: [1.0, 0.7, 0.4, 0.1, 0.0]
    # 40 explicit values: 0.7 for the first 10 layers, 0.3 for the other 30.
    # NOTE(review): Llama/Mistral-style checkpoints name their norm tensors
    # `input_layernorm` / `post_attention_layernorm` (no underscore), so the
    # substring `layer_norm` probably matches nothing and these tensors fall
    # through to the 0.9 default below. Confirm against the tensor names;
    # use `layernorm` if this schedule is meant to take effect.
    - filter: layer_norm
      value: [
        0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,  # first 10 layers
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,  # remaining 30
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
    # Default for every tensor not matched above: a high merge ratio.
    - value: 0.9
# Base model for the merge.
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v
# Merge algorithm: slerp is kept as the method.
merge_method: slerp
# Tensor data type, chosen for fast merging.
dtype: bfloat16
# Additional options
# NOTE(review): `regularization` and `postprocessing` do not appear among
# the top-level keys in mergekit's configuration documentation
# (merge_method, slices/models, base_model, parameters, dtype, tokenizer
# options), so they may be ignored by the merge. Confirm against the
# mergekit version in use before relying on them.
regularization:
  # L2-norm regularisation intended to stabilise the merged weights; the
  # small scale keeps its strength low to allow for variation.
  - method: l2_norm
    scale: 0.005
postprocessing:
  # Smooth the merged weights; the larger kernel covers a wider range.
  - operation: smoothing
    kernel_size: 5
  # Normalise after the merge.
  - operation: normalize