bamec66557
committed on
Commit
•
e4e3ffd
1
Parent(s):
141bfb2
Update mergekit_config.yml
Browse files- mergekit_config.yml +13 -13
mergekit_config.yml
CHANGED
@@ -8,28 +8,28 @@ slices:
|
|
8 |
parameters:
|
9 |
t:
|
10 |
- filter: self_attn
|
11 |
-
value: [0.1, 0.3, 0.7, 0.9, 1.0] #
|
12 |
- filter: mlp
|
13 |
-
value: [1.0, 0.7, 0.4, 0.1, 0.0] #
|
14 |
- filter: layer_norm
|
15 |
-
value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, #
|
16 |
-
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, #
|
17 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
|
18 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
|
19 |
-
- value: 0.9 #
|
20 |
|
21 |
merge_method: slerp # maintain slerp
|
22 |
|
23 |
-
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v #
|
24 |
|
25 |
-
dtype: bfloat16 #
|
26 |
|
27 |
-
#
|
28 |
regularization:
|
29 |
-
- method: l2_norm #
|
30 |
-
scale: 0.005 #
|
31 |
|
32 |
postprocessing:
|
33 |
-
- operation: smoothing #
|
34 |
-
kernel_size: 5 #
|
35 |
-
- operation: normalize #
|
|
|
8 |
parameters:
|
9 |
t:
|
10 |
- filter: self_attn
|
11 |
+
value: [0.1, 0.3, 0.7, 0.9, 1.0] # Spikes for dramatic change
|
12 |
- filter: mlp
|
13 |
+
value: [1.0, 0.7, 0.4, 0.1, 0.0] # Conversely, a sharp decline
|
14 |
- filter: layer_norm
|
15 |
+
value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # First 10 layers
|
16 |
+
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # The remaining 30 layers
|
17 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
|
18 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
|
19 |
+
- value: 0.9 # Set the default merge ratio to high
|
20 |
|
21 |
merge_method: slerp # maintain slerp
|
22 |
|
23 |
+
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # Base model
|
24 |
|
25 |
+
dtype: bfloat16 # Data types for fast merges
|
26 |
|
27 |
+
# Additional options
|
28 |
regularization:
|
29 |
+
- method: l2_norm # Stabilise after merging with L2 normalisation
|
30 |
+
scale: 0.005 # Reduce normalisation strength to allow for variation
|
31 |
|
32 |
postprocessing:
|
33 |
+
- operation: smoothing # Smoothing weights after merging
|
34 |
+
kernel_size: 5 # Smoothing larger ranges with increased kernel size
|
35 |
+
- operation: normalize # Normalise after merge
|