bamec66557 committed on
Commit
e4e3ffd
•
1 Parent(s): 141bfb2

Update mergekit_config.yml

Browse files
Files changed (1) hide show
  1. mergekit_config.yml +13 -13
mergekit_config.yml CHANGED
@@ -8,28 +8,28 @@ slices:
8
  parameters:
9
  t:
10
  - filter: self_attn
11
- value: [0.1, 0.3, 0.7, 0.9, 1.0] # 극적인 변화를 위한 급격한 증가
12
  - filter: mlp
13
- value: [1.0, 0.7, 0.4, 0.1, 0.0] # 반대로 급격히 감소
14
  - filter: layer_norm
15
- value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # 첫 10개 레이어
16
- 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # 나머지 30개 레이어
17
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
18
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
19
- - value: 0.9 # 기본 병합 비율을 높게 설정
20
 
21
  merge_method: slerp # maintain slerp
22
 
23
- base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # 기본 모델
24
 
25
- dtype: bfloat16 # 빠른 병합을 위한 데이터 타입
26
 
27
- # 추가 옵션
28
  regularization:
29
- - method: l2_norm # L2 정규화로 병합 후 안정화
30
- scale: 0.005 # 정규화 강도를 낮춰 변화 허용
31
 
32
  postprocessing:
33
- - operation: smoothing # 병합 후 가중치 부드럽게 조정
34
- kernel_size: 5 # 커널 크기 증가로 더 넓은 범위 스무딩
35
- - operation: normalize # 병합 후 정규화
 
8
  parameters:
9
  t:
10
  - filter: self_attn
11
+ value: [0.1, 0.3, 0.7, 0.9, 1.0] # Spikes for dramatic change
12
  - filter: mlp
13
+ value: [1.0, 0.7, 0.4, 0.1, 0.0] # Conversely, a sharp decline
14
  - filter: layer_norm
15
+ value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # First 10 layers
16
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # The remaining 30 layers
17
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
18
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
19
+ - value: 0.9 # Set the default merge ratio to high
20
 
21
  merge_method: slerp # maintain slerp
22
 
23
+ base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # Base model
24
 
25
+ dtype: bfloat16 # Data types for fast merges
26
 
27
+ # Additional options
28
  regularization:
29
+ - method: l2_norm # Stabilise after merging with L2 normalisation
30
+ scale: 0.005 # Reduce normalisation strength to allow for variation
31
 
32
  postprocessing:
33
+ - operation: smoothing # Smoothing weights after merging
34
+ kernel_size: 5 # Smoothing larger ranges with increased kernel size
35
+ - operation: normalize # Normalise after merge