bamec66557 committed
Commit 9d8458e · verified · 1 Parent(s): 8d9e37c

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,119 +1,12 @@
  ---
- license: apache-2.0
  library_name: transformers
  tags:
  - mergekit
  - merge
- - not-for-all-audiences
- base_model:
- - bamec66557/VICIOUS_MESH-12B-BETA
- - bamec66557/VICIOUS_MESH-12B-OMEGA
- model-index:
- - name: Mistral-Nemo-VICIOUS_MESH-12B-2407
-   results:
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: IFEval (0-Shot)
-       type: HuggingFaceH4/ifeval
-       args:
-         num_few_shot: 0
-     metrics:
-     - type: inst_level_strict_acc and prompt_level_strict_acc
-       value: 67.21
-       name: strict accuracy
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: BBH (3-Shot)
-       type: BBH
-       args:
-         num_few_shot: 3
-     metrics:
-     - type: acc_norm
-       value: 31.36
-       name: normalized accuracy
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MATH Lvl 5 (4-Shot)
-       type: hendrycks/competition_math
-       args:
-         num_few_shot: 4
-     metrics:
-     - type: exact_match
-       value: 12.08
-       name: exact match
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: GPQA (0-shot)
-       type: Idavidrein/gpqa
-       args:
-         num_few_shot: 0
-     metrics:
-     - type: acc_norm
-       value: 8.84
-       name: acc_norm
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MuSR (0-shot)
-       type: TAUR-Lab/MuSR
-       args:
-         num_few_shot: 0
-     metrics:
-     - type: acc_norm
-       value: 14.34
-       name: acc_norm
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MMLU-PRO (5-shot)
-       type: TIGER-Lab/MMLU-Pro
-       config: main
-       split: test
-       args:
-         num_few_shot: 5
-     metrics:
-     - type: acc
-       value: 29.76
-       name: accuracy
-     source:
-       url: >-
-         https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407
-       name: Open LLM Leaderboard
- datasets:
- - open-llm-leaderboard/bamec66557__Mistral-Nemo-VICIOUS_MESH-12B-2407-details
- - open-llm-leaderboard/bamec66557__VICIOUS_MESH-12B-BETA-details
- - open-llm-leaderboard/bamec66557__VICIOUS_MESH-12B-OMEGA-details
- - open-llm-leaderboard/bamec66557__VICIOUS_MESH-12B-ALPHA-details
  ---
  # merge

@@ -140,175 +33,93 @@ dtype: bfloat16
  merge_method: slerp
  tokenizer_source: base

- # Slices Configuration (Layer-Specific Merging)
  slices:
    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [0, 5]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [0, 5]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.85, 0.88, 0.91, 0.94, 0.97]
-         - name: mlp
-           value: [0.9, 0.92, 0.95, 0.98, 1.0]
-         - name: layer_norm
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [5, 10]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [5, 10]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.8, 0.83, 0.86, 0.89, 0.92]
-         - name: mlp
-           value: [0.88, 0.91, 0.94, 0.97, 1.0]
-         - name: layer_norm
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [10, 15]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [10, 15]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
-         - name: mlp
-           value: [0.85, 0.88, 0.91, 0.94, 0.97]
-         - name: layer_norm
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [15, 20]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [15, 20]
      parameters:
        t:
          - name: self_attn
-           value: [0.72, 0.75, 0.78, 0.81, 0.84]
          - name: mlp
-           value: [0.8, 0.83, 0.86, 0.89, 0.92]
          - name: layer_norm
-           value: [0.6, 0.63, 0.66, 0.69, 0.72]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [20, 25]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [20, 25]
      parameters:
        t:
          - name: self_attn
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
          - name: mlp
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
          - name: layer_norm
-           value: [0.55, 0.58, 0.61, 0.64, 0.67]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [25, 30]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [25, 30]
      parameters:
        t:
          - name: self_attn
-           value: [0.68, 0.71, 0.74, 0.77, 0.8]
          - name: mlp
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
          - name: layer_norm
-           value: [0.5, 0.53, 0.56, 0.59, 0.62]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [30, 35]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [30, 35]
      parameters:
        t:
          - name: self_attn
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
          - name: mlp
-           value: [0.68, 0.71, 0.74, 0.77, 0.8]
          - name: layer_norm
-           value: [0.45, 0.48, 0.51, 0.54, 0.57]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [35, 40]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [35, 40]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.6, 0.63, 0.66, 0.69, 0.72]
-         - name: mlp
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
-         - name: layer_norm
-           value: [0.4, 0.43, 0.46, 0.49, 0.52]
-         - name: embed_tokens
-           value: [1.0]

  # Regularization
  regularization:
    - method: weight_clipping
-     clip_range: [-0.1, 0.1]
    - method: random_noise
-     scale: 0.003
    - method: attention_dropout
-     scale: 0.05
-   - method: gradient_clipping
-     clip_norm: 1.0

  # Postprocessing
  postprocessing:
    - operation: non_linear_scaling
      parameters:
        function: tanh
    - operation: sharpening
-     intensity: 0.4
    - operation: gaussian_smoothing
-     sigma: 1.0
    - operation: normalize
    - operation: dynamic_scaling
-     scale_range: [0.85, 1.15]
    - operation: smoothing
      parameters:
        adaptive: true
-       range: [0.9, 1.1]
-       kernel_size: 3

  ```
- # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
- Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/bamec66557__Mistral-Nemo-VICIOUS_MESH-12B-2407-details)
-
- | Metric |Value|
- |-------------------|----:|
- |Avg. |27.26|
- |IFEval (0-Shot) |67.21|
- |BBH (3-Shot) |31.36|
- |MATH Lvl 5 (4-Shot)|12.08|
- |GPQA (0-shot) | 8.84|
- |MuSR (0-shot) |14.34|
- |MMLU-PRO (5-shot) |29.76|

  ---
+ base_model:
+ - bamec66557/VICIOUS_MESH-12B-BETA
+ - bamec66557/VICIOUS_MESH-12B-OMEGA
  library_name: transformers
  tags:
  - mergekit
  - merge
+
  ---
  # merge

  merge_method: slerp
  tokenizer_source: base

+ # Slices Configuration
  slices:
    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [0, 10]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [0, 10]
      parameters:
        t:
          - name: self_attn
+           value: [0.5, 0.55, 0.6, 0.65, 0.7]
          - name: mlp
+           value: [1.0, 1.05, 1.1, 1.15, 1.2]
          - name: layer_norm
+           value: [0.9, 0.95, 1.0, 1.05, 1.1]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [10, 20]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [10, 20]
      parameters:
        t:
          - name: self_attn
+           value: [0.4, 0.45, 0.5, 0.55, 0.6]
          - name: mlp
+           value: [1.1, 1.15, 1.2, 1.25, 1.3]
          - name: layer_norm
+           value: [1.0, 1.05, 1.1, 1.15, 1.2]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [20, 30]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [20, 30]
      parameters:
        t:
          - name: self_attn
+           value: [0.6, 0.65, 0.7, 0.75, 0.8]
          - name: mlp
+           value: [0.9, 0.95, 1.0, 1.05, 1.1]
          - name: layer_norm
+           value: [0.85, 0.9, 0.95, 1.0, 1.05]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [30, 40]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [30, 40]
      parameters:
        t:
          - name: self_attn
+           value: [0.7, 0.75, 0.8, 0.85, 0.9]
          - name: mlp
+           value: [0.8, 0.85, 0.9, 0.95, 1.0]
          - name: layer_norm
+           value: [0.8, 0.85, 0.9, 0.95, 1.0]

  # Regularization
  regularization:
+   - method: gradient_penalty
+     scale: 0.05 # Increased influence for gradient control
    - method: weight_clipping
+     clip_range: [-0.2, 0.2] # Broader clipping range for flexibility
    - method: random_noise
+     scale: 0.01 # Stronger noise injection
    - method: attention_dropout
+     scale: 0.1 # Higher dropout to reduce attention fixation

  # Postprocessing
  postprocessing:
+   - operation: entropy_regularization
+     scale: 0.05 # Stronger encouragement for diverse outputs
    - operation: non_linear_scaling
      parameters:
        function: tanh
    - operation: sharpening
+     intensity: 0.5 # Enhanced sharpening for precise outputs
    - operation: gaussian_smoothing
+     sigma: 1.5 # Increased smoothing for stable outputs
    - operation: normalize
    - operation: dynamic_scaling
+     scale_range: [0.8, 1.2] # Expanded dynamic range for scaling
    - operation: smoothing
      parameters:
        adaptive: true
+       range: [0.85, 1.15] # Wider adaptive smoothing range
+       kernel_size: 5

  ```
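
The updated card keeps the merged model as a plain `transformers` checkpoint. A minimal usage sketch, assuming the repository id referenced throughout this card (`bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407`) and standard `transformers` APIs; adjust dtype and device placement to your hardware:

```python
# Minimal sketch: load the merged checkpoint and generate a few tokens.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407"  # repo id from this card

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # matches `dtype: bfloat16` in the merge config
    device_map="auto",
)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

The merge itself can be reproduced from `mergekit_config.yml` with mergekit's `mergekit-yaml` CLI, assuming mergekit is installed and both source models are available locally or on the Hub.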
mergekit_config.yml CHANGED
@@ -3,161 +3,91 @@ dtype: bfloat16
  merge_method: slerp
  tokenizer_source: base

- # Slices Configuration (Layer-Specific Merging)
  slices:
    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [0, 5]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [0, 5]
      parameters:
        t:
          - name: self_attn
-           value: [0.85, 0.88, 0.91, 0.94, 0.97]
          - name: mlp
-           value: [0.9, 0.92, 0.95, 0.98, 1.0]
          - name: layer_norm
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [5, 10]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [5, 10]
      parameters:
        t:
          - name: self_attn
-           value: [0.8, 0.83, 0.86, 0.89, 0.92]
          - name: mlp
-           value: [0.88, 0.91, 0.94, 0.97, 1.0]
          - name: layer_norm
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [10, 15]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [10, 15]
      parameters:
        t:
          - name: self_attn
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
          - name: mlp
-           value: [0.85, 0.88, 0.91, 0.94, 0.97]
          - name: layer_norm
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
-         - name: embed_tokens
-           value: [1.0]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [15, 20]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [15, 20]
      parameters:
        t:
          - name: self_attn
-           value: [0.72, 0.75, 0.78, 0.81, 0.84]
          - name: mlp
-           value: [0.8, 0.83, 0.86, 0.89, 0.92]
          - name: layer_norm
-           value: [0.6, 0.63, 0.66, 0.69, 0.72]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [20, 25]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [20, 25]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
-         - name: mlp
-           value: [0.75, 0.78, 0.81, 0.84, 0.87]
-         - name: layer_norm
-           value: [0.55, 0.58, 0.61, 0.64, 0.67]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [25, 30]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [25, 30]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.68, 0.71, 0.74, 0.77, 0.8]
-         - name: mlp
-           value: [0.7, 0.73, 0.76, 0.79, 0.82]
-         - name: layer_norm
-           value: [0.5, 0.53, 0.56, 0.59, 0.62]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [30, 35]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [30, 35]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
-         - name: mlp
-           value: [0.68, 0.71, 0.74, 0.77, 0.8]
-         - name: layer_norm
-           value: [0.45, 0.48, 0.51, 0.54, 0.57]
-         - name: embed_tokens
-           value: [1.0]
-
-   - sources:
-       - model: bamec66557/VICIOUS_MESH-12B-OMEGA
-         layer_range: [35, 40]
-       - model: bamec66557/VICIOUS_MESH-12B-BETA
-         layer_range: [35, 40]
-     parameters:
-       t:
-         - name: self_attn
-           value: [0.6, 0.63, 0.66, 0.69, 0.72]
-         - name: mlp
-           value: [0.65, 0.68, 0.71, 0.74, 0.77]
-         - name: layer_norm
-           value: [0.4, 0.43, 0.46, 0.49, 0.52]
-         - name: embed_tokens
-           value: [1.0]

  # Regularization
  regularization:
    - method: weight_clipping
-     clip_range: [-0.1, 0.1]
    - method: random_noise
-     scale: 0.003
    - method: attention_dropout
-     scale: 0.05
-   - method: gradient_clipping
-     clip_norm: 1.0

  # Postprocessing
  postprocessing:
    - operation: non_linear_scaling
      parameters:
        function: tanh
    - operation: sharpening
-     intensity: 0.4
    - operation: gaussian_smoothing
-     sigma: 1.0
    - operation: normalize
    - operation: dynamic_scaling
-     scale_range: [0.85, 1.15]
    - operation: smoothing
      parameters:
        adaptive: true
-       range: [0.9, 1.1]
-       kernel_size: 3

  merge_method: slerp
  tokenizer_source: base

+ # Slices Configuration
  slices:
    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [0, 10]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [0, 10]
      parameters:
        t:
          - name: self_attn
+           value: [0.5, 0.55, 0.6, 0.65, 0.7]
          - name: mlp
+           value: [1.0, 1.05, 1.1, 1.15, 1.2]
          - name: layer_norm
+           value: [0.9, 0.95, 1.0, 1.05, 1.1]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [10, 20]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [10, 20]
      parameters:
        t:
          - name: self_attn
+           value: [0.4, 0.45, 0.5, 0.55, 0.6]
          - name: mlp
+           value: [1.1, 1.15, 1.2, 1.25, 1.3]
          - name: layer_norm
+           value: [1.0, 1.05, 1.1, 1.15, 1.2]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [20, 30]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [20, 30]
      parameters:
        t:
          - name: self_attn
+           value: [0.6, 0.65, 0.7, 0.75, 0.8]
          - name: mlp
+           value: [0.9, 0.95, 1.0, 1.05, 1.1]
          - name: layer_norm
+           value: [0.85, 0.9, 0.95, 1.0, 1.05]

    - sources:
        - model: bamec66557/VICIOUS_MESH-12B-OMEGA
+         layer_range: [30, 40]
        - model: bamec66557/VICIOUS_MESH-12B-BETA
+         layer_range: [30, 40]
      parameters:
        t:
          - name: self_attn
+           value: [0.7, 0.75, 0.8, 0.85, 0.9]
          - name: mlp
+           value: [0.8, 0.85, 0.9, 0.95, 1.0]
          - name: layer_norm
+           value: [0.8, 0.85, 0.9, 0.95, 1.0]

  # Regularization
  regularization:
+   - method: gradient_penalty
+     scale: 0.05 # Increased influence for gradient control
    - method: weight_clipping
+     clip_range: [-0.2, 0.2] # Broader clipping range for flexibility
    - method: random_noise
+     scale: 0.01 # Stronger noise injection
    - method: attention_dropout
+     scale: 0.1 # Higher dropout to reduce attention fixation

  # Postprocessing
  postprocessing:
+   - operation: entropy_regularization
+     scale: 0.05 # Stronger encouragement for diverse outputs
    - operation: non_linear_scaling
      parameters:
        function: tanh
    - operation: sharpening
+     intensity: 0.5 # Enhanced sharpening for precise outputs
    - operation: gaussian_smoothing
+     sigma: 1.5 # Increased smoothing for stable outputs
    - operation: normalize
    - operation: dynamic_scaling
+     scale_range: [0.8, 1.2] # Expanded dynamic range for scaling
    - operation: smoothing
      parameters:
        adaptive: true
+       range: [0.85, 1.15] # Wider adaptive smoothing range
+       kernel_size: 5
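
In the slerp configuration above, each `t` list gives per-layer interpolation weights between the OMEGA and BETA tensors for a slice, with separate schedules for `self_attn`, `mlp`, and `layer_norm`. The sketch below only illustrates what a single `t` value controls (t = 0 keeps the first model's tensor, t = 1 the second's); it is not mergekit's implementation, and the tensor shapes are made up for the example:

```python
# Illustrative SLERP between two weight tensors at interpolation factor t.
import numpy as np

def slerp(w0: np.ndarray, w1: np.ndarray, t: float, eps: float = 1e-8) -> np.ndarray:
    """Spherical linear interpolation between two flattened weight tensors."""
    v0, v1 = w0.ravel(), w1.ravel()
    v0n = v0 / (np.linalg.norm(v0) + eps)
    v1n = v1 / (np.linalg.norm(v1) + eps)
    dot = np.clip(np.dot(v0n, v1n), -1.0, 1.0)
    theta = np.arccos(dot)
    if theta < eps:
        # Nearly parallel vectors: fall back to plain linear interpolation.
        return (1.0 - t) * w0 + t * w1
    s0 = np.sin((1.0 - t) * theta) / np.sin(theta)
    s1 = np.sin(t * theta) / np.sin(theta)
    return (s0 * v0 + s1 * v1).reshape(w0.shape)

# Example: the first self_attn entry of the [0, 10] slice ramps t from 0.5 to 0.7,
# pulling those attention weights roughly halfway between the two source models.
a = np.random.randn(8, 8).astype(np.float32)
b = np.random.randn(8, 8).astype(np.float32)
merged = slerp(a, b, 0.5)
print(merged.shape)
```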
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:94eb310d8e6a586cd8941b6b4f89fda811928dc3ee0a0b3493521c0132573f72
+ oid sha256:606929c8ca1b67717338cc8a043d291796649691a67833de1c547f17f105b2ea
  size 4865489336
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f9f2ad97fdfffc4fea4ad87595ce2c859dcc20140959b964649d8bcd783bec4
+ oid sha256:5d7ed0e2c571a185bafd363f79ddb85d760c1955008863eabef4fff22c42aa03
  size 4907529456
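
The `*.safetensors` entries above are Git LFS pointer files: only the `oid sha256` of each shard changed in this commit, while the shard sizes stayed the same. A small sketch, assuming the shards have been downloaded locally, for checking a shard against the hash recorded in its pointer (the file name and hash below are taken from this commit; the local path is an assumption):

```python
# Sketch: verify a downloaded shard against the sha256 from its LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in 1 MiB chunks so large shards fit in constant memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "606929c8ca1b67717338cc8a043d291796649691a67833de1c547f17f105b2ea"
actual = sha256_of("model-00001-of-00005.safetensors")  # assumed local path
print("OK" if actual == expected else f"hash mismatch: {actual}")
```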