NicoNico committed on
Commit
1838e48
1 Parent(s): 8e2882c

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -5,8 +5,8 @@ tags:
5
  ---
6
 
7
  # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
8
- This quantized low-bit model was converted to MLX format from [`GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0`]().
9
- Refer to the [original model card](https://huggingface.co/GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0) for more details on the model.
10
  ## Use with mlx
11
 
12
  ```bash
 
5
  ---
6
 
7
  # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
8
+ This quantized low-bit model was converted to MLX format from [`GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0`](https://huggingface.co/GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0).
9
+ Refer to the [original model card](https://huggingface.co/GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0) for more details on the model.
10
  ## Use with mlx
11
 
12
  ```bash
config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
  "add_cross_attention": false,
3
  "architectures": [
4
- "Qwen2ForCausalLM"
5
  ],
 
6
  "attention_dropout": 0.0,
7
  "bad_words_ids": null,
8
  "begin_suppress_tokens": null,
9
- "bos_token_id": 151643,
10
  "chunk_size_feed_forward": 0,
11
  "cross_attention_hidden_size": null,
12
  "decoder_start_token_id": null,
@@ -14,7 +15,7 @@
14
  "do_sample": false,
15
  "early_stopping": false,
16
  "encoder_no_repeat_ngram_size": 0,
17
- "eos_token_id": 151645,
18
  "exponential_decay_length_penalty": null,
19
  "finetuning_task": null,
20
  "forced_bos_token_id": null,
@@ -35,22 +36,22 @@
35
  },
36
  "length_penalty": 1.0,
37
  "max_length": 20,
38
- "max_position_embeddings": 32768,
39
- "max_window_layers": 28,
40
  "min_length": 0,
41
- "model_type": "qwen2",
42
  "no_repeat_ngram_size": 0,
43
  "num_attention_heads": 32,
44
  "num_beam_groups": 1,
45
  "num_beams": 1,
46
  "num_hidden_layers": 32,
47
- "num_key_value_heads": 32,
48
  "num_return_sequences": 1,
49
  "output_attentions": false,
50
  "output_hidden_states": false,
51
  "output_scores": false,
52
  "pad_token_id": null,
53
  "prefix": null,
 
54
  "problem_type": null,
55
  "pruned_heads": {},
56
  "quantization": {
@@ -61,10 +62,10 @@
61
  "repetition_penalty": 1.0,
62
  "return_dict": true,
63
  "return_dict_in_generate": false,
64
- "rms_norm_eps": 1e-06,
65
- "rope_theta": 1000000.0,
 
66
  "sep_token_id": null,
67
- "sliding_window": 32768,
68
  "suppress_tokens": null,
69
  "task_specific_params": null,
70
  "temperature": 1.0,
@@ -80,6 +81,5 @@
80
  "typical_p": 1.0,
81
  "use_bfloat16": false,
82
  "use_cache": true,
83
- "use_sliding_window": false,
84
- "vocab_size": 151936
85
  }
 
1
  {
2
  "add_cross_attention": false,
3
  "architectures": [
4
+ "LlamaForCausalLM"
5
  ],
6
+ "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bad_words_ids": null,
9
  "begin_suppress_tokens": null,
10
+ "bos_token_id": 1,
11
  "chunk_size_feed_forward": 0,
12
  "cross_attention_hidden_size": null,
13
  "decoder_start_token_id": null,
 
15
  "do_sample": false,
16
  "early_stopping": false,
17
  "encoder_no_repeat_ngram_size": 0,
18
+ "eos_token_id": 2,
19
  "exponential_decay_length_penalty": null,
20
  "finetuning_task": null,
21
  "forced_bos_token_id": null,
 
36
  },
37
  "length_penalty": 1.0,
38
  "max_length": 20,
39
+ "max_position_embeddings": 4096,
 
40
  "min_length": 0,
41
+ "model_type": "llama",
42
  "no_repeat_ngram_size": 0,
43
  "num_attention_heads": 32,
44
  "num_beam_groups": 1,
45
  "num_beams": 1,
46
  "num_hidden_layers": 32,
47
+ "num_key_value_heads": 4,
48
  "num_return_sequences": 1,
49
  "output_attentions": false,
50
  "output_hidden_states": false,
51
  "output_scores": false,
52
  "pad_token_id": null,
53
  "prefix": null,
54
+ "pretraining_tp": 1,
55
  "problem_type": null,
56
  "pruned_heads": {},
57
  "quantization": {
 
62
  "repetition_penalty": 1.0,
63
  "return_dict": true,
64
  "return_dict_in_generate": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": null,
67
+ "rope_theta": 5000000.0,
68
  "sep_token_id": null,
 
69
  "suppress_tokens": null,
70
  "task_specific_params": null,
71
  "temperature": 1.0,
 
81
  "typical_p": 1.0,
82
  "use_bfloat16": false,
83
  "use_cache": true,
84
+ "vocab_size": 64000
 
85
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe3b05b579c86f2ae5522c22941fffdab484da14633a47a6d40624334c1892dd
3
- size 5191621850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a8d50bdf8fbd1540698c8f71db74b9de56d6a375c5d676122c490a05b15432
3
+ size 3358077486
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 5191475200
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model.safetensors",
@@ -22,7 +22,6 @@
22
  "model.layers.0.mlp.up_proj.scales": "model.safetensors",
23
  "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
24
  "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
25
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
26
  "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
27
  "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
28
  "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
@@ -33,13 +32,11 @@
33
  "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
34
  "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
35
  "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
36
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
37
  "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
38
  "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
39
  "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
40
  "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
41
  "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
42
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
43
  "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
44
  "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
45
  "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
@@ -62,7 +59,6 @@
62
  "model.layers.1.mlp.up_proj.scales": "model.safetensors",
63
  "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
64
  "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
65
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
66
  "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
67
  "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
68
  "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
@@ -73,13 +69,11 @@
73
  "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
74
  "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
75
  "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
76
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
77
  "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
78
  "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
79
  "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
80
  "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
81
  "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
82
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
83
  "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
84
  "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
85
  "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
@@ -102,7 +96,6 @@
102
  "model.layers.10.mlp.up_proj.scales": "model.safetensors",
103
  "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
104
  "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
105
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
106
  "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
107
  "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
108
  "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
@@ -113,13 +106,11 @@
113
  "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
114
  "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
115
  "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
116
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
117
  "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
118
  "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
119
  "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
120
  "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
121
  "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
122
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
123
  "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
124
  "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
125
  "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
@@ -142,7 +133,6 @@
142
  "model.layers.11.mlp.up_proj.scales": "model.safetensors",
143
  "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
144
  "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
145
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
146
  "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
147
  "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
148
  "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
@@ -153,13 +143,11 @@
153
  "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
154
  "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
155
  "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
156
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
157
  "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
158
  "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
159
  "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
160
  "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
161
  "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
162
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
163
  "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
164
  "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
165
  "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
@@ -182,7 +170,6 @@
182
  "model.layers.12.mlp.up_proj.scales": "model.safetensors",
183
  "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
184
  "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
185
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
186
  "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
187
  "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
188
  "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
@@ -193,13 +180,11 @@
193
  "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
194
  "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
195
  "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
196
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
197
  "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
198
  "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
199
  "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
200
  "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
201
  "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
202
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
203
  "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
204
  "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
205
  "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
@@ -222,7 +207,6 @@
222
  "model.layers.13.mlp.up_proj.scales": "model.safetensors",
223
  "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
224
  "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
225
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
226
  "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
227
  "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
228
  "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
@@ -233,13 +217,11 @@
233
  "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
234
  "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
235
  "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
236
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
237
  "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
238
  "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
239
  "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
240
  "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
241
  "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
242
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
243
  "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
244
  "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
245
  "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
@@ -262,7 +244,6 @@
262
  "model.layers.14.mlp.up_proj.scales": "model.safetensors",
263
  "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
264
  "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
265
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
266
  "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
267
  "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
268
  "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
@@ -273,13 +254,11 @@
273
  "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
274
  "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
275
  "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
276
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
277
  "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
278
  "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
279
  "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
280
  "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
281
  "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
282
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
283
  "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
284
  "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
285
  "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
@@ -302,7 +281,6 @@
302
  "model.layers.15.mlp.up_proj.scales": "model.safetensors",
303
  "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
304
  "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
305
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
306
  "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
307
  "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
308
  "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
@@ -313,13 +291,11 @@
313
  "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
314
  "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
315
  "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
316
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
317
  "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
318
  "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
319
  "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
320
  "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
321
  "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
322
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
323
  "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
324
  "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
325
  "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
@@ -342,7 +318,6 @@
342
  "model.layers.16.mlp.up_proj.scales": "model.safetensors",
343
  "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
344
  "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
345
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
346
  "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
347
  "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
348
  "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
@@ -353,13 +328,11 @@
353
  "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
354
  "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
355
  "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
356
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
357
  "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
358
  "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
359
  "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
360
  "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
361
  "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
362
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
363
  "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
364
  "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
365
  "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
@@ -382,7 +355,6 @@
382
  "model.layers.17.mlp.up_proj.scales": "model.safetensors",
383
  "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
384
  "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
385
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
386
  "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
387
  "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
388
  "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
@@ -393,13 +365,11 @@
393
  "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
394
  "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
395
  "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
396
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
397
  "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
398
  "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
399
  "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
400
  "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
401
  "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
402
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
403
  "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
404
  "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
405
  "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
@@ -422,7 +392,6 @@
422
  "model.layers.18.mlp.up_proj.scales": "model.safetensors",
423
  "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
424
  "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
425
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
426
  "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
427
  "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
428
  "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
@@ -433,13 +402,11 @@
433
  "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
434
  "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
435
  "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
436
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
437
  "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
438
  "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
439
  "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
440
  "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
441
  "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
442
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
443
  "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
444
  "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
445
  "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
@@ -462,7 +429,6 @@
462
  "model.layers.19.mlp.up_proj.scales": "model.safetensors",
463
  "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
464
  "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
466
  "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
467
  "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
468
  "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
@@ -473,13 +439,11 @@
473
  "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
474
  "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
475
  "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
476
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
477
  "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
478
  "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
479
  "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
480
  "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
481
  "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
482
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
483
  "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
484
  "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
485
  "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
@@ -502,7 +466,6 @@
502
  "model.layers.2.mlp.up_proj.scales": "model.safetensors",
503
  "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
504
  "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
505
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
506
  "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
507
  "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
508
  "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
@@ -513,13 +476,11 @@
513
  "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
514
  "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
515
  "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
516
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
517
  "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
518
  "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
519
  "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
520
  "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
521
  "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
522
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
523
  "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
524
  "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
525
  "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
@@ -542,7 +503,6 @@
542
  "model.layers.20.mlp.up_proj.scales": "model.safetensors",
543
  "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
544
  "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
545
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
546
  "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
547
  "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
548
  "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
@@ -553,13 +513,11 @@
553
  "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
554
  "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
555
  "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
556
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
557
  "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
558
  "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
559
  "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
560
  "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
561
  "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
562
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
563
  "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
564
  "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
565
  "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
@@ -582,7 +540,6 @@
582
  "model.layers.21.mlp.up_proj.scales": "model.safetensors",
583
  "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
584
  "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
585
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
586
  "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
587
  "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
588
  "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
@@ -593,13 +550,11 @@
593
  "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
594
  "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
595
  "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
596
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
597
  "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
598
  "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
599
  "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
600
  "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
601
  "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
602
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
603
  "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
604
  "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
605
  "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
@@ -622,7 +577,6 @@
622
  "model.layers.22.mlp.up_proj.scales": "model.safetensors",
623
  "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
624
  "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
625
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
626
  "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
627
  "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
628
  "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
@@ -633,13 +587,11 @@
633
  "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
634
  "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
635
  "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
636
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
637
  "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
638
  "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
639
  "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
640
  "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
641
  "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
642
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
643
  "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
644
  "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
645
  "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
@@ -662,7 +614,6 @@
662
  "model.layers.23.mlp.up_proj.scales": "model.safetensors",
663
  "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
664
  "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
665
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
666
  "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
667
  "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
668
  "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
@@ -673,13 +624,11 @@
673
  "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
674
  "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
675
  "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
676
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
677
  "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
678
  "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
679
  "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
680
  "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
681
  "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
682
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
683
  "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
684
  "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
685
  "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
@@ -702,7 +651,6 @@
702
  "model.layers.24.mlp.up_proj.scales": "model.safetensors",
703
  "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
704
  "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
705
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
706
  "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
707
  "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
708
  "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
@@ -713,13 +661,11 @@
713
  "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
714
  "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
715
  "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
716
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
717
  "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
718
  "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
719
  "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
720
  "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
721
  "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
722
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
723
  "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
724
  "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
725
  "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
@@ -742,7 +688,6 @@
742
  "model.layers.25.mlp.up_proj.scales": "model.safetensors",
743
  "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
744
  "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
745
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
746
  "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
747
  "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
748
  "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
@@ -753,13 +698,11 @@
753
  "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
754
  "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
755
  "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
756
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
757
  "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
758
  "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
759
  "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
760
  "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
761
  "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
762
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
763
  "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
764
  "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
765
  "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
@@ -782,7 +725,6 @@
782
  "model.layers.26.mlp.up_proj.scales": "model.safetensors",
783
  "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
784
  "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
785
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
786
  "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
787
  "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
788
  "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
@@ -793,13 +735,11 @@
793
  "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
794
  "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
795
  "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
796
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
797
  "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
798
  "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
799
  "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
800
  "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
801
  "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
802
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
803
  "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
804
  "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
805
  "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
@@ -822,7 +762,6 @@
822
  "model.layers.27.mlp.up_proj.scales": "model.safetensors",
823
  "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
824
  "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
825
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
826
  "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
827
  "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
828
  "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
@@ -833,13 +772,11 @@
833
  "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
834
  "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
835
  "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
836
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
837
  "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
838
  "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
839
  "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
840
  "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
841
  "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
842
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
843
  "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
844
  "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
845
  "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
@@ -862,7 +799,6 @@
862
  "model.layers.28.mlp.up_proj.scales": "model.safetensors",
863
  "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
864
  "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
865
- "model.layers.28.self_attn.k_proj.bias": "model.safetensors",
866
  "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
867
  "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
868
  "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
@@ -873,13 +809,11 @@
873
  "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
874
  "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
875
  "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
876
- "model.layers.28.self_attn.q_proj.bias": "model.safetensors",
877
  "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
878
  "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
879
  "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
880
  "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
881
  "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
882
- "model.layers.28.self_attn.v_proj.bias": "model.safetensors",
883
  "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
884
  "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
885
  "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
@@ -902,7 +836,6 @@
902
  "model.layers.29.mlp.up_proj.scales": "model.safetensors",
903
  "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
904
  "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
905
- "model.layers.29.self_attn.k_proj.bias": "model.safetensors",
906
  "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
907
  "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
908
  "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
@@ -913,13 +846,11 @@
913
  "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
914
  "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
915
  "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
916
- "model.layers.29.self_attn.q_proj.bias": "model.safetensors",
917
  "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
918
  "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
919
  "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
920
  "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
921
  "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
922
- "model.layers.29.self_attn.v_proj.bias": "model.safetensors",
923
  "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
924
  "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
925
  "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
@@ -942,7 +873,6 @@
942
  "model.layers.3.mlp.up_proj.scales": "model.safetensors",
943
  "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
944
  "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
945
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
946
  "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
947
  "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
948
  "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
@@ -953,13 +883,11 @@
953
  "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
954
  "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
955
  "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
956
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
957
  "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
958
  "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
959
  "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
960
  "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
961
  "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
962
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
963
  "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
964
  "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
965
  "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
@@ -982,7 +910,6 @@
982
  "model.layers.30.mlp.up_proj.scales": "model.safetensors",
983
  "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
984
  "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
985
- "model.layers.30.self_attn.k_proj.bias": "model.safetensors",
986
  "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
987
  "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
988
  "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
@@ -993,13 +920,11 @@
993
  "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
994
  "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
995
  "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
996
- "model.layers.30.self_attn.q_proj.bias": "model.safetensors",
997
  "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
998
  "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
999
  "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
1000
  "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
1001
  "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
1002
- "model.layers.30.self_attn.v_proj.bias": "model.safetensors",
1003
  "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
1004
  "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
1005
  "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
@@ -1022,7 +947,6 @@
1022
  "model.layers.31.mlp.up_proj.scales": "model.safetensors",
1023
  "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
1024
  "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
1025
- "model.layers.31.self_attn.k_proj.bias": "model.safetensors",
1026
  "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
1027
  "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
1028
  "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
@@ -1033,13 +957,11 @@
1033
  "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
1034
  "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
1035
  "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
1036
- "model.layers.31.self_attn.q_proj.bias": "model.safetensors",
1037
  "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
1038
  "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
1039
  "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
1040
  "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
1041
  "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
1042
- "model.layers.31.self_attn.v_proj.bias": "model.safetensors",
1043
  "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
1044
  "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
1045
  "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
@@ -1062,7 +984,6 @@
1062
  "model.layers.4.mlp.up_proj.scales": "model.safetensors",
1063
  "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
1064
  "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
1065
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
1066
  "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
1067
  "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
1068
  "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
@@ -1073,13 +994,11 @@
1073
  "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
1074
  "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
1075
  "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
1076
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
1077
  "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
1078
  "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
1079
  "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
1080
  "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
1081
  "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
1082
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
1083
  "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
1084
  "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
1085
  "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
@@ -1102,7 +1021,6 @@
1102
  "model.layers.5.mlp.up_proj.scales": "model.safetensors",
1103
  "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
1104
  "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
1105
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
1106
  "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
1107
  "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
1108
  "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
@@ -1113,13 +1031,11 @@
1113
  "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
1114
  "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
1115
  "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
1116
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
1117
  "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
1118
  "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
1119
  "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
1120
  "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
1121
  "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
1122
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
1123
  "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
1124
  "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
1125
  "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
@@ -1142,7 +1058,6 @@
1142
  "model.layers.6.mlp.up_proj.scales": "model.safetensors",
1143
  "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
1144
  "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
1145
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
1146
  "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
1147
  "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
1148
  "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
@@ -1153,13 +1068,11 @@
1153
  "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
1154
  "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
1155
  "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
1156
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
1157
  "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
1158
  "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
1159
  "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
1160
  "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
1161
  "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
1162
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
1163
  "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
1164
  "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
1165
  "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
@@ -1182,7 +1095,6 @@
1182
  "model.layers.7.mlp.up_proj.scales": "model.safetensors",
1183
  "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
1184
  "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
1185
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
1186
  "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
1187
  "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
1188
  "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
@@ -1193,13 +1105,11 @@
1193
  "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
1194
  "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
1195
  "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
1196
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
1197
  "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
1198
  "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
1199
  "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
1200
  "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
1201
  "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
1202
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
1203
  "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
1204
  "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
1205
  "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
@@ -1222,7 +1132,6 @@
1222
  "model.layers.8.mlp.up_proj.scales": "model.safetensors",
1223
  "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
1224
  "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
1225
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
1226
  "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
1227
  "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
1228
  "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
@@ -1233,13 +1142,11 @@
1233
  "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
1234
  "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
1235
  "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
1236
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
1237
  "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
1238
  "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
1239
  "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
1240
  "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
1241
  "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
1242
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
1243
  "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
1244
  "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
1245
  "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
@@ -1262,7 +1169,6 @@
1262
  "model.layers.9.mlp.up_proj.scales": "model.safetensors",
1263
  "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
1264
  "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
1265
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
1266
  "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
1267
  "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
1268
  "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
@@ -1273,13 +1179,11 @@
1273
  "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
1274
  "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
1275
  "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
1276
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
1277
  "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
1278
  "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
1279
  "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
1280
  "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
1281
  "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
1282
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
1283
  "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
1284
  "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
1285
  "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 3357941760
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model.safetensors",
 
22
  "model.layers.0.mlp.up_proj.scales": "model.safetensors",
23
  "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
24
  "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
 
25
  "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
26
  "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
27
  "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
 
32
  "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
33
  "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
34
  "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
 
35
  "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
36
  "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
37
  "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
38
  "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
39
  "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
 
40
  "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
41
  "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
42
  "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
 
59
  "model.layers.1.mlp.up_proj.scales": "model.safetensors",
60
  "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
61
  "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
 
62
  "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
63
  "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
64
  "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
 
69
  "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
70
  "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
71
  "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
 
72
  "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
73
  "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
74
  "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
75
  "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
76
  "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
 
77
  "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
78
  "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
79
  "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
 
96
  "model.layers.10.mlp.up_proj.scales": "model.safetensors",
97
  "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
98
  "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
 
99
  "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
100
  "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
101
  "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
 
106
  "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
107
  "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
108
  "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
 
109
  "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
110
  "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
111
  "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
112
  "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
113
  "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
 
114
  "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
115
  "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
116
  "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
 
133
  "model.layers.11.mlp.up_proj.scales": "model.safetensors",
134
  "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
135
  "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
 
136
  "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
137
  "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
138
  "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
 
143
  "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
144
  "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
145
  "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
 
146
  "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
147
  "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
148
  "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
149
  "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
150
  "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
 
151
  "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
152
  "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
153
  "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
 
170
  "model.layers.12.mlp.up_proj.scales": "model.safetensors",
171
  "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
172
  "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
 
173
  "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
174
  "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
175
  "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
 
180
  "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
181
  "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
182
  "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
 
183
  "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
184
  "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
185
  "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
186
  "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
187
  "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
 
188
  "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
189
  "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
190
  "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
 
207
  "model.layers.13.mlp.up_proj.scales": "model.safetensors",
208
  "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
209
  "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
 
210
  "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
211
  "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
212
  "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
 
217
  "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
218
  "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
219
  "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
 
220
  "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
221
  "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
222
  "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
223
  "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
224
  "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
 
225
  "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
226
  "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
227
  "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
 
244
  "model.layers.14.mlp.up_proj.scales": "model.safetensors",
245
  "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
246
  "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
 
247
  "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
248
  "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
249
  "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
 
254
  "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
255
  "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
256
  "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
 
257
  "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
258
  "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
259
  "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
260
  "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
261
  "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
 
262
  "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
263
  "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
264
  "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
 
281
  "model.layers.15.mlp.up_proj.scales": "model.safetensors",
282
  "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
283
  "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
 
284
  "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
285
  "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
286
  "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
 
291
  "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
292
  "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
293
  "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
 
294
  "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
295
  "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
296
  "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
297
  "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
298
  "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
 
299
  "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
300
  "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
301
  "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
 
318
  "model.layers.16.mlp.up_proj.scales": "model.safetensors",
319
  "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
320
  "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
 
321
  "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
322
  "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
323
  "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
 
328
  "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
329
  "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
330
  "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
 
331
  "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
332
  "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
333
  "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
334
  "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
335
  "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
 
336
  "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
337
  "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
338
  "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
 
355
  "model.layers.17.mlp.up_proj.scales": "model.safetensors",
356
  "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
357
  "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
 
358
  "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
359
  "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
360
  "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
 
365
  "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
366
  "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
367
  "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
 
368
  "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
369
  "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
370
  "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
371
  "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
372
  "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
 
373
  "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
374
  "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
375
  "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
 
392
  "model.layers.18.mlp.up_proj.scales": "model.safetensors",
393
  "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
394
  "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
 
395
  "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
396
  "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
397
  "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
 
402
  "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
403
  "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
404
  "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
 
405
  "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
406
  "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
407
  "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
408
  "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
409
  "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
 
410
  "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
411
  "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
412
  "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
 
429
  "model.layers.19.mlp.up_proj.scales": "model.safetensors",
430
  "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
431
  "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
 
432
  "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
433
  "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
434
  "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
 
439
  "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
440
  "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
441
  "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
 
442
  "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
443
  "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
444
  "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
445
  "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
446
  "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
 
447
  "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
448
  "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
449
  "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
 
466
  "model.layers.2.mlp.up_proj.scales": "model.safetensors",
467
  "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
468
  "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
 
469
  "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
470
  "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
471
  "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
 
476
  "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
477
  "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
478
  "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
 
479
  "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
480
  "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
481
  "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
482
  "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
483
  "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
 
484
  "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
485
  "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
486
  "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
 
503
  "model.layers.20.mlp.up_proj.scales": "model.safetensors",
504
  "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
505
  "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
 
506
  "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
507
  "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
508
  "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
 
513
  "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
514
  "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
515
  "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
 
516
  "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
517
  "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
518
  "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
519
  "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
520
  "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
 
521
  "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
522
  "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
523
  "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
 
540
  "model.layers.21.mlp.up_proj.scales": "model.safetensors",
541
  "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
542
  "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
 
543
  "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
544
  "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
545
  "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
 
550
  "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
551
  "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
552
  "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
 
553
  "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
554
  "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
555
  "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
556
  "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
557
  "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
 
558
  "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
559
  "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
560
  "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
 
577
  "model.layers.22.mlp.up_proj.scales": "model.safetensors",
578
  "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
579
  "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
 
580
  "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
581
  "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
582
  "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
 
587
  "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
588
  "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
589
  "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
 
590
  "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
591
  "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
592
  "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
593
  "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
594
  "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
 
595
  "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
596
  "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
597
  "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
 
614
  "model.layers.23.mlp.up_proj.scales": "model.safetensors",
615
  "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
616
  "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
 
617
  "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
618
  "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
619
  "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
 
624
  "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
625
  "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
626
  "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
 
627
  "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
628
  "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
629
  "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
630
  "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
631
  "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
 
632
  "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
633
  "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
634
  "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
 
651
  "model.layers.24.mlp.up_proj.scales": "model.safetensors",
652
  "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
653
  "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
 
654
  "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
655
  "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
656
  "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
 
661
  "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
662
  "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
663
  "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
 
664
  "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
665
  "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
666
  "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
667
  "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
668
  "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
 
669
  "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
670
  "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
671
  "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
 
688
  "model.layers.25.mlp.up_proj.scales": "model.safetensors",
689
  "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
690
  "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
 
691
  "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
692
  "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
693
  "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
 
698
  "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
699
  "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
700
  "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
 
701
  "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
702
  "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
703
  "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
704
  "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
705
  "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
 
706
  "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
707
  "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
708
  "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
 
725
  "model.layers.26.mlp.up_proj.scales": "model.safetensors",
726
  "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
727
  "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
 
728
  "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
729
  "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
730
  "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
 
735
  "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
736
  "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
737
  "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
 
738
  "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
739
  "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
740
  "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
741
  "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
742
  "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
 
743
  "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
744
  "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
745
  "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
 
762
  "model.layers.27.mlp.up_proj.scales": "model.safetensors",
763
  "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
764
  "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
 
765
  "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
766
  "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
767
  "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
 
772
  "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
773
  "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
774
  "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
 
775
  "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
776
  "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
777
  "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
778
  "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
779
  "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
 
780
  "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
781
  "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
782
  "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
 
799
  "model.layers.28.mlp.up_proj.scales": "model.safetensors",
800
  "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
801
  "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
 
802
  "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
803
  "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
804
  "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
 
809
  "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
810
  "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
811
  "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
 
812
  "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
813
  "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
814
  "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
815
  "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
816
  "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
 
817
  "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
818
  "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
819
  "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
 
836
  "model.layers.29.mlp.up_proj.scales": "model.safetensors",
837
  "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
838
  "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
 
839
  "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
840
  "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
841
  "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
 
846
  "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
847
  "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
848
  "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
 
849
  "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
850
  "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
851
  "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
852
  "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
853
  "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
 
854
  "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
855
  "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
856
  "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
 
873
  "model.layers.3.mlp.up_proj.scales": "model.safetensors",
874
  "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
875
  "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
 
876
  "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
877
  "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
878
  "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
 
883
  "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
884
  "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
885
  "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
 
886
  "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
887
  "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
888
  "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
889
  "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
890
  "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
 
891
  "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
892
  "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
893
  "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
 
910
  "model.layers.30.mlp.up_proj.scales": "model.safetensors",
911
  "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
912
  "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
 
913
  "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
914
  "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
915
  "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
 
920
  "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
921
  "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
922
  "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
 
923
  "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
924
  "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
925
  "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
926
  "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
927
  "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
 
928
  "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
929
  "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
930
  "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
 
947
  "model.layers.31.mlp.up_proj.scales": "model.safetensors",
948
  "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
949
  "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
 
950
  "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
951
  "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
952
  "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
 
957
  "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
958
  "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
959
  "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
 
960
  "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
961
  "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
962
  "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
963
  "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
964
  "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
 
965
  "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
966
  "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
967
  "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
 
984
  "model.layers.4.mlp.up_proj.scales": "model.safetensors",
985
  "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
986
  "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
 
987
  "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
988
  "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
989
  "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
 
994
  "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
995
  "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
996
  "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
 
997
  "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
998
  "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
999
  "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
1000
  "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
1001
  "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
 
1002
  "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
1003
  "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
1004
  "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
 
1021
  "model.layers.5.mlp.up_proj.scales": "model.safetensors",
1022
  "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
1023
  "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
 
1024
  "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
1025
  "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
1026
  "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
 
1031
  "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
1032
  "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
1033
  "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
 
1034
  "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
1035
  "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
1036
  "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
1037
  "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
1038
  "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
 
1039
  "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
1040
  "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
1041
  "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
 
1058
  "model.layers.6.mlp.up_proj.scales": "model.safetensors",
1059
  "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
1060
  "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
 
1061
  "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
1062
  "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
1063
  "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
 
1068
  "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
1069
  "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
1070
  "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
 
1071
  "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
1072
  "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
1073
  "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
1074
  "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
1075
  "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
 
1076
  "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
1077
  "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
1078
  "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
 
1095
  "model.layers.7.mlp.up_proj.scales": "model.safetensors",
1096
  "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
1097
  "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
 
1098
  "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
1099
  "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
1100
  "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
 
1105
  "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
1106
  "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
1107
  "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
 
1108
  "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
1109
  "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
1110
  "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
1111
  "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
1112
  "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
 
1113
  "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
1114
  "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
1115
  "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
 
1132
  "model.layers.8.mlp.up_proj.scales": "model.safetensors",
1133
  "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
1134
  "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
 
1135
  "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
1136
  "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
1137
  "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
 
1142
  "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
1143
  "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
1144
  "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
 
1145
  "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
1146
  "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
1147
  "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
1148
  "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
1149
  "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
 
1150
  "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
1151
  "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
1152
  "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
 
1169
  "model.layers.9.mlp.up_proj.scales": "model.safetensors",
1170
  "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
1171
  "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
 
1172
  "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
1173
  "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
1174
  "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
 
1179
  "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
1180
  "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
1181
  "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
 
1182
  "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
1183
  "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
1184
  "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
1185
  "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
1186
  "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
 
1187
  "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
1188
  "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
1189
  "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",
quant_strategy.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.8198099136352539,
5
- "total_bits": 450454080,
6
  "q_proj": {
7
  "group_size": {
8
  "2": 64
@@ -89,8 +89,8 @@
89
  }
90
  },
91
  "model.layers.1": {
92
- "accuracy": 0.8719034194946289,
93
- "total_bits": 450454080,
94
  "q_proj": {
95
  "group_size": {
96
  "2": 64
@@ -105,10 +105,10 @@
105
  },
106
  "k_proj": {
107
  "group_size": {
108
- "2": 64
109
  },
110
  "bits": [
111
- 2
112
  ],
113
  "bits_prop": [
114
  1
@@ -129,10 +129,10 @@
129
  },
130
  "o_proj": {
131
  "group_size": {
132
- "2": 64
133
  },
134
  "bits": [
135
- 2
136
  ],
137
  "bits_prop": [
138
  1
@@ -165,10 +165,10 @@
165
  },
166
  "down_proj": {
167
  "group_size": {
168
- "2": 64
169
  },
170
  "bits": [
171
- 2
172
  ],
173
  "bits_prop": [
174
  1
@@ -177,8 +177,8 @@
177
  }
178
  },
179
  "model.layers.2": {
180
- "accuracy": 0.8325738906860352,
181
- "total_bits": 450454080,
182
  "q_proj": {
183
  "group_size": {
184
  "2": 64
@@ -265,8 +265,8 @@
265
  }
266
  },
267
  "model.layers.3": {
268
- "accuracy": 0.8738632202148438,
269
- "total_bits": 450454080,
270
  "q_proj": {
271
  "group_size": {
272
  "2": 64
@@ -341,10 +341,10 @@
341
  },
342
  "down_proj": {
343
  "group_size": {
344
- "2": 64
345
  },
346
  "bits": [
347
- 2
348
  ],
349
  "bits_prop": [
350
  1
@@ -353,8 +353,8 @@
353
  }
354
  },
355
  "model.layers.4": {
356
- "accuracy": 0.8625121116638184,
357
- "total_bits": 450454080,
358
  "q_proj": {
359
  "group_size": {
360
  "2": 64
@@ -393,10 +393,10 @@
393
  },
394
  "o_proj": {
395
  "group_size": {
396
- "2": 64
397
  },
398
  "bits": [
399
- 2
400
  ],
401
  "bits_prop": [
402
  1
@@ -441,8 +441,8 @@
441
  }
442
  },
443
  "model.layers.5": {
444
- "accuracy": 0.8608803749084473,
445
- "total_bits": 450454080,
446
  "q_proj": {
447
  "group_size": {
448
  "2": 64
@@ -457,10 +457,10 @@
457
  },
458
  "k_proj": {
459
  "group_size": {
460
- "2": 64
461
  },
462
  "bits": [
463
- 2
464
  ],
465
  "bits_prop": [
466
  1
@@ -529,8 +529,8 @@
529
  }
530
  },
531
  "model.layers.6": {
532
- "accuracy": 0.8563823699951172,
533
- "total_bits": 450454080,
534
  "q_proj": {
535
  "group_size": {
536
  "2": 64
@@ -605,10 +605,10 @@
605
  },
606
  "down_proj": {
607
  "group_size": {
608
- "2": 64
609
  },
610
  "bits": [
611
- 2
612
  ],
613
  "bits_prop": [
614
  1
@@ -617,8 +617,8 @@
617
  }
618
  },
619
  "model.layers.7": {
620
- "accuracy": 0.8504223823547363,
621
- "total_bits": 483482688,
622
  "q_proj": {
623
  "group_size": {
624
  "2": 64
@@ -693,10 +693,10 @@
693
  },
694
  "down_proj": {
695
  "group_size": {
696
- "2": 64
697
  },
698
  "bits": [
699
- 2
700
  ],
701
  "bits_prop": [
702
  1
@@ -705,8 +705,8 @@
705
  }
706
  },
707
  "model.layers.8": {
708
- "accuracy": 0.9129691123962402,
709
- "total_bits": 539218464,
710
  "q_proj": {
711
  "group_size": {
712
  "2": 64
@@ -793,8 +793,8 @@
793
  }
794
  },
795
  "model.layers.9": {
796
- "accuracy": 0.8902812004089355,
797
- "total_bits": 450454080,
798
  "q_proj": {
799
  "group_size": {
800
  "2": 64
@@ -809,10 +809,10 @@
809
  },
810
  "k_proj": {
811
  "group_size": {
812
- "2": 64
813
  },
814
  "bits": [
815
- 2
816
  ],
817
  "bits_prop": [
818
  1
@@ -881,8 +881,8 @@
881
  }
882
  },
883
  "model.layers.10": {
884
- "accuracy": 0.8880372047424316,
885
- "total_bits": 450454080,
886
  "q_proj": {
887
  "group_size": {
888
  "2": 64
@@ -969,8 +969,8 @@
969
  }
970
  },
971
  "model.layers.11": {
972
- "accuracy": 0.8887085914611816,
973
- "total_bits": 450454080,
974
  "q_proj": {
975
  "group_size": {
976
  "2": 64
@@ -1057,8 +1057,8 @@
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
- "accuracy": 0.8856921195983887,
1061
- "total_bits": 450454080,
1062
  "q_proj": {
1063
  "group_size": {
1064
  "2": 64
@@ -1073,10 +1073,10 @@
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
- "2": 64
1077
  },
1078
  "bits": [
1079
- 2
1080
  ],
1081
  "bits_prop": [
1082
  1
@@ -1145,8 +1145,8 @@
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
- "accuracy": 0.8820700645446777,
1149
- "total_bits": 450454080,
1150
  "q_proj": {
1151
  "group_size": {
1152
  "2": 64
@@ -1161,10 +1161,10 @@
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
- "2": 64
1165
  },
1166
  "bits": [
1167
- 2
1168
  ],
1169
  "bits_prop": [
1170
  1
@@ -1233,8 +1233,8 @@
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
- "accuracy": 0.9193291664123535,
1237
- "total_bits": 572247072,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
@@ -1249,10 +1249,10 @@
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
- "2": 64
1253
  },
1254
  "bits": [
1255
- 2
1256
  ],
1257
  "bits_prop": [
1258
  1
@@ -1273,10 +1273,10 @@
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
- "4": 128
1277
  },
1278
  "bits": [
1279
- 4
1280
  ],
1281
  "bits_prop": [
1282
  1
@@ -1309,10 +1309,10 @@
1309
  },
1310
  "down_proj": {
1311
  "group_size": {
1312
- "4": 128
1313
  },
1314
  "bits": [
1315
- 4
1316
  ],
1317
  "bits_prop": [
1318
  1
@@ -1321,8 +1321,8 @@
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
- "accuracy": 0.9107174873352051,
1325
- "total_bits": 572247072,
1326
  "q_proj": {
1327
  "group_size": {
1328
  "2": 64
@@ -1337,10 +1337,10 @@
1337
  },
1338
  "k_proj": {
1339
  "group_size": {
1340
- "2": 64
1341
  },
1342
  "bits": [
1343
- 2
1344
  ],
1345
  "bits_prop": [
1346
  1
@@ -1361,10 +1361,10 @@
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
- "4": 128
1365
  },
1366
  "bits": [
1367
- 4
1368
  ],
1369
  "bits_prop": [
1370
  1
@@ -1397,10 +1397,10 @@
1397
  },
1398
  "down_proj": {
1399
  "group_size": {
1400
- "4": 128
1401
  },
1402
  "bits": [
1403
- 4
1404
  ],
1405
  "bits_prop": [
1406
  1
@@ -1409,8 +1409,8 @@
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
- "accuracy": 0.9100451469421387,
1413
- "total_bits": 572247072,
1414
  "q_proj": {
1415
  "group_size": {
1416
  "2": 64
@@ -1449,10 +1449,10 @@
1449
  },
1450
  "o_proj": {
1451
  "group_size": {
1452
- "4": 128
1453
  },
1454
  "bits": [
1455
- 4
1456
  ],
1457
  "bits_prop": [
1458
  1
@@ -1497,8 +1497,8 @@
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
- "accuracy": 0.908327579498291,
1501
- "total_bits": 572247072,
1502
  "q_proj": {
1503
  "group_size": {
1504
  "2": 64
@@ -1513,10 +1513,10 @@
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
- "2": 64
1517
  },
1518
  "bits": [
1519
- 2
1520
  ],
1521
  "bits_prop": [
1522
  1
@@ -1585,14 +1585,14 @@
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
- "accuracy": 0.9288191795349121,
1589
- "total_bits": 661014048,
1590
  "q_proj": {
1591
  "group_size": {
1592
- "2": 64
1593
  },
1594
  "bits": [
1595
- 2
1596
  ],
1597
  "bits_prop": [
1598
  1
@@ -1601,10 +1601,10 @@
1601
  },
1602
  "k_proj": {
1603
  "group_size": {
1604
- "2": 64
1605
  },
1606
  "bits": [
1607
- 2
1608
  ],
1609
  "bits_prop": [
1610
  1
@@ -1637,10 +1637,10 @@
1637
  },
1638
  "up_proj": {
1639
  "group_size": {
1640
- "4": 128
1641
  },
1642
  "bits": [
1643
- 4
1644
  ],
1645
  "bits_prop": [
1646
  1
@@ -1673,8 +1673,8 @@
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
- "accuracy": 0.9297795295715332,
1677
- "total_bits": 661014048,
1678
  "q_proj": {
1679
  "group_size": {
1680
  "2": 64
@@ -1761,14 +1761,14 @@
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
- "accuracy": 0.9339859485626221,
1765
- "total_bits": 661014048,
1766
  "q_proj": {
1767
  "group_size": {
1768
- "2": 64
1769
  },
1770
  "bits": [
1771
- 2
1772
  ],
1773
  "bits_prop": [
1774
  1
@@ -1849,14 +1849,14 @@
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
- "accuracy": 0.9743473529815674,
1853
- "total_bits": 815838240,
1854
  "q_proj": {
1855
  "group_size": {
1856
- "4": 128
1857
  },
1858
  "bits": [
1859
- 4
1860
  ],
1861
  "bits_prop": [
1862
  1
@@ -1937,14 +1937,14 @@
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
- "accuracy": 0.9624457359313965,
1941
- "total_bits": 749781024,
1942
  "q_proj": {
1943
  "group_size": {
1944
- "2": 64
1945
  },
1946
  "bits": [
1947
- 2
1948
  ],
1949
  "bits_prop": [
1950
  1
@@ -1953,10 +1953,10 @@
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
- "2": 64
1957
  },
1958
  "bits": [
1959
- 2
1960
  ],
1961
  "bits_prop": [
1962
  1
@@ -2025,8 +2025,8 @@
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
- "accuracy": 0.9775146245956421,
2029
- "total_bits": 815838240,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "4": 128
@@ -2113,14 +2113,14 @@
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
- "accuracy": 0.9725011587142944,
2117
- "total_bits": 782809632,
2118
  "q_proj": {
2119
  "group_size": {
2120
- "2": 64
2121
  },
2122
  "bits": [
2123
- 2
2124
  ],
2125
  "bits_prop": [
2126
  1
@@ -2201,14 +2201,14 @@
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
- "accuracy": 0.9676313400268555,
2205
- "total_bits": 749781024,
2206
  "q_proj": {
2207
  "group_size": {
2208
- "2": 64
2209
  },
2210
  "bits": [
2211
- 2
2212
  ],
2213
  "bits_prop": [
2214
  1
@@ -2217,10 +2217,10 @@
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
- "2": 64
2221
  },
2222
  "bits": [
2223
- 2
2224
  ],
2225
  "bits_prop": [
2226
  1
@@ -2289,14 +2289,14 @@
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
- "accuracy": 0.9747145175933838,
2293
- "total_bits": 782809632,
2294
  "q_proj": {
2295
  "group_size": {
2296
- "2": 64
2297
  },
2298
  "bits": [
2299
- 2
2300
  ],
2301
  "bits_prop": [
2302
  1
@@ -2377,8 +2377,8 @@
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
- "accuracy": 0.9794363975524902,
2381
- "total_bits": 815838240,
2382
  "q_proj": {
2383
  "group_size": {
2384
  "4": 128
@@ -2465,8 +2465,8 @@
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
- "accuracy": 0.9793131351470947,
2469
- "total_bits": 815838240,
2470
  "q_proj": {
2471
  "group_size": {
2472
  "4": 128
@@ -2553,8 +2553,8 @@
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
- "accuracy": 0.9778343439102173,
2557
- "total_bits": 815838240,
2558
  "q_proj": {
2559
  "group_size": {
2560
  "4": 128
@@ -2641,8 +2641,8 @@
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
- "accuracy": 0.9739029407501221,
2645
- "total_bits": 815838240,
2646
  "q_proj": {
2647
  "group_size": {
2648
  "4": 128
@@ -2729,8 +2729,8 @@
2729
  }
2730
  },
2731
  "model.layers.31": {
2732
- "accuracy": 0.9666062593460083,
2733
- "total_bits": 749781024,
2734
  "q_proj": {
2735
  "group_size": {
2736
  "2": 64
@@ -2745,10 +2745,10 @@
2745
  },
2746
  "k_proj": {
2747
  "group_size": {
2748
- "2": 64
2749
  },
2750
  "bits": [
2751
- 2
2752
  ],
2753
  "bits_prop": [
2754
  1
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.9242749214172363,
5
+ "total_bits": 360997440,
6
  "q_proj": {
7
  "group_size": {
8
  "2": 64
 
89
  }
90
  },
91
  "model.layers.1": {
92
+ "accuracy": 0.9216856956481934,
93
+ "total_bits": 486917664,
94
  "q_proj": {
95
  "group_size": {
96
  "2": 64
 
105
  },
106
  "k_proj": {
107
  "group_size": {
108
+ "4": 128
109
  },
110
  "bits": [
111
+ 4
112
  ],
113
  "bits_prop": [
114
  1
 
129
  },
130
  "o_proj": {
131
  "group_size": {
132
+ "4": 128
133
  },
134
  "bits": [
135
+ 4
136
  ],
137
  "bits_prop": [
138
  1
 
165
  },
166
  "down_proj": {
167
  "group_size": {
168
+ "4": 128
169
  },
170
  "bits": [
171
+ 4
172
  ],
173
  "bits_prop": [
174
  1
 
177
  }
178
  },
179
  "model.layers.2": {
180
+ "accuracy": 0.8546795845031738,
181
+ "total_bits": 360997440,
182
  "q_proj": {
183
  "group_size": {
184
  "2": 64
 
265
  }
266
  },
267
  "model.layers.3": {
268
+ "accuracy": 0.9084997177124023,
269
+ "total_bits": 449761824,
270
  "q_proj": {
271
  "group_size": {
272
  "2": 64
 
341
  },
342
  "down_proj": {
343
  "group_size": {
344
+ "4": 128
345
  },
346
  "bits": [
347
+ 4
348
  ],
349
  "bits_prop": [
350
  1
 
353
  }
354
  },
355
  "model.layers.4": {
356
+ "accuracy": 0.8643641471862793,
357
+ "total_bits": 394026048,
358
  "q_proj": {
359
  "group_size": {
360
  "2": 64
 
393
  },
394
  "o_proj": {
395
  "group_size": {
396
+ "4": 128
397
  },
398
  "bits": [
399
+ 4
400
  ],
401
  "bits_prop": [
402
  1
 
441
  }
442
  },
443
  "model.layers.5": {
444
+ "accuracy": 0.8657441139221191,
445
+ "total_bits": 365124672,
446
  "q_proj": {
447
  "group_size": {
448
  "2": 64
 
457
  },
458
  "k_proj": {
459
  "group_size": {
460
+ "4": 128
461
  },
462
  "bits": [
463
+ 4
464
  ],
465
  "bits_prop": [
466
  1
 
529
  }
530
  },
531
  "model.layers.6": {
532
+ "accuracy": 0.877474308013916,
533
+ "total_bits": 449761824,
534
  "q_proj": {
535
  "group_size": {
536
  "2": 64
 
605
  },
606
  "down_proj": {
607
  "group_size": {
608
+ "4": 128
609
  },
610
  "bits": [
611
+ 4
612
  ],
613
  "bits_prop": [
614
  1
 
617
  }
618
  },
619
  "model.layers.7": {
620
+ "accuracy": 0.8887453079223633,
621
+ "total_bits": 482790432,
622
  "q_proj": {
623
  "group_size": {
624
  "2": 64
 
693
  },
694
  "down_proj": {
695
  "group_size": {
696
+ "4": 128
697
  },
698
  "bits": [
699
+ 4
700
  ],
701
  "bits_prop": [
702
  1
 
705
  }
706
  },
707
  "model.layers.8": {
708
+ "accuracy": 0.9228010177612305,
709
+ "total_bits": 449761824,
710
  "q_proj": {
711
  "group_size": {
712
  "2": 64
 
793
  }
794
  },
795
  "model.layers.9": {
796
+ "accuracy": 0.9577234387397766,
797
+ "total_bits": 365124672,
798
  "q_proj": {
799
  "group_size": {
800
  "2": 64
 
809
  },
810
  "k_proj": {
811
  "group_size": {
812
+ "4": 128
813
  },
814
  "bits": [
815
+ 4
816
  ],
817
  "bits_prop": [
818
  1
 
881
  }
882
  },
883
  "model.layers.10": {
884
+ "accuracy": 0.9458887577056885,
885
+ "total_bits": 360997440,
886
  "q_proj": {
887
  "group_size": {
888
  "2": 64
 
969
  }
970
  },
971
  "model.layers.11": {
972
+ "accuracy": 0.9322950839996338,
973
+ "total_bits": 360997440,
974
  "q_proj": {
975
  "group_size": {
976
  "2": 64
 
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
+ "accuracy": 0.9404451847076416,
1061
+ "total_bits": 365124672,
1062
  "q_proj": {
1063
  "group_size": {
1064
  "2": 64
 
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
+ "4": 128
1077
  },
1078
  "bits": [
1079
+ 4
1080
  ],
1081
  "bits_prop": [
1082
  1
 
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
+ "accuracy": 0.9363645315170288,
1149
+ "total_bits": 365124672,
1150
  "q_proj": {
1151
  "group_size": {
1152
  "2": 64
 
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
+ "4": 128
1165
  },
1166
  "bits": [
1167
+ 4
1168
  ],
1169
  "bits_prop": [
1170
  1
 
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
+ "accuracy": 0.9359749555587769,
1237
+ "total_bits": 365124672,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
 
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
+ "4": 128
1253
  },
1254
  "bits": [
1255
+ 4
1256
  ],
1257
  "bits_prop": [
1258
  1
 
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
+ "2": 64
1277
  },
1278
  "bits": [
1279
+ 2
1280
  ],
1281
  "bits_prop": [
1282
  1
 
1309
  },
1310
  "down_proj": {
1311
  "group_size": {
1312
+ "2": 64
1313
  },
1314
  "bits": [
1315
+ 2
1316
  ],
1317
  "bits_prop": [
1318
  1
 
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
+ "accuracy": 0.9322938919067383,
1325
+ "total_bits": 365124672,
1326
  "q_proj": {
1327
  "group_size": {
1328
  "2": 64
 
1337
  },
1338
  "k_proj": {
1339
  "group_size": {
1340
+ "4": 128
1341
  },
1342
  "bits": [
1343
+ 4
1344
  ],
1345
  "bits_prop": [
1346
  1
 
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
+ "2": 64
1365
  },
1366
  "bits": [
1367
+ 2
1368
  ],
1369
  "bits_prop": [
1370
  1
 
1397
  },
1398
  "down_proj": {
1399
  "group_size": {
1400
+ "2": 64
1401
  },
1402
  "bits": [
1403
+ 2
1404
  ],
1405
  "bits_prop": [
1406
  1
 
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
+ "accuracy": 0.939303994178772,
1413
+ "total_bits": 449761824,
1414
  "q_proj": {
1415
  "group_size": {
1416
  "2": 64
 
1449
  },
1450
  "o_proj": {
1451
  "group_size": {
1452
+ "2": 64
1453
  },
1454
  "bits": [
1455
+ 2
1456
  ],
1457
  "bits_prop": [
1458
  1
 
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
+ "accuracy": 0.9451323747634888,
1501
+ "total_bits": 486917664,
1502
  "q_proj": {
1503
  "group_size": {
1504
  "2": 64
 
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
+ "4": 128
1517
  },
1518
  "bits": [
1519
+ 4
1520
  ],
1521
  "bits_prop": [
1522
  1
 
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
+ "accuracy": 0.9493275880813599,
1589
+ "total_bits": 519946272,
1590
  "q_proj": {
1591
  "group_size": {
1592
+ "4": 128
1593
  },
1594
  "bits": [
1595
+ 4
1596
  ],
1597
  "bits_prop": [
1598
  1
 
1601
  },
1602
  "k_proj": {
1603
  "group_size": {
1604
+ "4": 128
1605
  },
1606
  "bits": [
1607
+ 4
1608
  ],
1609
  "bits_prop": [
1610
  1
 
1637
  },
1638
  "up_proj": {
1639
  "group_size": {
1640
+ "2": 64
1641
  },
1642
  "bits": [
1643
+ 2
1644
  ],
1645
  "bits_prop": [
1646
  1
 
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
+ "accuracy": 0.9514966011047363,
1677
+ "total_bits": 571557408,
1678
  "q_proj": {
1679
  "group_size": {
1680
  "2": 64
 
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
+ "accuracy": 0.955375075340271,
1765
+ "total_bits": 604586016,
1766
  "q_proj": {
1767
  "group_size": {
1768
+ "4": 128
1769
  },
1770
  "bits": [
1771
+ 4
1772
  ],
1773
  "bits_prop": [
1774
  1
 
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
+ "accuracy": 0.9731628894805908,
1853
+ "total_bits": 664451616,
1854
  "q_proj": {
1855
  "group_size": {
1856
+ "2": 64
1857
  },
1858
  "bits": [
1859
+ 2
1860
  ],
1861
  "bits_prop": [
1862
  1
 
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
+ "accuracy": 0.9785275459289551,
1941
+ "total_bits": 697480224,
1942
  "q_proj": {
1943
  "group_size": {
1944
+ "4": 128
1945
  },
1946
  "bits": [
1947
+ 4
1948
  ],
1949
  "bits_prop": [
1950
  1
 
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
+ "4": 128
1957
  },
1958
  "bits": [
1959
+ 4
1960
  ],
1961
  "bits_prop": [
1962
  1
 
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
+ "accuracy": 0.9788622856140137,
2029
+ "total_bits": 697480224,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "4": 128
 
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
+ "accuracy": 0.9794007539749146,
2117
+ "total_bits": 697480224,
2118
  "q_proj": {
2119
  "group_size": {
2120
+ "4": 128
2121
  },
2122
  "bits": [
2123
+ 4
2124
  ],
2125
  "bits_prop": [
2126
  1
 
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
+ "accuracy": 0.9806145429611206,
2205
+ "total_bits": 697480224,
2206
  "q_proj": {
2207
  "group_size": {
2208
+ "4": 128
2209
  },
2210
  "bits": [
2211
+ 4
2212
  ],
2213
  "bits_prop": [
2214
  1
 
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
+ "4": 128
2221
  },
2222
  "bits": [
2223
+ 4
2224
  ],
2225
  "bits_prop": [
2226
  1
 
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
+ "accuracy": 0.9806764125823975,
2293
+ "total_bits": 697480224,
2294
  "q_proj": {
2295
  "group_size": {
2296
+ "4": 128
2297
  },
2298
  "bits": [
2299
+ 4
2300
  ],
2301
  "bits_prop": [
2302
  1
 
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
+ "accuracy": 0.9815640449523926,
2381
+ "total_bits": 697480224,
2382
  "q_proj": {
2383
  "group_size": {
2384
  "4": 128
 
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
+ "accuracy": 0.9820178747177124,
2469
+ "total_bits": 697480224,
2470
  "q_proj": {
2471
  "group_size": {
2472
  "4": 128
 
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
+ "accuracy": 0.9836413264274597,
2557
+ "total_bits": 697480224,
2558
  "q_proj": {
2559
  "group_size": {
2560
  "4": 128
 
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
+ "accuracy": 0.9838729500770569,
2645
+ "total_bits": 697480224,
2646
  "q_proj": {
2647
  "group_size": {
2648
  "4": 128
 
2729
  }
2730
  },
2731
  "model.layers.31": {
2732
+ "accuracy": 0.9427725076675415,
2733
+ "total_bits": 664451616,
2734
  "q_proj": {
2735
  "group_size": {
2736
  "2": 64
 
2745
  },
2746
  "k_proj": {
2747
  "group_size": {
2748
+ "4": 128
2749
  },
2750
  "bits": [
2751
+ 4
2752
  ],
2753
  "bits_prop": [
2754
  1
special_tokens_map.json CHANGED
@@ -1,19 +1,34 @@
1
  {
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
- "<|im_end|>"
 
5
  ],
 
 
 
 
 
 
 
6
  "eos_token": {
7
- "content": "<|im_end|>",
8
  "lstrip": false,
9
- "normalized": false,
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
  "pad_token": {
14
- "content": "<|endoftext|>",
 
 
 
 
 
 
 
15
  "lstrip": false,
16
- "normalized": false,
17
  "rstrip": false,
18
  "single_word": false
19
  }
 
1
  {
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|im_sep|>"
6
  ],
7
+ "bos_token": {
8
+ "content": "<|startoftext|>",
9
+ "lstrip": false,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
  "eos_token": {
15
+ "content": "<|endoftext|>",
16
  "lstrip": false,
17
+ "normalized": true,
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
  "pad_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "unk_token": {
29
+ "content": "<unk>",
30
  "lstrip": false,
31
+ "normalized": true,
32
  "rstrip": false,
33
  "single_word": false
34
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
+ size 1033105
tokenizer_config.json CHANGED
@@ -1,15 +1,33 @@
1
  {
2
- "add_prefix_space": false,
 
 
3
  "added_tokens_decoder": {
4
- "151643": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "content": "<|endoftext|>",
6
  "lstrip": false,
7
- "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
- "151644": {
13
  "content": "<|im_start|>",
14
  "lstrip": false,
15
  "normalized": false,
@@ -17,27 +35,39 @@
17
  "single_word": false,
18
  "special": true
19
  },
20
- "151645": {
21
  "content": "<|im_end|>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
27
  }
28
  },
29
  "additional_special_tokens": [
30
  "<|im_start|>",
31
- "<|im_end|>"
 
32
  ],
33
- "bos_token": null,
34
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
- "eos_token": "<|im_end|>",
37
- "errors": "replace",
38
- "model_max_length": 32768,
39
- "pad_token": "<|endoftext|>",
40
- "split_special_tokens": false,
41
- "tokenizer_class": "Qwen2Tokenizer",
42
- "unk_token": null
 
 
 
43
  }
 
1
  {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<|startoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
  "content": "<|endoftext|>",
24
  "lstrip": false,
25
+ "normalized": true,
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
  },
30
+ "6": {
31
  "content": "<|im_start|>",
32
  "lstrip": false,
33
  "normalized": false,
 
35
  "single_word": false,
36
  "special": true
37
  },
38
+ "7": {
39
  "content": "<|im_end|>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false,
44
  "special": true
45
+ },
46
+ "8": {
47
+ "content": "<|im_sep|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
  }
54
  },
55
  "additional_special_tokens": [
56
  "<|im_start|>",
57
+ "<|im_end|>",
58
+ "<|im_sep|>"
59
  ],
60
+ "bos_token": "<|startoftext|>",
61
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
62
  "clean_up_tokenization_spaces": false,
63
+ "eos_token": "<|endoftext|>",
64
+ "legacy": true,
65
+ "model_max_length": 4096,
66
+ "pad_token": "<unk>",
67
+ "padding_side": "right",
68
+ "sp_model_kwargs": {},
69
+ "spaces_between_special_tokens": false,
70
+ "tokenizer_class": "LlamaTokenizer",
71
+ "unk_token": "<unk>",
72
+ "use_default_system_prompt": true
73
  }