Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +2 -2
config.json +12 -12
model.safetensors +2 -2
model.safetensors.index.json +1 -97
quant_strategy.json +126 -126
special_tokens_map.json +20 -5
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +45 -15

README.md CHANGED Viewed

@@ -5,8 +5,8 @@ tags:
 ---
 # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
-This quantized low-bit model was converted to MLX format from [`GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0`]().
-Refer to the [original model card](https://huggingface.co/GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0) for more details on the model.
 ## Use with mlx
 ```bash

 ---
 # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
+This quantized low-bit model was converted to MLX format from [`GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0`](https://huggingface.co/GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0).
+Refer to the [original model card](https://huggingface.co/GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0) for more details on the model.
 ## Use with mlx
 ```bash

config.json CHANGED Viewed

@@ -1,12 +1,13 @@
 {
     "add_cross_attention": false,
     "architectures": [
-        "Qwen2ForCausalLM"
     ],
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
-    "bos_token_id": 151643,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
@@ -14,7 +15,7 @@
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 151645,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
@@ -35,22 +36,22 @@
     },
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 32768,
-    "max_window_layers": 28,
     "min_length": 0,
-    "model_type": "qwen2",
     "no_repeat_ngram_size": 0,
     "num_attention_heads": 32,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_hidden_layers": 32,
-    "num_key_value_heads": 32,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
     "quantization": {
@@ -61,10 +62,10 @@
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
-    "rms_norm_eps": 1e-06,
-    "rope_theta": 1000000.0,
     "sep_token_id": null,
-    "sliding_window": 32768,
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
@@ -80,6 +81,5 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "use_sliding_window": false,
-    "vocab_size": 151936
 }

 {
     "add_cross_attention": false,
     "architectures": [
+        "LlamaForCausalLM"
     ],
+    "attention_bias": false,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
+    "bos_token_id": 1,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
     },
     "length_penalty": 1.0,
     "max_length": 20,
+    "max_position_embeddings": 4096,
     "min_length": 0,
+    "model_type": "llama",
     "no_repeat_ngram_size": 0,
     "num_attention_heads": 32,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_hidden_layers": 32,
+    "num_key_value_heads": 4,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
+    "pretraining_tp": 1,
     "problem_type": null,
     "pruned_heads": {},
     "quantization": {
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 5000000.0,
     "sep_token_id": null,
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 64000
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe3b05b579c86f2ae5522c22941fffdab484da14633a47a6d40624334c1892dd
-size 5191621850

 version https://git-lfs.github.com/spec/v1
+oid sha256:21a8d50bdf8fbd1540698c8f71db74b9de56d6a375c5d676122c490a05b15432
+size 3358077486

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "metadata": {
-        "total_size": 5191475200
     },
     "weight_map": {
         "lm_head.weight": "model.safetensors",
@@ -22,7 +22,6 @@
         "model.layers.0.mlp.up_proj.scales": "model.safetensors",
         "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
@@ -33,13 +32,11 @@
         "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
@@ -62,7 +59,6 @@
         "model.layers.1.mlp.up_proj.scales": "model.safetensors",
         "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
@@ -73,13 +69,11 @@
         "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
@@ -102,7 +96,6 @@
         "model.layers.10.mlp.up_proj.scales": "model.safetensors",
         "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
@@ -113,13 +106,11 @@
         "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
@@ -142,7 +133,6 @@
         "model.layers.11.mlp.up_proj.scales": "model.safetensors",
         "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
@@ -153,13 +143,11 @@
         "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
@@ -182,7 +170,6 @@
         "model.layers.12.mlp.up_proj.scales": "model.safetensors",
         "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
@@ -193,13 +180,11 @@
         "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
@@ -222,7 +207,6 @@
         "model.layers.13.mlp.up_proj.scales": "model.safetensors",
         "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
@@ -233,13 +217,11 @@
         "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
@@ -262,7 +244,6 @@
         "model.layers.14.mlp.up_proj.scales": "model.safetensors",
         "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
@@ -273,13 +254,11 @@
         "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
@@ -302,7 +281,6 @@
         "model.layers.15.mlp.up_proj.scales": "model.safetensors",
         "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
@@ -313,13 +291,11 @@
         "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
@@ -342,7 +318,6 @@
         "model.layers.16.mlp.up_proj.scales": "model.safetensors",
         "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
@@ -353,13 +328,11 @@
         "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
@@ -382,7 +355,6 @@
         "model.layers.17.mlp.up_proj.scales": "model.safetensors",
         "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
@@ -393,13 +365,11 @@
         "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
@@ -422,7 +392,6 @@
         "model.layers.18.mlp.up_proj.scales": "model.safetensors",
         "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
@@ -433,13 +402,11 @@
         "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
@@ -462,7 +429,6 @@
         "model.layers.19.mlp.up_proj.scales": "model.safetensors",
         "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
@@ -473,13 +439,11 @@
         "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
@@ -502,7 +466,6 @@
         "model.layers.2.mlp.up_proj.scales": "model.safetensors",
         "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
@@ -513,13 +476,11 @@
         "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
@@ -542,7 +503,6 @@
         "model.layers.20.mlp.up_proj.scales": "model.safetensors",
         "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
@@ -553,13 +513,11 @@
         "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
@@ -582,7 +540,6 @@
         "model.layers.21.mlp.up_proj.scales": "model.safetensors",
         "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
@@ -593,13 +550,11 @@
         "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
@@ -622,7 +577,6 @@
         "model.layers.22.mlp.up_proj.scales": "model.safetensors",
         "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
@@ -633,13 +587,11 @@
         "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
@@ -662,7 +614,6 @@
         "model.layers.23.mlp.up_proj.scales": "model.safetensors",
         "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
@@ -673,13 +624,11 @@
         "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
@@ -702,7 +651,6 @@
         "model.layers.24.mlp.up_proj.scales": "model.safetensors",
         "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
@@ -713,13 +661,11 @@
         "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
@@ -742,7 +688,6 @@
         "model.layers.25.mlp.up_proj.scales": "model.safetensors",
         "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
@@ -753,13 +698,11 @@
         "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
@@ -782,7 +725,6 @@
         "model.layers.26.mlp.up_proj.scales": "model.safetensors",
         "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
@@ -793,13 +735,11 @@
         "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
@@ -822,7 +762,6 @@
         "model.layers.27.mlp.up_proj.scales": "model.safetensors",
         "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
@@ -833,13 +772,11 @@
         "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
@@ -862,7 +799,6 @@
         "model.layers.28.mlp.up_proj.scales": "model.safetensors",
         "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.28.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
@@ -873,13 +809,11 @@
         "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.28.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.28.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
@@ -902,7 +836,6 @@
         "model.layers.29.mlp.up_proj.scales": "model.safetensors",
         "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.29.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
@@ -913,13 +846,11 @@
         "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.29.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.29.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
@@ -942,7 +873,6 @@
         "model.layers.3.mlp.up_proj.scales": "model.safetensors",
         "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
@@ -953,13 +883,11 @@
         "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
@@ -982,7 +910,6 @@
         "model.layers.30.mlp.up_proj.scales": "model.safetensors",
         "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.30.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
@@ -993,13 +920,11 @@
         "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.30.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.30.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
@@ -1022,7 +947,6 @@
         "model.layers.31.mlp.up_proj.scales": "model.safetensors",
         "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.31.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
@@ -1033,13 +957,11 @@
         "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.31.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.31.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
@@ -1062,7 +984,6 @@
         "model.layers.4.mlp.up_proj.scales": "model.safetensors",
         "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
@@ -1073,13 +994,11 @@
         "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
@@ -1102,7 +1021,6 @@
         "model.layers.5.mlp.up_proj.scales": "model.safetensors",
         "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
@@ -1113,13 +1031,11 @@
         "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
@@ -1142,7 +1058,6 @@
         "model.layers.6.mlp.up_proj.scales": "model.safetensors",
         "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
@@ -1153,13 +1068,11 @@
         "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
@@ -1182,7 +1095,6 @@
         "model.layers.7.mlp.up_proj.scales": "model.safetensors",
         "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
@@ -1193,13 +1105,11 @@
         "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
@@ -1222,7 +1132,6 @@
         "model.layers.8.mlp.up_proj.scales": "model.safetensors",
         "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
@@ -1233,13 +1142,11 @@
         "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
@@ -1262,7 +1169,6 @@
         "model.layers.9.mlp.up_proj.scales": "model.safetensors",
         "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
-        "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
@@ -1273,13 +1179,11 @@
         "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
-        "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
-        "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",

 {
     "metadata": {
+        "total_size": 3357941760
     },
     "weight_map": {
         "lm_head.weight": "model.safetensors",
         "model.layers.0.mlp.up_proj.scales": "model.safetensors",
         "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.1.mlp.up_proj.scales": "model.safetensors",
         "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.10.mlp.up_proj.scales": "model.safetensors",
         "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.11.mlp.up_proj.scales": "model.safetensors",
         "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.12.mlp.up_proj.scales": "model.safetensors",
         "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.13.mlp.up_proj.scales": "model.safetensors",
         "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.14.mlp.up_proj.scales": "model.safetensors",
         "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.15.mlp.up_proj.scales": "model.safetensors",
         "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.16.mlp.up_proj.scales": "model.safetensors",
         "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.17.mlp.up_proj.scales": "model.safetensors",
         "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.18.mlp.up_proj.scales": "model.safetensors",
         "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.19.mlp.up_proj.scales": "model.safetensors",
         "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.2.mlp.up_proj.scales": "model.safetensors",
         "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.20.mlp.up_proj.scales": "model.safetensors",
         "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.21.mlp.up_proj.scales": "model.safetensors",
         "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.22.mlp.up_proj.scales": "model.safetensors",
         "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.23.mlp.up_proj.scales": "model.safetensors",
         "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.24.mlp.up_proj.scales": "model.safetensors",
         "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.25.mlp.up_proj.scales": "model.safetensors",
         "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.26.mlp.up_proj.scales": "model.safetensors",
         "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.27.mlp.up_proj.scales": "model.safetensors",
         "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.28.mlp.up_proj.scales": "model.safetensors",
         "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.29.mlp.up_proj.scales": "model.safetensors",
         "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.3.mlp.up_proj.scales": "model.safetensors",
         "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.30.mlp.up_proj.scales": "model.safetensors",
         "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.31.mlp.up_proj.scales": "model.safetensors",
         "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.4.mlp.up_proj.scales": "model.safetensors",
         "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.5.mlp.up_proj.scales": "model.safetensors",
         "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.6.mlp.up_proj.scales": "model.safetensors",
         "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.7.mlp.up_proj.scales": "model.safetensors",
         "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.8.mlp.up_proj.scales": "model.safetensors",
         "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.9.mlp.up_proj.scales": "model.safetensors",
         "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",

quant_strategy.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "measurement": {
         "model.layers.0": {
-            "accuracy": 0.8198099136352539,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -89,8 +89,8 @@
             }
         },
         "model.layers.1": {
-            "accuracy": 0.8719034194946289,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -105,10 +105,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -129,10 +129,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -165,10 +165,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -177,8 +177,8 @@
             }
         },
         "model.layers.2": {
-            "accuracy": 0.8325738906860352,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -265,8 +265,8 @@
             }
         },
         "model.layers.3": {
-            "accuracy": 0.8738632202148438,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -341,10 +341,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -353,8 +353,8 @@
             }
         },
         "model.layers.4": {
-            "accuracy": 0.8625121116638184,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -393,10 +393,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -441,8 +441,8 @@
             }
         },
         "model.layers.5": {
-            "accuracy": 0.8608803749084473,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -457,10 +457,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -529,8 +529,8 @@
             }
         },
         "model.layers.6": {
-            "accuracy": 0.8563823699951172,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -605,10 +605,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -617,8 +617,8 @@
             }
         },
         "model.layers.7": {
-            "accuracy": 0.8504223823547363,
-            "total_bits": 483482688,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -693,10 +693,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -705,8 +705,8 @@
             }
         },
         "model.layers.8": {
-            "accuracy": 0.9129691123962402,
-            "total_bits": 539218464,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -793,8 +793,8 @@
             }
         },
         "model.layers.9": {
-            "accuracy": 0.8902812004089355,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -809,10 +809,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -881,8 +881,8 @@
             }
         },
         "model.layers.10": {
-            "accuracy": 0.8880372047424316,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -969,8 +969,8 @@
             }
         },
         "model.layers.11": {
-            "accuracy": 0.8887085914611816,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1057,8 +1057,8 @@
             }
         },
         "model.layers.12": {
-            "accuracy": 0.8856921195983887,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1073,10 +1073,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1145,8 +1145,8 @@
             }
         },
         "model.layers.13": {
-            "accuracy": 0.8820700645446777,
-            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1161,10 +1161,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1233,8 +1233,8 @@
             }
         },
         "model.layers.14": {
-            "accuracy": 0.9193291664123535,
-            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1249,10 +1249,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1273,10 +1273,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1309,10 +1309,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1321,8 +1321,8 @@
             }
         },
         "model.layers.15": {
-            "accuracy": 0.9107174873352051,
-            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1337,10 +1337,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1361,10 +1361,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1397,10 +1397,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1409,8 +1409,8 @@
             }
         },
         "model.layers.16": {
-            "accuracy": 0.9100451469421387,
-            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1449,10 +1449,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1497,8 +1497,8 @@
             }
         },
         "model.layers.17": {
-            "accuracy": 0.908327579498291,
-            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1513,10 +1513,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1585,14 +1585,14 @@
             }
         },
         "model.layers.18": {
-            "accuracy": 0.9288191795349121,
-            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1601,10 +1601,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1637,10 +1637,10 @@
             },
             "up_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1673,8 +1673,8 @@
             }
         },
         "model.layers.19": {
-            "accuracy": 0.9297795295715332,
-            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1761,14 +1761,14 @@
             }
         },
         "model.layers.20": {
-            "accuracy": 0.9339859485626221,
-            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1849,14 +1849,14 @@
             }
         },
         "model.layers.21": {
-            "accuracy": 0.9743473529815674,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1937,14 +1937,14 @@
             }
         },
         "model.layers.22": {
-            "accuracy": 0.9624457359313965,
-            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1953,10 +1953,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -2025,8 +2025,8 @@
             }
         },
         "model.layers.23": {
-            "accuracy": 0.9775146245956421,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2113,14 +2113,14 @@
             }
         },
         "model.layers.24": {
-            "accuracy": 0.9725011587142944,
-            "total_bits": 782809632,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -2201,14 +2201,14 @@
             }
         },
         "model.layers.25": {
-            "accuracy": 0.9676313400268555,
-            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -2217,10 +2217,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -2289,14 +2289,14 @@
             }
         },
         "model.layers.26": {
-            "accuracy": 0.9747145175933838,
-            "total_bits": 782809632,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -2377,8 +2377,8 @@
             }
         },
         "model.layers.27": {
-            "accuracy": 0.9794363975524902,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2465,8 +2465,8 @@
             }
         },
         "model.layers.28": {
-            "accuracy": 0.9793131351470947,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2553,8 +2553,8 @@
             }
         },
         "model.layers.29": {
-            "accuracy": 0.9778343439102173,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2641,8 +2641,8 @@
             }
         },
         "model.layers.30": {
-            "accuracy": 0.9739029407501221,
-            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2729,8 +2729,8 @@
             }
         },
         "model.layers.31": {
-            "accuracy": 0.9666062593460083,
-            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -2745,10 +2745,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1

 {
     "measurement": {
         "model.layers.0": {
+            "accuracy": 0.9242749214172363,
+            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.1": {
+            "accuracy": 0.9216856956481934,
+            "total_bits": 486917664,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.2": {
+            "accuracy": 0.8546795845031738,
+            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.3": {
+            "accuracy": 0.9084997177124023,
+            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.4": {
+            "accuracy": 0.8643641471862793,
+            "total_bits": 394026048,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "o_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.5": {
+            "accuracy": 0.8657441139221191,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.6": {
+            "accuracy": 0.877474308013916,
+            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.7": {
+            "accuracy": 0.8887453079223633,
+            "total_bits": 482790432,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.8": {
+            "accuracy": 0.9228010177612305,
+            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.9": {
+            "accuracy": 0.9577234387397766,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.10": {
+            "accuracy": 0.9458887577056885,
+            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.11": {
+            "accuracy": 0.9322950839996338,
+            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.12": {
+            "accuracy": 0.9404451847076416,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.13": {
+            "accuracy": 0.9363645315170288,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.14": {
+            "accuracy": 0.9359749555587769,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.15": {
+            "accuracy": 0.9322938919067383,
+            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.16": {
+            "accuracy": 0.939303994178772,
+            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "o_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.17": {
+            "accuracy": 0.9451323747634888,
+            "total_bits": 486917664,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.18": {
+            "accuracy": 0.9493275880813599,
+            "total_bits": 519946272,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "up_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.19": {
+            "accuracy": 0.9514966011047363,
+            "total_bits": 571557408,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.20": {
+            "accuracy": 0.955375075340271,
+            "total_bits": 604586016,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.21": {
+            "accuracy": 0.9731628894805908,
+            "total_bits": 664451616,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.22": {
+            "accuracy": 0.9785275459289551,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.23": {
+            "accuracy": 0.9788622856140137,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.24": {
+            "accuracy": 0.9794007539749146,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.25": {
+            "accuracy": 0.9806145429611206,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.26": {
+            "accuracy": 0.9806764125823975,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.27": {
+            "accuracy": 0.9815640449523926,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.28": {
+            "accuracy": 0.9820178747177124,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.29": {
+            "accuracy": 0.9836413264274597,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.30": {
+            "accuracy": 0.9838729500770569,
+            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.31": {
+            "accuracy": 0.9427725076675415,
+            "total_bits": 664451616,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1

special_tokens_map.json CHANGED Viewed

@@ -1,19 +1,34 @@
 {
   "additional_special_tokens": [
     "<|im_start|>",
-    "<|im_end|>"
   ],
   "eos_token": {
-    "content": "<|im_end|>",
     "lstrip": false,
-    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<|endoftext|>",
     "lstrip": false,
-    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

 {
   "additional_special_tokens": [
     "<|im_start|>",
+    "<|im_end|>",
+    "<|im_sep|>"
   ],
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "eos_token": {
+    "content": "<|endoftext|>",
     "lstrip": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
+size 1033105

tokenizer_config.json CHANGED Viewed

@@ -1,15 +1,33 @@
 {
-  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "151643": {
       "content": "<|endoftext|>",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "151644": {
       "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
@@ -17,27 +35,39 @@
       "single_word": false,
       "special": true
     },
-    "151645": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [
     "<|im_start|>",
-    "<|im_end|>"
   ],
-  "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "model_max_length": 32768,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
 }

 {
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": true,
   "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
       "content": "<|endoftext|>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "6": {
       "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "7": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "8": {
+      "content": "<|im_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "additional_special_tokens": [
     "<|im_start|>",
+    "<|im_end|>",
+    "<|im_sep|>"
   ],
+  "bos_token": "<|startoftext|>",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "legacy": true,
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true
 }