Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +2 -2
config.json +12 -12
model.safetensors +2 -2
model.safetensors.index.json +97 -1
quant_strategy.json +126 -126
special_tokens_map.json +5 -20
tokenizer.json +0 -0
tokenizer_config.json +15 -45

README.md CHANGED Viewed

@@ -5,8 +5,8 @@ tags:
 ---
 # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
-This quantized low-bit model was converted to MLX format from [`GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0`]().
-Refer to the [original model card](https://huggingface.co/GreenBitAI/01-Yi-6B-Chat-layer-mix-bpw-3.0) for more details on the model.
 ## Use with mlx
 ```bash

 ---
 # GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0-mlx
+This quantized low-bit model was converted to MLX format from [`GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0`]().
+Refer to the [original model card](https://huggingface.co/GreenBitAI/Qwen-1.5-7B-Chat-layer-mix-bpw-3.0) for more details on the model.
 ## Use with mlx
 ```bash

config.json CHANGED Viewed

@@ -1,13 +1,12 @@
 {
     "add_cross_attention": false,
     "architectures": [
-        "LlamaForCausalLM"
     ],
-    "attention_bias": false,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
-    "bos_token_id": 1,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
@@ -15,7 +14,7 @@
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
@@ -36,22 +35,22 @@
     },
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 4096,
     "min_length": 0,
-    "model_type": "llama",
     "no_repeat_ngram_size": 0,
     "num_attention_heads": 32,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_hidden_layers": 32,
-    "num_key_value_heads": 4,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
-    "pretraining_tp": 1,
     "problem_type": null,
     "pruned_heads": {},
     "quantization": {
@@ -62,10 +61,10 @@
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
-    "rms_norm_eps": 1e-05,
-    "rope_scaling": null,
-    "rope_theta": 5000000.0,
     "sep_token_id": null,
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
@@ -81,5 +80,6 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 64000
 }

 {
     "add_cross_attention": false,
     "architectures": [
+        "Qwen2ForCausalLM"
     ],
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
+    "bos_token_id": 151643,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 151645,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
     },
     "length_penalty": 1.0,
     "max_length": 20,
+    "max_position_embeddings": 32768,
+    "max_window_layers": 28,
     "min_length": 0,
+    "model_type": "qwen2",
     "no_repeat_ngram_size": 0,
     "num_attention_heads": 32,
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
     "quantization": {
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-06,
+    "rope_theta": 1000000.0,
     "sep_token_id": null,
+    "sliding_window": 32768,
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 151936
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21a8d50bdf8fbd1540698c8f71db74b9de56d6a375c5d676122c490a05b15432
-size 3358077486

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe3b05b579c86f2ae5522c22941fffdab484da14633a47a6d40624334c1892dd
+size 5191621850

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "metadata": {
-        "total_size": 3357941760
     },
     "weight_map": {
         "lm_head.weight": "model.safetensors",
@@ -22,6 +22,7 @@
         "model.layers.0.mlp.up_proj.scales": "model.safetensors",
         "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
@@ -32,11 +33,13 @@
         "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
@@ -59,6 +62,7 @@
         "model.layers.1.mlp.up_proj.scales": "model.safetensors",
         "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
@@ -69,11 +73,13 @@
         "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
@@ -96,6 +102,7 @@
         "model.layers.10.mlp.up_proj.scales": "model.safetensors",
         "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
@@ -106,11 +113,13 @@
         "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
@@ -133,6 +142,7 @@
         "model.layers.11.mlp.up_proj.scales": "model.safetensors",
         "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
@@ -143,11 +153,13 @@
         "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
@@ -170,6 +182,7 @@
         "model.layers.12.mlp.up_proj.scales": "model.safetensors",
         "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
@@ -180,11 +193,13 @@
         "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
@@ -207,6 +222,7 @@
         "model.layers.13.mlp.up_proj.scales": "model.safetensors",
         "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
@@ -217,11 +233,13 @@
         "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
@@ -244,6 +262,7 @@
         "model.layers.14.mlp.up_proj.scales": "model.safetensors",
         "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
@@ -254,11 +273,13 @@
         "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
@@ -281,6 +302,7 @@
         "model.layers.15.mlp.up_proj.scales": "model.safetensors",
         "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
@@ -291,11 +313,13 @@
         "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
@@ -318,6 +342,7 @@
         "model.layers.16.mlp.up_proj.scales": "model.safetensors",
         "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
@@ -328,11 +353,13 @@
         "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
@@ -355,6 +382,7 @@
         "model.layers.17.mlp.up_proj.scales": "model.safetensors",
         "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
@@ -365,11 +393,13 @@
         "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
@@ -392,6 +422,7 @@
         "model.layers.18.mlp.up_proj.scales": "model.safetensors",
         "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
@@ -402,11 +433,13 @@
         "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
@@ -429,6 +462,7 @@
         "model.layers.19.mlp.up_proj.scales": "model.safetensors",
         "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
@@ -439,11 +473,13 @@
         "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
@@ -466,6 +502,7 @@
         "model.layers.2.mlp.up_proj.scales": "model.safetensors",
         "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
@@ -476,11 +513,13 @@
         "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
@@ -503,6 +542,7 @@
         "model.layers.20.mlp.up_proj.scales": "model.safetensors",
         "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
@@ -513,11 +553,13 @@
         "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
@@ -540,6 +582,7 @@
         "model.layers.21.mlp.up_proj.scales": "model.safetensors",
         "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
@@ -550,11 +593,13 @@
         "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
@@ -577,6 +622,7 @@
         "model.layers.22.mlp.up_proj.scales": "model.safetensors",
         "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
@@ -587,11 +633,13 @@
         "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
@@ -614,6 +662,7 @@
         "model.layers.23.mlp.up_proj.scales": "model.safetensors",
         "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
@@ -624,11 +673,13 @@
         "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
@@ -651,6 +702,7 @@
         "model.layers.24.mlp.up_proj.scales": "model.safetensors",
         "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
@@ -661,11 +713,13 @@
         "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
@@ -688,6 +742,7 @@
         "model.layers.25.mlp.up_proj.scales": "model.safetensors",
         "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
@@ -698,11 +753,13 @@
         "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
@@ -725,6 +782,7 @@
         "model.layers.26.mlp.up_proj.scales": "model.safetensors",
         "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
@@ -735,11 +793,13 @@
         "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
@@ -762,6 +822,7 @@
         "model.layers.27.mlp.up_proj.scales": "model.safetensors",
         "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
@@ -772,11 +833,13 @@
         "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
@@ -799,6 +862,7 @@
         "model.layers.28.mlp.up_proj.scales": "model.safetensors",
         "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
@@ -809,11 +873,13 @@
         "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
@@ -836,6 +902,7 @@
         "model.layers.29.mlp.up_proj.scales": "model.safetensors",
         "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
@@ -846,11 +913,13 @@
         "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
@@ -873,6 +942,7 @@
         "model.layers.3.mlp.up_proj.scales": "model.safetensors",
         "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
@@ -883,11 +953,13 @@
         "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
@@ -910,6 +982,7 @@
         "model.layers.30.mlp.up_proj.scales": "model.safetensors",
         "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
@@ -920,11 +993,13 @@
         "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
@@ -947,6 +1022,7 @@
         "model.layers.31.mlp.up_proj.scales": "model.safetensors",
         "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
@@ -957,11 +1033,13 @@
         "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
@@ -984,6 +1062,7 @@
         "model.layers.4.mlp.up_proj.scales": "model.safetensors",
         "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
@@ -994,11 +1073,13 @@
         "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
@@ -1021,6 +1102,7 @@
         "model.layers.5.mlp.up_proj.scales": "model.safetensors",
         "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
@@ -1031,11 +1113,13 @@
         "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
@@ -1058,6 +1142,7 @@
         "model.layers.6.mlp.up_proj.scales": "model.safetensors",
         "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
@@ -1068,11 +1153,13 @@
         "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
@@ -1095,6 +1182,7 @@
         "model.layers.7.mlp.up_proj.scales": "model.safetensors",
         "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
@@ -1105,11 +1193,13 @@
         "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
@@ -1132,6 +1222,7 @@
         "model.layers.8.mlp.up_proj.scales": "model.safetensors",
         "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
@@ -1142,11 +1233,13 @@
         "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
@@ -1169,6 +1262,7 @@
         "model.layers.9.mlp.up_proj.scales": "model.safetensors",
         "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
         "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
@@ -1179,11 +1273,13 @@
         "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
         "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
         "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",

 {
     "metadata": {
+        "total_size": 5191475200
     },
     "weight_map": {
         "lm_head.weight": "model.safetensors",
         "model.layers.0.mlp.up_proj.scales": "model.safetensors",
         "model.layers.0.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.0.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.0.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.0.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.0.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.1.mlp.up_proj.scales": "model.safetensors",
         "model.layers.1.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.1.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.1.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.1.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.1.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.10.mlp.up_proj.scales": "model.safetensors",
         "model.layers.10.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.10.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.10.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.10.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.10.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.11.mlp.up_proj.scales": "model.safetensors",
         "model.layers.11.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.11.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.11.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.11.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.11.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.12.mlp.up_proj.scales": "model.safetensors",
         "model.layers.12.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.12.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.12.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.12.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.12.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.13.mlp.up_proj.scales": "model.safetensors",
         "model.layers.13.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.13.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.13.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.13.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.13.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.14.mlp.up_proj.scales": "model.safetensors",
         "model.layers.14.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.14.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.14.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.14.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.14.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.15.mlp.up_proj.scales": "model.safetensors",
         "model.layers.15.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.15.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.15.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.15.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.15.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.16.mlp.up_proj.scales": "model.safetensors",
         "model.layers.16.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.16.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.16.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.16.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.16.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.17.mlp.up_proj.scales": "model.safetensors",
         "model.layers.17.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.17.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.17.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.17.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.17.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.18.mlp.up_proj.scales": "model.safetensors",
         "model.layers.18.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.18.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.18.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.18.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.18.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.19.mlp.up_proj.scales": "model.safetensors",
         "model.layers.19.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.19.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.19.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.19.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.19.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.2.mlp.up_proj.scales": "model.safetensors",
         "model.layers.2.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.2.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.2.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.2.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.2.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.20.mlp.up_proj.scales": "model.safetensors",
         "model.layers.20.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.20.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.20.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.20.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.20.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.21.mlp.up_proj.scales": "model.safetensors",
         "model.layers.21.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.21.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.21.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.21.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.21.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.22.mlp.up_proj.scales": "model.safetensors",
         "model.layers.22.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.22.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.22.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.22.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.22.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.23.mlp.up_proj.scales": "model.safetensors",
         "model.layers.23.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.23.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.23.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.23.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.23.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.24.mlp.up_proj.scales": "model.safetensors",
         "model.layers.24.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.24.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.24.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.24.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.24.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.25.mlp.up_proj.scales": "model.safetensors",
         "model.layers.25.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.25.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.25.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.25.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.25.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.26.mlp.up_proj.scales": "model.safetensors",
         "model.layers.26.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.26.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.26.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.26.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.26.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.27.mlp.up_proj.scales": "model.safetensors",
         "model.layers.27.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.27.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.27.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.27.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.27.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.28.mlp.up_proj.scales": "model.safetensors",
         "model.layers.28.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.28.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.28.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.28.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.28.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.28.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.28.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.28.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.28.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.29.mlp.up_proj.scales": "model.safetensors",
         "model.layers.29.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.29.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.29.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.29.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.29.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.29.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.29.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.29.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.29.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.3.mlp.up_proj.scales": "model.safetensors",
         "model.layers.3.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.3.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.3.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.3.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.3.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.30.mlp.up_proj.scales": "model.safetensors",
         "model.layers.30.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.30.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.30.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.30.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.30.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.30.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.30.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.30.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.30.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.31.mlp.up_proj.scales": "model.safetensors",
         "model.layers.31.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.31.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.31.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.31.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.31.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.31.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.31.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.31.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.31.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.4.mlp.up_proj.scales": "model.safetensors",
         "model.layers.4.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.4.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.4.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.4.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.4.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.5.mlp.up_proj.scales": "model.safetensors",
         "model.layers.5.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.5.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.5.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.5.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.5.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.6.mlp.up_proj.scales": "model.safetensors",
         "model.layers.6.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.6.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.6.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.6.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.6.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.7.mlp.up_proj.scales": "model.safetensors",
         "model.layers.7.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.7.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.7.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.7.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.7.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.8.mlp.up_proj.scales": "model.safetensors",
         "model.layers.8.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.8.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.8.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.8.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.8.self_attn.v_proj.qweight": "model.safetensors",
         "model.layers.9.mlp.up_proj.scales": "model.safetensors",
         "model.layers.9.mlp.up_proj.zeros": "model.safetensors",
         "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
+        "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.k_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.k_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.k_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.o_proj.zeros": "model.safetensors",
+        "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.q_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.q_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.q_proj.qweight": "model.safetensors",
         "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
         "model.layers.9.self_attn.q_proj.zeros": "model.safetensors",
+        "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
         "model.layers.9.self_attn.v_proj.channel_scale": "model.safetensors",
         "model.layers.9.self_attn.v_proj.q_perm": "model.safetensors",
         "model.layers.9.self_attn.v_proj.qweight": "model.safetensors",

quant_strategy.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "measurement": {
         "model.layers.0": {
-            "accuracy": 0.9242749214172363,
-            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -89,8 +89,8 @@
             }
         },
         "model.layers.1": {
-            "accuracy": 0.9216856956481934,
-            "total_bits": 486917664,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -105,10 +105,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -129,10 +129,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -165,10 +165,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -177,8 +177,8 @@
             }
         },
         "model.layers.2": {
-            "accuracy": 0.8546795845031738,
-            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -265,8 +265,8 @@
             }
         },
         "model.layers.3": {
-            "accuracy": 0.9084997177124023,
-            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -341,10 +341,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -353,8 +353,8 @@
             }
         },
         "model.layers.4": {
-            "accuracy": 0.8643641471862793,
-            "total_bits": 394026048,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -393,10 +393,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -441,8 +441,8 @@
             }
         },
         "model.layers.5": {
-            "accuracy": 0.8657441139221191,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -457,10 +457,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -529,8 +529,8 @@
             }
         },
         "model.layers.6": {
-            "accuracy": 0.877474308013916,
-            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -605,10 +605,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -617,8 +617,8 @@
             }
         },
         "model.layers.7": {
-            "accuracy": 0.8887453079223633,
-            "total_bits": 482790432,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -693,10 +693,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -705,8 +705,8 @@
             }
         },
         "model.layers.8": {
-            "accuracy": 0.9228010177612305,
-            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -793,8 +793,8 @@
             }
         },
         "model.layers.9": {
-            "accuracy": 0.9577234387397766,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -809,10 +809,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -881,8 +881,8 @@
             }
         },
         "model.layers.10": {
-            "accuracy": 0.9458887577056885,
-            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -969,8 +969,8 @@
             }
         },
         "model.layers.11": {
-            "accuracy": 0.9322950839996338,
-            "total_bits": 360997440,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1057,8 +1057,8 @@
             }
         },
         "model.layers.12": {
-            "accuracy": 0.9404451847076416,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1073,10 +1073,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1145,8 +1145,8 @@
             }
         },
         "model.layers.13": {
-            "accuracy": 0.9363645315170288,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1161,10 +1161,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1233,8 +1233,8 @@
             }
         },
         "model.layers.14": {
-            "accuracy": 0.9359749555587769,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1249,10 +1249,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1273,10 +1273,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1309,10 +1309,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1321,8 +1321,8 @@
             }
         },
         "model.layers.15": {
-            "accuracy": 0.9322938919067383,
-            "total_bits": 365124672,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1337,10 +1337,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1361,10 +1361,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1397,10 +1397,10 @@
             },
             "down_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1409,8 +1409,8 @@
             }
         },
         "model.layers.16": {
-            "accuracy": 0.939303994178772,
-            "total_bits": 449761824,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1449,10 +1449,10 @@
             },
             "o_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1497,8 +1497,8 @@
             }
         },
         "model.layers.17": {
-            "accuracy": 0.9451323747634888,
-            "total_bits": 486917664,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1513,10 +1513,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1585,14 +1585,14 @@
             }
         },
         "model.layers.18": {
-            "accuracy": 0.9493275880813599,
-            "total_bits": 519946272,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1601,10 +1601,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1637,10 +1637,10 @@
             },
             "up_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1673,8 +1673,8 @@
             }
         },
         "model.layers.19": {
-            "accuracy": 0.9514966011047363,
-            "total_bits": 571557408,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -1761,14 +1761,14 @@
             }
         },
         "model.layers.20": {
-            "accuracy": 0.955375075340271,
-            "total_bits": 604586016,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1849,14 +1849,14 @@
             }
         },
         "model.layers.21": {
-            "accuracy": 0.9731628894805908,
-            "total_bits": 664451616,
             "q_proj": {
                 "group_size": {
-                    "2": 64
                 },
                 "bits": [
-                    2
                 ],
                 "bits_prop": [
                     1
@@ -1937,14 +1937,14 @@
             }
         },
         "model.layers.22": {
-            "accuracy": 0.9785275459289551,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -1953,10 +1953,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -2025,8 +2025,8 @@
             }
         },
         "model.layers.23": {
-            "accuracy": 0.9788622856140137,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2113,14 +2113,14 @@
             }
         },
         "model.layers.24": {
-            "accuracy": 0.9794007539749146,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -2201,14 +2201,14 @@
             }
         },
         "model.layers.25": {
-            "accuracy": 0.9806145429611206,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -2217,10 +2217,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -2289,14 +2289,14 @@
             }
         },
         "model.layers.26": {
-            "accuracy": 0.9806764125823975,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1
@@ -2377,8 +2377,8 @@
             }
         },
         "model.layers.27": {
-            "accuracy": 0.9815640449523926,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2465,8 +2465,8 @@
             }
         },
         "model.layers.28": {
-            "accuracy": 0.9820178747177124,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2553,8 +2553,8 @@
             }
         },
         "model.layers.29": {
-            "accuracy": 0.9836413264274597,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2641,8 +2641,8 @@
             }
         },
         "model.layers.30": {
-            "accuracy": 0.9838729500770569,
-            "total_bits": 697480224,
             "q_proj": {
                 "group_size": {
                     "4": 128
@@ -2729,8 +2729,8 @@
             }
         },
         "model.layers.31": {
-            "accuracy": 0.9427725076675415,
-            "total_bits": 664451616,
             "q_proj": {
                 "group_size": {
                     "2": 64
@@ -2745,10 +2745,10 @@
             },
             "k_proj": {
                 "group_size": {
-                    "4": 128
                 },
                 "bits": [
-                    4
                 ],
                 "bits_prop": [
                     1

 {
     "measurement": {
         "model.layers.0": {
+            "accuracy": 0.8198099136352539,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.1": {
+            "accuracy": 0.8719034194946289,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.2": {
+            "accuracy": 0.8325738906860352,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.3": {
+            "accuracy": 0.8738632202148438,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.4": {
+            "accuracy": 0.8625121116638184,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "o_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.5": {
+            "accuracy": 0.8608803749084473,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.6": {
+            "accuracy": 0.8563823699951172,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.7": {
+            "accuracy": 0.8504223823547363,
+            "total_bits": 483482688,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "down_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.8": {
+            "accuracy": 0.9129691123962402,
+            "total_bits": 539218464,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.9": {
+            "accuracy": 0.8902812004089355,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.10": {
+            "accuracy": 0.8880372047424316,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.11": {
+            "accuracy": 0.8887085914611816,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.12": {
+            "accuracy": 0.8856921195983887,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.13": {
+            "accuracy": 0.8820700645446777,
+            "total_bits": 450454080,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.14": {
+            "accuracy": 0.9193291664123535,
+            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.15": {
+            "accuracy": 0.9107174873352051,
+            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "o_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             },
             "down_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.16": {
+            "accuracy": 0.9100451469421387,
+            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "o_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.17": {
+            "accuracy": 0.908327579498291,
+            "total_bits": 572247072,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.18": {
+            "accuracy": 0.9288191795349121,
+            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "up_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.19": {
+            "accuracy": 0.9297795295715332,
+            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
                     "2": 64
             }
         },
         "model.layers.20": {
+            "accuracy": 0.9339859485626221,
+            "total_bits": 661014048,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.21": {
+            "accuracy": 0.9743473529815674,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
+                    "4": 128
                 },
                 "bits": [
+                    4
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.22": {
+            "accuracy": 0.9624457359313965,
+            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.23": {
+            "accuracy": 0.9775146245956421,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.24": {
+            "accuracy": 0.9725011587142944,
+            "total_bits": 782809632,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.25": {
+            "accuracy": 0.9676313400268555,
+            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.26": {
+            "accuracy": 0.9747145175933838,
+            "total_bits": 782809632,
             "q_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1
             }
         },
         "model.layers.27": {
+            "accuracy": 0.9794363975524902,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.28": {
+            "accuracy": 0.9793131351470947,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.29": {
+            "accuracy": 0.9778343439102173,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.30": {
+            "accuracy": 0.9739029407501221,
+            "total_bits": 815838240,
             "q_proj": {
                 "group_size": {
                     "4": 128
             }
         },
         "model.layers.31": {
+            "accuracy": 0.9666062593460083,
+            "total_bits": 749781024,
             "q_proj": {
                 "group_size": {
                     "2": 64
             },
             "k_proj": {
                 "group_size": {
+                    "2": 64
                 },
                 "bits": [
+                    2
                 ],
                 "bits_prop": [
                     1

special_tokens_map.json CHANGED Viewed

@@ -1,34 +1,19 @@
 {
   "additional_special_tokens": [
     "<|im_start|>",
-    "<|im_end|>",
-    "<|im_sep|>"
   ],
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
   "eos_token": {
-    "content": "<|endoftext|>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

 {
   "additional_special_tokens": [
     "<|im_start|>",
+    "<|im_end|>"
   ],
   "eos_token": {
+    "content": "<|im_end|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<|endoftext|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,33 +1,15 @@
 {
-  "add_bos_token": false,
-  "add_eos_token": false,
-  "add_prefix_space": true,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<|startoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
       "content": "<|endoftext|>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "6": {
       "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
@@ -35,39 +17,27 @@
       "single_word": false,
       "special": true
     },
-    "7": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "8": {
-      "content": "<|im_sep|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "additional_special_tokens": [
     "<|im_start|>",
-    "<|im_end|>",
-    "<|im_sep|>"
   ],
-  "bos_token": "<|startoftext|>",
-  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
-  "legacy": true,
-  "model_max_length": 4096,
-  "pad_token": "<unk>",
-  "padding_side": "right",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": true
 }

 {
+  "add_prefix_space": false,
   "added_tokens_decoder": {
+    "151643": {
       "content": "<|endoftext|>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "151644": {
       "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "151645": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [
     "<|im_start|>",
+    "<|im_end|>"
   ],
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
 }