diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..6df079cab6326b2237d35ec1adc4215fdcc51a97 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+rubra-11b-h/rubra-11b-h.png filter=lfs diff=lfs merge=lfs -text
+rubra-11b-h.png filter=lfs diff=lfs merge=lfs -text
diff --git a/cal_data.safetensors b/cal_data.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd988e09571b1e2a570a608f858c02db1d47b325
--- /dev/null
+++ b/cal_data.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08be1103ff8fcef33b570f3c0f5ae4cc7f9dc5c3f264105baa55fc9b132ed1be
+size 1638488
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..22c8db31fd74541581d5b0dc2cbc7c402724e0ba
--- /dev/null
+++ b/config.json
@@ -0,0 +1,26 @@
+{
+ "_name_or_path": "sanjay920/rubra-11b-h",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.38.2",
+ "use_cache": false,
+ "vocab_size": 32000
+}
diff --git a/example1.png b/example1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9f6f26a3ab7d0de6534adc2192454b2b1f92ecd
Binary files /dev/null and b/example1.png differ
diff --git a/example2.png b/example2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e0041d83e7c53413c66988c9bd3b5b5fa280b2f
Binary files /dev/null and b/example2.png differ
diff --git a/example3.png b/example3.png
new file mode 100644
index 0000000000000000000000000000000000000000..1ab190c3029d443abb020d670a5a7a44abe03f23
Binary files /dev/null and b/example3.png differ
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..81e4802e5819d7759c46acbe055c75e4b6d092c5
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.38.2",
+ "use_cache": false
+}
diff --git a/hidden_states.safetensors b/hidden_states.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa20d36f1b33c321543ea28085a9bb4c8321834c
--- /dev/null
+++ b/hidden_states.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:159cc384aca646411e995e4443b10ad57f1d7e6520cf9a5903c6203bd52192cc
+size 1677730376
diff --git a/job_new.json b/job_new.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b29633d7c4bfcac9a23aec380e19864f83fb113
--- /dev/null
+++ b/job_new.json
@@ -0,0 +1,98090 @@
+{
+ "in_dir": "base_model",
+ "out_dir": "sanjay920/rubra-11b-h-EXL2",
+ "cal_dataset": "wikitext-test.parquet",
+ "bits": 8.0,
+ "dataset_rows": 100,
+ "measurement_rows": 16,
+ "length": 2048,
+ "measurement_length": 2048,
+ "head_bits": 6,
+ "shard_size": 8192,
+ "compile_full": null,
+ "rope_scale": null,
+ "rope_alpha": null,
+ "output_measurement": null,
+ "progress": "finished",
+ "cal_filename": "sanjay920/rubra-11b-h-EXL2/cal_data.safetensors",
+ "last_module_idx": 98,
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.902508161496371,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9203556086868048,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9246767205186188,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955721165984869,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563947499264032,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568075467832386,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.967554040485993,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687355454079807,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720487990416586,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737953173462301,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780201958492398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794629843672737,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802156471414492,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822191685670987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892206196091138,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910724487854168,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913987399486359,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943463499366771,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976803997560637,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9123474769294262,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9179784115403891,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.932591964257881,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.937203103909269,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9628385086543858,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679891671985388,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975108077051118,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814978303038515,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832107973634265,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824423746322282,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846852865885012,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910700924519915,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924008402740583,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950336539186537,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954662031377666,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970542312948965,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985220660455525,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.8877861187793314,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8973407302983105,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.910242407117039,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9385041804052889,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.940526916179806,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9435215112753212,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9515076652169228,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955113283591345,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963577882386744,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648914394201711,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705673614516854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749045115895569,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721663881791756,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771681335987523,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837308657588437,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887261725962162,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859461099258624,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945900982129388,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963489300280344,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9527104110457003,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563737579155713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9567327869590372,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568935022689402,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930489940161351,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939982455398422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947059626574628,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970151603920385,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970192952168873,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996223299196572,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975017743272474,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980218067430542,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983664975079591,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998927123764588,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990109881728131,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991535865574406,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993296123193431,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9912483744265046,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916435057821218,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923406311427243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994128311634995,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952435296145268,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953209938539658,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969110778911272,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286335446872,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972990020178258,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974499639647547,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974876438791398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976576824410586,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979693320783554,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981310617731651,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988565697785816,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990829429698351,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991081247353577,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995378834355506,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997747411516684,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9892762480885722,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895454781362787,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912471331772394,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991794113710057,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947041008272208,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951274986669887,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958551824238384,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972624051006278,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975140962487785,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973148557328386,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976522701763315,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998632437454944,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988308300817152,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992609197433922,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993038972388604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994499729345989,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998017110610817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9899887884967029,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904003122937866,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915672297938727,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936683645646553,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944629863894079,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945710314495955,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955734857358038,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956779086787719,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961270387721015,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962668074440444,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971315091243014,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974506823491538,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997335236883373,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976795297843637,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985245664138347,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988046068392578,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987181324831909,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995058817348763,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996684330690186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9844154579914175,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847926673828624,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871780377579853,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879374770680442,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921912606514525,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928218296554405,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938397765217815,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959715837030672,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963449053175282,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960351230402011,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965353266888997,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997980712352728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982754946904606,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989119462479721,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989720682242478,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991756232702755,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997108341558487,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9940207607578486,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951273926417343,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978095898550237,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977955239373841,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976957584294723,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979161136798211,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977876108750934,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997963167646958,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997377075618715,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973086231620982,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998610572751204,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987351718518767,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986403053189861,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987657150850282,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992435346975981,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994336164018023,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992440667119808,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998596248324247,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998452412955885,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9953385644475929,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954606430546846,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961155957134906,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963309457525611,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976867814257275,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978648535179673,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981546685303329,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988047819279018,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989133847047924,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988297178279026,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989727466163458,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994054905910161,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994889207046072,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996792593592545,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996966957805853,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997519157095667,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9999101926136973,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.983027494745329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988349880441092,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933699457033072,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933736611856148,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932925261673518,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959597528359154,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934077207872178,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996139597526053,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955893133592326,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955878545151791,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972478003473952,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997818165429635,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973545127431862,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977891548696789,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987297143379692,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999152827916987,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987308212730568,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997582192772825,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996598286616063,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9925510261964519,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992728061741218,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993811263149837,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941331359441392,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962669986125547,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965560747805284,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970157300849678,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980933387705591,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982620093869627,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981101373850834,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983399104385171,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990414018138836,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991743572754785,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994895925538003,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995117338021373,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995983660337515,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998637763601437,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9853996349847876,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894411026616581,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904139981372282,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927006095531397,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937793352000881,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939195910119452,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951347391761374,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953175328264479,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995725197615684,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959138984268066,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967400987807196,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969654745509615,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969755911588436,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997284421682707,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982928356475895,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986039540162892,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985298825704376,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994038183504017,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996078892836522,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9796033757738769,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801610637223348,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834316545748152,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844096631277353,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898302512592636,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906708221533336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99199359229533,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947536016406957,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952311203232966,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948356513341423,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954958254238591,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973706452437909,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977587403773214,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985758317998261,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986605149169918,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989306384450174,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996214626262372,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9842456089681946,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847515997826122,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862019201391377,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989355675119441,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918334566464182,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919354065787047,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939575470634736,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941131755767856,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950063232099637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995209871471161,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958428621175699,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962687853985699,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962730693951016,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966743008699268,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979651567628025,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982993875746615,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998358772485517,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992254695025622,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995699401733873,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.974171947222203,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748882604762912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789550169371068,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801530737895519,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871387034072541,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881842365139164,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898216370493174,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933750616037287,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939624306280166,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934718561416958,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994298821548,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966810355253983,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99716517124034,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982046157965669,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983098571392475,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998642562663008,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995165277887281,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9817969363648444,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826170109445229,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842383282957599,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987987891305238,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902089073439129,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905349539476447,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925528938765638,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929857852112036,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941390909953043,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943753193656448,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953846742282622,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957563256757567,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958568579750136,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962565110181458,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977382431097794,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981046210305067,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982187150235404,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990283438819461,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995301622984698,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9699697830947116,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707644692389295,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758064048364758,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772977469256148,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850295406067744,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862511573592201,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882942370604724,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922574244847056,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929685922397766,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924017082084902,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933631024032366,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961383405752713,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966958504082868,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979089612534153,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980363582653808,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984643486241112,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994418839887658,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9762045053066686,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977105101919733,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792947630630806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844983145594597,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880177224404179,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882629308849573,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916761830099858,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919323883077595,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927965526585467,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931856998300646,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940749995876104,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945208916324191,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947839853120968,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952225348097272,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970656104414957,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598896999145,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978074637183454,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986692166348803,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994058420306828,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9653462056303397,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966274723643437,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718257325002924,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734515016898513,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827394402818754,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841620186925866,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863689955091104,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910840421216562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918851483380422,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912260650889948,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923439234553371,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955300039000576,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996181691181846,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975773043261142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977210198558168,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981782444083365,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993462364582228,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9896986646344885,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898966330802068,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959875158820068,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959419664373854,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958830689574825,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994527042581467,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958289333299035,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945835779071786,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961835095891729,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961431270785397,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976346265902976,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984772361494834,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976802616729401,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998478227716987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987404754647287,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989050509684603,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987427304295124,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999758682492029,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996692690583586,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.990279221732635,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905298211961053,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920168473036028,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924921841884498,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951802256982774,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459609918762,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961894396110438,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975046989129623,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977252853277605,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598426244687,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978572657200857,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987597939252737,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989334050405887,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993266929595848,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993670692056185,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994898942059081,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998092828209337,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9863239590195008,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868265291443095,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921606028510723,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919705191277899,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919389690621756,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992855364602292,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919611498771701,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929512560192961,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936951973941177,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936885720526334,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966282595996745,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969246424443554,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966480978473555,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969086252094712,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981692790461238,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984889770348673,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981699131167261,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995570989412954,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999439229904965,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9847793944063596,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985123383696191,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875411554821767,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882662810268812,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923551889369264,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929447612666991,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939775222155731,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996076546041877,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964278059342178,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961227821040666,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965966620657127,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980314106360311,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983075839991216,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989473513051053,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989980212376395,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991948430506454,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999716363789048,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9758020673179999,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766555116511881,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786176779307425,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845216747489758,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873013857286423,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874568734667264,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911378039978445,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912875468144193,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920827563619241,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928605046588928,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936999409110285,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940654404636007,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942948371754028,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947681912162807,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968999695556704,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974256125715328,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977206327166641,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986770139730652,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993911658057186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9619943019933999,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630233785137534,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9692408089758828,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710238851839676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810652892338112,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826576914638281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851201827987097,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902692650794052,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911376674135681,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990367868449539,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916139839915559,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950910237093922,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958211112534627,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973492388962768,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974947760347277,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979947993706446,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992910219734767,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9732593579683453,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9740604794351384,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758571569109336,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981850401032716,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858934246003628,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859436851111241,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901865926804021,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901829248992726,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916712933336385,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991981672210386,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932556867133826,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936436033167411,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937982890987769,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943627052416559,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966125944047235,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971977410459658,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975617044838145,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998476260661846,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993510511812929,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9590617874637246,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9601758192293346,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669503723271191,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689558737445623,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794857824454084,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812142355367541,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839909761212766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894312581745908,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903528640279546,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895940163987689,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909067719127052,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946883149386849,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954700223461259,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997104218302411,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972932919627056,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997851840693329,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992196869279724,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9674572005169466,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686749550746754,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713187958113849,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791675666347146,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834151559043676,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835956042516045,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892363072722219,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894226833130233,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904058027314022,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911344906722661,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915743695746642,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922297771845479,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926136705034878,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931706467177719,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959881600225344,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965825928520644,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972190943808528,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980772634007735,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992562690567866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9578170392196625,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9590193158946931,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663224390242249,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685161245288327,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788233999861404,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806237743468955,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836092637851834,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889325238764286,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051793083549,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892134199035354,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906160493264906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944984828180168,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953196261485573,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969970610691234,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997200449812226,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978280807699775,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991946576228656,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9639496663585305,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654018925502896,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478466277942,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765551248565316,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993222054094,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812793986639008,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867310639237985,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869522373774089,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879012388410047,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883594005950727,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903323815669864,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910267862142064,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912635658401996,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921826082281768,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951284613925964,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960675196198281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962311515701003,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977348528045695,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990211970216478,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9561710001435131,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574084184132516,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9650795814814046,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674192051170394,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779465935425833,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798049030359834,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829885881626979,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884562431252562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895629284437746,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887702904525213,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902152858558111,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942774242081214,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995120030187536,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968639670987613,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970916908496292,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977569706679787,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991551881867053,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9918902807403356,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919913714402355,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941640416509472,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994114655040903,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943733899563085,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995450929418439,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945171528088395,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954713895567693,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960881019214867,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961418636376038,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970261527196271,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997912951730541,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969646203535376,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978629015240585,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983468192367582,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987805694254348,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983534378916374,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996761174661515,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995716315679601,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9872070293640718,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875441331532784,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897534946794622,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904376343474723,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936092627176549,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941051893692929,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950599781586789,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966741525713587,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969771623436827,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967607702419627,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971554788498906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983539172244491,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985822996895877,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999102601341292,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999159805731324,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993414144846611,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997426053332674,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.979622381972149,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979827641043812,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915289613127243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915508964331821,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917960677703377,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99220766252256,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919130146445241,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920798306411598,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899292784975842,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899077572044916,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952535879274365,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955962051753886,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952810344693717,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956986843317281,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968346282839775,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975842618150637,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968424138351111,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993985197106667,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992879528981575,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9795986886601895,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800954010570422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836475889314897,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847203819663264,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897230201167986,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905139947659336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920523978071287,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946800437464844,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951683254330419,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947878780949395,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954185747192241,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973539462371264,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977197637344943,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985762029973557,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998654469876783,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989491958876897,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996122916345485,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9591868193820119,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9606455501634628,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963689126772806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9717553314985707,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779619486071169,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790570001350716,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839622974977829,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852984003955498,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869856234290637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874973931582645,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891311423270963,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899767622118816,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899715750943869,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910780027857982,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943656724644825,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953498630784452,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956311181304045,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976474752329523,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988193828030489,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9518720533233136,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9533123800065368,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9613713058643043,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9637906108982861,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758038044674322,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977887489949353,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812068799510598,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873612260562368,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885673976968974,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876663720351644,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892762696254067,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937061189557426,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946411842829548,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965408335992834,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967898455652175,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974644881876884,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999061718754092,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9581109315622598,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596416996791959,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630198783706874,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733511302620173,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787798321340233,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791236850433052,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867767564137466,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872424571658485,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881223333068192,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891258522984572,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893496042350307,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899621973163448,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904887008597143,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912755576951895,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994794098805869,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956135836691828,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996461629998521,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975838983955327,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990680614864687,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.949402768863365,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9508629126939923,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9593443237245083,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619821181986481,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744504160480574,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766377885825932,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980175971868448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865899439901114,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878867646912113,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869770252262242,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886702921357937,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933583022502717,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943471373990178,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996348384549492,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966181806812529,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973575899493881,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999004362798587,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9531727249268442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9553546705283225,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9591875285841525,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703965260414407,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756896772887558,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762924946844578,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842725213966332,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850752123165876,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985899701656308,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863111876766197,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987529011850711,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885381097556092,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890030458336696,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899866348132491,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938212371780537,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994938170624664,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954652949527372,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972972613177262,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988004061815445,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9480401042383164,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949517953209579,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9579005774576217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9605092275887728,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739315117476508,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761737691005692,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796711904928088,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861559904529713,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875617006327957,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866939535131678,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884115278837271,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931988872704096,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942056277068332,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962179714348167,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965195794648025,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972543855546974,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988709868557635,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9528840403072536,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9542537578381598,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580731589812785,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668758142506704,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9747469594003633,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754544387105852,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98189686704427,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828780224779621,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843287441181019,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857511474983767,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870679471641779,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881277907988988,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880640183691867,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891430772840977,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934388622350525,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947039625258185,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948006026097573,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971943795535481,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985773553344188,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9452786394394934,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9468083463143557,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9557567997835577,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585219516884536,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725894997827709,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748451801715419,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785158903105184,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856567675014958,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869675827212632,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860588647425175,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878197305952199,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928987653984223,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939260903047398,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960985295183491,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963896564295283,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529023896437,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989376296944101,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9910135175450705,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912824938655831,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941338914213702,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941643851052504,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945628636050969,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953863266273402,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948620661452878,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955346850911155,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961977519269567,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962104187288787,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976131010480458,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973233057680773,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976833624823485,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997326233657077,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980814326991094,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988377549670986,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980935500643682,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996226447292429,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994861224859051,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9842658781562932,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846874004579149,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872097237966955,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880227182293311,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921422847255599,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927411952521652,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993845232820604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959024451673031,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962649137887638,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960189153935062,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964987884595757,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997976030492282,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982532844805974,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988919206953142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989660839673888,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991774850786896,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996749793717754,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9837653411086649,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840333891916089,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904419900849462,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904393116594292,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907222538604401,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885966366273351,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906790258246474,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888943562982604,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991739244927885,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916874898481183,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955156600044575,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954305627325084,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955371092073619,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360554984305,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968435579503421,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997513074951712,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968456693313783,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993884474752122,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992451430443907,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9759478892665356,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765194484498352,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803963751764968,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815706800436601,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878923632204533,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888132131309249,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904801220982336,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937392670253757,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994295743497787,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938581893220544,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99459478398785,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996881335915532,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973090803541709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983202805378824,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984141130771604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987336630001664,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995366168823239,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9556966400705278,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9570334849413484,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.960812549572438,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703220267547294,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772858648793772,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778754758881405,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854066136176698,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862237990018912,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869342013844289,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879776879679412,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887068463722244,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893193228635937,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898968231282197,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162148156203,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942090424301568,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953937472309917,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957477512361947,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974927010043757,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988711644837167,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.939760722219944,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9414845332503319,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9511478268541396,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.954194939462468,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698484404943883,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972340663196519,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763197798747569,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841194238979369,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855774453608319,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846697235479951,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866198089439422,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921901000780053,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933257188240532,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956658300361596,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960375506052515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968587130861124,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998800835244765,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9503053929656744,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9525580331683159,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565810647327453,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682650562608615,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738911159802228,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746871701208875,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981898903963156,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829692101338878,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843669487163424,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848410444683395,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867640393786132,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879159886040725,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881031874101609,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892810968449339,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934559455141425,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945060422469396,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950925561715849,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970777546841418,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986730054151849,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9339546884875745,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9359053233638406,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9467246429994702,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9503237595781684,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668553009396419,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696119821164757,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741377918981016,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823939295019954,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840543393511325,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831261212239042,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985276190796867,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913982859579846,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926482754817698,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951997506432235,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956310951092746,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966006540635135,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986620795898489,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.950924233533442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526217842940241,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9560005511157215,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651925785001367,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.974756367970258,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754410153254867,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821089534088969,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829862234182656,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833339191973209,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847710862522945,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871181106427684,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879768814425915,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882259144214913,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893726909067482,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922168490593322,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943546110007446,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933991891448386,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969799995742505,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998169852187857,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.930072165094316,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9321989654563367,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9437624660786241,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9476719659287483,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649747095536441,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678829013137147,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972721746424213,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812747784890234,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830624930327758,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821682657347992,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844429356744513,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909043678781018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992228524555685,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948874444817193,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953900648106355,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996433951149811,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998560123873176,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9506571341771632,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95194994029589,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554108332376927,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965399626409635,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973783713998273,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746870229719207,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815001173410565,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827296736184508,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836740818573162,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849110426730476,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869808834628202,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881130746216513,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988304317521397,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895301525830291,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934029994474258,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946216194075532,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949501673690975,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529373142403,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986130048564519,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9300565491430461,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9322172533720732,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9436244373209774,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475068859755993,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649612298235297,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478236939758,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727855251403525,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812858692603186,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830675406847149,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821449116570875,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844449243973941,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908643537783064,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922332215355709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948491686081979,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995345929695759,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963671152072493,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985312398639508,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9852219514432363,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854540597880259,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992581088328734,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925188558117952,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992044290585909,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908394656376913,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921488391701132,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909024515654892,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931627179321367,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933414816041477,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962164734170074,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975688391277799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962678278825479,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975411284103757,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981369049564819,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986740430613281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981451820058282,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850489922304,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994992236806866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9810774835059419,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816042414167896,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984690910903737,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857331512612291,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99059497367125,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913256106083281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926997422298882,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950385903939605,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954885762417689,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952267833868973,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958167897420935,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975691078143427,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979071016423404,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986456794285914,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987542949966155,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990193766789162,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995884474592458,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9801707121077925,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803194139385596,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864167625200935,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861038540257141,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867335963645019,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873976144008338,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986821249302011,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876039975206368,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904251811094582,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903880580095574,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943270806106739,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950134730897844,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943478996865451,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949842338974122,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966566005459754,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976610257581342,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966578336025123,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993154952098848,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991263385672937,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9730628487886861,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973774635582231,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779872408835217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792550611309707,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865127976518124,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875629763700999,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893352640210651,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929699986823834,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993587943376042,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931577415263746,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939938187017106,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965159249695716,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970039997715503,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980992393029737,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982241835823515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985617383063072,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99946317839931,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.960547131486237,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619710550177842,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641266402322799,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726306177908555,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774892054265365,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978482106118463,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831622139317915,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843430254259147,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861415453488007,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869873017305508,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988334433583077,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893150111893192,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892438768292777,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901899283868261,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938217146846,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951323153509293,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948282100085635,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976286789024016,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985810204962036,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9314752910286188,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9334497996605933,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442711595911533,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477534026373178,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658016683533788,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686134352814406,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731169363949448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820591834140942,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836634104140103,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826249615289271,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984836339019239,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911455910187215,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924397784052417,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951095145952422,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954999905312434,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996416717855027,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986326706202817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9617549669928849,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633937487378716,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656938512343913,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973594733630307,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796587142627686,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803864565910771,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848732578684576,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858064573490992,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872101178625599,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877110663801432,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895211174734868,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901978281559423,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903222138527781,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910594234825112,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943284161563497,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953842611867003,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952440769411623,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976959823688958,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986929383812821,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9317440665327013,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9337658795993775,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442870973143727,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475885890424252,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661285383626819,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968816627166234,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731064232764766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823057807516307,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838297074311413,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828014142112806,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849381670355797,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912480839411728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925001342198811,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951820038841106,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955530187871773,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964022201456828,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986571829067543,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9678770737955347,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691678545204923,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709918311564252,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765152925392613,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807526028016582,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820293172961101,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841885701171122,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855642172624357,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866925096139312,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870485090650618,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897773859556764,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902223890530877,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902153389411978,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909679422853515,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942741809645668,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955411965493113,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948284650454298,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980486317217583,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986889937572414,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.932304578833282,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342344424221665,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444406803231686,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.947586020687595,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662447790615261,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689184916205704,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731233624042943,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98242219875101,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839446639525704,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828673212323338,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849894531071186,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912908738479018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925277467991691,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952316170674749,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955659814586397,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964031476119999,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986658465059008,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9655442168004811,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671692294068635,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697359376586974,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761799619300291,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803578312275931,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814343429170549,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843596651917323,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858793318271637,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869456980959512,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874889963539317,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896778290858492,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908777676173486,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990540920291096,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991681206272915,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948772302595899,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958305972686503,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956192086392548,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997941504814662,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988134019586141,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9325818351935595,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9344900948926806,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443744430318475,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474421949125826,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663861433509737,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690310031874105,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731040432816371,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824838677886873,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839952855254523,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829381878953427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850510018295608,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913280457840301,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925565869780257,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952473715238739,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995588112360565,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963884983881144,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986808212925098,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9875456623267382,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879995033843443,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943620861449745,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943356119911186,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945071052061394,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966131598775974,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945579385675956,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965798364137299,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963930066151079,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963805679144571,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978544423211133,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978882907453226,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979100275522796,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979497697204351,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985688193410169,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992296196833195,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985732258646749,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997000065504835,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999592912312437,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9806502945721149,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981183686060831,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839521977119148,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848100668168627,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903757648426108,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911127613158897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922769031254575,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950082659197506,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954419361311011,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951236607448664,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957144900981802,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975175310974009,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978603233030299,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986405047020526,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987238197063562,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989435812094598,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850031682494,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9871479421271943,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880682986695319,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990131174272392,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898401049431413,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900419003679417,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925147893663961,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990109168633353,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992998635803815,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936714109790046,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937381937343162,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962826371920528,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965584015008062,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964808065269608,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966537497966783,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974247746868059,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984650467449683,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99742937242263,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999517957230637,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992533045333403,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9704249275382608,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9712161585921422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754329854622483,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766340904170647,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985306327464059,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864371244912036,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881206953432411,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924467977543827,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930931831186172,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925579508999363,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934620868298225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962237248546444,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967452616401715,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979627527136472,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998072977818083,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983912546158535,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994254562079732,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.965649431804195,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668996206019074,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694499158067629,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9755590457934886,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807646207045764,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981554796686396,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854566292488016,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863431870471686,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881037988234311,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988736093393527,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904858563677408,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914403361617588,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915653750649653,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924774839309976,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99530891442555,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961586743447697,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996117347778636,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978859801776707,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989366781519493,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9328135452233255,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9347493099048734,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444518794771284,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474378717131913,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966479453491047,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691691853804514,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731421418255195,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982578118913807,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840653012506664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829857222503051,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851169427274726,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913530095946044,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925886914716102,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952522270032205,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956026887812186,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963736499339575,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830660418491,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9632973142433912,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647458797553554,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677944149589166,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744596001692116,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805283090099692,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815037929220125,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855152769596316,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868621388450265,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877731337910518,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881437398144044,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905192241421901,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911876061523799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915461755299475,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924024375504814,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948597592592705,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961402852204628,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957100535684731,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979259906467632,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988766744427267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9317238796502352,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336495161987841,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9432325733359903,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462370390538126,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965948719996959,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686132016358897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725569972069934,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822160669136792,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837689126143232,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827277816366404,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848514216719195,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912281962460838,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924635672650766,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951937775185797,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459379707463,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996324696301599,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986840636047418,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9624556459020823,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9638149088714272,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966128898318857,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731651132460684,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979888558271341,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807631041621789,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854432169813663,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865111434482969,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881467132945545,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988405313459225,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051496224478,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905749676981941,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908825725433417,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917469431529753,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947028355381917,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959433214535238,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956959496485069,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977895863121375,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998879095823213,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.9317085896618664,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336435773875564,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.942958232248202,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9459079855587333,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658660412533209,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685534046730027,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97245246428065,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821948111057281,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837224487564526,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826810664962977,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848097579088062,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912046356475912,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924372929672245,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951681817183271,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995533375418745,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962979012634605,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986440870707156,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9656216644216329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668171582743526,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708404617849737,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773957233410329,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818287189118564,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825881774304435,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866229476756416,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877469159546308,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876828042906709,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885670749936253,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910485846921802,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914193961885758,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918927509279456,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924896143202204,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954261311504524,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962042814731831,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963586331723491,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980747035369859,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989914739744563,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.9323020433075726,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342701844871044,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9433167146053165,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462174526415765,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660997575847432,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687749480362982,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725526751717553,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822096759453416,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837591436225921,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828054906101897,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849168861983344,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991255592321977,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924872927949764,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951640699291602,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955584222916514,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962992245709756,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986363174612052,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.self_attn": [
+ {
+ "accuracy": 0.9923371153709013,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925224412581883,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954915028938558,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99549823399866,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995459749334259,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971324702346465,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955590646713972,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972384700377006,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975309232977452,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975821622356307,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985752828288241,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984370510574081,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985908498783829,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998461567243794,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989729408916901,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991191346489359,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989771689652116,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997115908463456,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999673805286875,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.mlp": [
+ {
+ "accuracy": 0.9811925254762173,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817058588378131,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842266714549623,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849823992699385,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905871613882482,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913147779298015,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923701590159908,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951182099757716,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955421296763234,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952246638713405,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958023339568172,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975680393690709,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979028400339303,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986676175176399,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987487139806035,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989479207215481,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999586516083582,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.self_attn": [
+ {
+ "accuracy": 0.9863162519177422,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870703057968058,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923058476706501,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921633902995382,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924568198912311,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943161772680469,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924855433346238,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943068775464781,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946824459475465,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947726883983705,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960416847025044,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286915777251,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961076912295539,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971158314438071,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978602468036115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983802583883516,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978632671263767,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995351644083712,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993034406106744,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.mlp": [
+ {
+ "accuracy": 0.9722781821619719,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730187952518463,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761316905496642,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97704425919801,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861467212904245,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872288115439005,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885241388110444,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927656080690213,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933978671906516,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929643408977427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993827001657337,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964197074586991,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969165127258748,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980382031499175,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981734739849344,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984235907904804,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994161145659746,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.self_attn": [
+ {
+ "accuracy": 0.9635075002443045,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647748144343495,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678804749855772,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975613204529509,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811677185352892,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817737329285592,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870748340035789,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877749991719611,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888431992731057,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889247854589485,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905630798893981,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912089991848916,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916505919245537,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922629974607844,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952286969055422,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961338660214096,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963223397062393,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978658634063322,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989551416583708,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.mlp": [
+ {
+ "accuracy": 0.9316645334474742,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9335778222884983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9425450335256755,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455189639702439,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656224818900228,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683304415084422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721890660002828,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981844296795316,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834737943019718,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825274200411513,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846752261510119,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911125443177298,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923583892232273,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950715682061855,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954821281717159,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996249489224283,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985835553889046,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.self_attn": [
+ {
+ "accuracy": 0.9537865724414587,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9551741087343544,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607197451405227,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698350551771,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976477846968919,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769113931106403,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844105805968866,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849680608604103,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986211190931499,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871965564088896,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881930081755854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889599440502934,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989799489849247,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162202289328,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940077341743745,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952460718923248,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952964097028598,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973116889304947,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988119364206796,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.mlp": [
+ {
+ "accuracy": 0.9297301913611591,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9317178723867983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.941267877118662,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443992015440017,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644244713708758,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9672436950495467,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714020473184064,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993310529739,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98275462386664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818051372421905,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839897501515225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906999649247155,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919934530335013,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948506485670805,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995264163473621,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960826287424425,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985817404231057,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.self_attn": [
+ {
+ "accuracy": 0.9438093651551753,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455973405856639,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9558804116677493,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643303193151951,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721492730313912,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696818670490757,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780592895112932,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975161383044906,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798537619644776,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800844814162701,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868916404084302,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878041589981876,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882374077569693,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889709630515426,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932704607781488,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943467889679596,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945205381663982,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971068996092072,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984773485775804,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.mlp": [
+ {
+ "accuracy": 0.922890292480588,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.924928605556488,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9353087733034045,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9386197202838957,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604559869039804,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964205636177212,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687150722602382,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781123218126595,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808429721742868,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800167196663097,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823547832202166,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989655573212076,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911466648918577,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942131606221665,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947176255809609,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99570402916288,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983965002611512,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.self_attn": [
+ {
+ "accuracy": 0.9432861453387886,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456116510555148,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9537060365546495,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966851421049796,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733642505016178,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735991606721655,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824671322712675,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828924887115136,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846992893726565,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985385547101032,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866445885854773,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875469116959721,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884863024926744,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894941137754358,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993721309438115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994685380050214,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952394235297106,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971018662472488,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986625081146485,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.mlp": [
+ {
+ "accuracy": 0.9337531251367182,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9352742773480713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434063888620585,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9460821493994445,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660537266172469,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689954988425598,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726249100640416,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820060149068013,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838739162078127,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826479388866574,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848665873287246,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910833079484291,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924003609921783,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950582988094538,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360906558577,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962620872247498,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985231003520312,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.self_attn": [
+ {
+ "accuracy": 0.9973379723232938,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975171713740565,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982815929397475,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984281775759882,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830925190588,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987098783676629,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991939260071376,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992102088763204,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992937228125811,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993120388971874,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993242765995092,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993784863581823,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995262923948758,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995612997481658,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997043807688897,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997337334916665,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997336823016667,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998307124706116,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998758041801921,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.mlp": [
+ {
+ "accuracy": 0.9914991960686166,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917268468707334,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927488385292236,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930720798729453,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958052360889269,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961346215859521,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966010905336589,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978082937304862,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980109936295776,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978642705682432,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981226952077122,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988933841959806,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990413547820935,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993695393313828,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999404157664685,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994923167796514,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997564339473684,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.self_attn": [
+ {
+ "accuracy": 0.9942247387953103,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946865537785925,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962510646437295,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964844116329914,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972275005711708,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972892192599829,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998286331188865,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984212848285097,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985290603726753,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985686516301939,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986273442555103,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987223440039088,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990466599265346,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991211750748334,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994395356698078,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995100941032433,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995127596866951,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997207773012633,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998063149869267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.mlp": [
+ {
+ "accuracy": 0.9895866647129878,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898669680696912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911745040444657,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915538832719903,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948009337531403,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952090648002923,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957429091446102,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973041417542845,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975289096473716,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997346305361134,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976660298998468,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986318852752447,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988157128464081,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992340586177306,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992737183492864,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993743035811349,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997262309279904,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "strategy": {
+ "model.layers.0.self_attn": {
+ "accuracy": 0.9976803997560637,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.0.mlp": {
+ "accuracy": 0.9985220660455525,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.1.self_attn": {
+ "accuracy": 0.9963489300280344,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.1.mlp": {
+ "accuracy": 0.9993296123193431,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.2.self_attn": {
+ "accuracy": 0.9997747411516684,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.2.mlp": {
+ "accuracy": 0.9998017110610817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.3.self_attn": {
+ "accuracy": 0.9996684330690186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.3.mlp": {
+ "accuracy": 0.9997108341558487,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.4.self_attn": {
+ "accuracy": 0.9998596248324247,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.4.mlp": {
+ "accuracy": 0.9999101926136973,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.5.self_attn": {
+ "accuracy": 0.9997582192772825,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.5.mlp": {
+ "accuracy": 0.9998637763601437,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.6.self_attn": {
+ "accuracy": 0.9996078892836522,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.6.mlp": {
+ "accuracy": 0.9996214626262372,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.7.self_attn": {
+ "accuracy": 0.9995699401733873,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.7.mlp": {
+ "accuracy": 0.9995165277887281,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.8.self_attn": {
+ "accuracy": 0.9995301622984698,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.8.mlp": {
+ "accuracy": 0.9994418839887658,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.9.self_attn": {
+ "accuracy": 0.9994058420306828,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.9.mlp": {
+ "accuracy": 0.9993462364582228,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.10.self_attn": {
+ "accuracy": 0.999758682492029,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.10.mlp": {
+ "accuracy": 0.9998092828209337,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.11.self_attn": {
+ "accuracy": 0.9995570989412954,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.11.mlp": {
+ "accuracy": 0.999716363789048,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.12.self_attn": {
+ "accuracy": 0.9993911658057186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.12.mlp": {
+ "accuracy": 0.9992910219734767,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.13.self_attn": {
+ "accuracy": 0.9993510511812929,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.13.mlp": {
+ "accuracy": 0.9992196869279724,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.14.self_attn": {
+ "accuracy": 0.9992562690567866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.14.mlp": {
+ "accuracy": 0.9991946576228656,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.15.self_attn": {
+ "accuracy": 0.9990211970216478,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.15.mlp": {
+ "accuracy": 0.9991551881867053,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.16.self_attn": {
+ "accuracy": 0.9996761174661515,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.16.mlp": {
+ "accuracy": 0.9997426053332674,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.17.self_attn": {
+ "accuracy": 0.9993985197106667,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.17.mlp": {
+ "accuracy": 0.9996122916345485,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.18.self_attn": {
+ "accuracy": 0.9988193828030489,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.18.mlp": {
+ "accuracy": 0.999061718754092,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.19.self_attn": {
+ "accuracy": 0.9990680614864687,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.19.mlp": {
+ "accuracy": 0.999004362798587,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.20.self_attn": {
+ "accuracy": 0.9988004061815445,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.20.mlp": {
+ "accuracy": 0.9988709868557635,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.21.self_attn": {
+ "accuracy": 0.9985773553344188,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.21.mlp": {
+ "accuracy": 0.9989376296944101,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.22.self_attn": {
+ "accuracy": 0.9996226447292429,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.22.mlp": {
+ "accuracy": 0.9996749793717754,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.23.self_attn": {
+ "accuracy": 0.9993884474752122,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.23.mlp": {
+ "accuracy": 0.9995366168823239,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.24.self_attn": {
+ "accuracy": 0.9988711644837167,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.24.mlp": {
+ "accuracy": 0.998800835244765,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.25.self_attn": {
+ "accuracy": 0.9986730054151849,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.25.mlp": {
+ "accuracy": 0.9986620795898489,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.26.self_attn": {
+ "accuracy": 0.998169852187857,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.26.mlp": {
+ "accuracy": 0.998560123873176,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.27.self_attn": {
+ "accuracy": 0.9986130048564519,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.27.mlp": {
+ "accuracy": 0.9985312398639508,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.28.self_attn": {
+ "accuracy": 0.9995850489922304,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.28.mlp": {
+ "accuracy": 0.9995884474592458,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.29.self_attn": {
+ "accuracy": 0.9993154952098848,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.29.mlp": {
+ "accuracy": 0.99946317839931,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.30.self_attn": {
+ "accuracy": 0.9985810204962036,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.30.mlp": {
+ "accuracy": 0.9986326706202817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.31.self_attn": {
+ "accuracy": 0.9986929383812821,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.31.mlp": {
+ "accuracy": 0.9986571829067543,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.32.self_attn": {
+ "accuracy": 0.9986889937572414,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.32.mlp": {
+ "accuracy": 0.9986658465059008,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.33.self_attn": {
+ "accuracy": 0.9988134019586141,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.33.mlp": {
+ "accuracy": 0.9986808212925098,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.34.self_attn": {
+ "accuracy": 0.9997000065504835,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.34.mlp": {
+ "accuracy": 0.9995850031682494,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.35.self_attn": {
+ "accuracy": 0.999517957230637,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.35.mlp": {
+ "accuracy": 0.9994254562079732,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.36.self_attn": {
+ "accuracy": 0.9989366781519493,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.36.mlp": {
+ "accuracy": 0.9986830660418491,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.37.self_attn": {
+ "accuracy": 0.9988766744427267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.37.mlp": {
+ "accuracy": 0.9986840636047418,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.38.self_attn": {
+ "accuracy": 0.998879095823213,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.38.mlp": {
+ "accuracy": 0.9986440870707156,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.39.self_attn": {
+ "accuracy": 0.9989914739744563,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.39.mlp": {
+ "accuracy": 0.9986363174612052,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.40.self_attn": {
+ "accuracy": 0.9997115908463456,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.40.mlp": {
+ "accuracy": 0.999586516083582,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.41.self_attn": {
+ "accuracy": 0.9995351644083712,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.41.mlp": {
+ "accuracy": 0.9994161145659746,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.42.self_attn": {
+ "accuracy": 0.9989551416583708,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.42.mlp": {
+ "accuracy": 0.9985835553889046,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.43.self_attn": {
+ "accuracy": 0.9988119364206796,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.43.mlp": {
+ "accuracy": 0.9985817404231057,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.44.self_attn": {
+ "accuracy": 0.9984773485775804,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.44.mlp": {
+ "accuracy": 0.9983965002611512,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.45.self_attn": {
+ "accuracy": 0.9986625081146485,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.45.mlp": {
+ "accuracy": 0.9985231003520312,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.46.self_attn": {
+ "accuracy": 0.9998758041801921,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.46.mlp": {
+ "accuracy": 0.9997564339473684,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.47.self_attn": {
+ "accuracy": 0.9998063149869267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ "model.layers.47.mlp": {
+ "accuracy": 0.9997262309279904,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ },
+ "q_last_module_idx": 98
+}
\ No newline at end of file
diff --git a/measurement.json b/measurement.json
new file mode 100644
index 0000000000000000000000000000000000000000..50d825fe46a9f38e98f195eab77788d4e7067cd3
--- /dev/null
+++ b/measurement.json
@@ -0,0 +1,93655 @@
+{
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.902508161496371,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9203556086868048,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9246767205186188,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955721165984869,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563947499264032,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568075467832386,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.967554040485993,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687355454079807,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720487990416586,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737953173462301,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780201958492398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794629843672737,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802156471414492,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822191685670987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892206196091138,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910724487854168,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913987399486359,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943463499366771,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976803997560637,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9123474769294262,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9179784115403891,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.932591964257881,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.937203103909269,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9628385086543858,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679891671985388,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975108077051118,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814978303038515,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832107973634265,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824423746322282,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846852865885012,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910700924519915,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924008402740583,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950336539186537,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954662031377666,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970542312948965,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985220660455525,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.8877861187793314,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8973407302983105,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.910242407117039,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9385041804052889,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.940526916179806,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9435215112753212,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9515076652169228,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955113283591345,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963577882386744,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648914394201711,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705673614516854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749045115895569,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721663881791756,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771681335987523,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837308657588437,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887261725962162,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859461099258624,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945900982129388,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963489300280344,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9527104110457003,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563737579155713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9567327869590372,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568935022689402,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930489940161351,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939982455398422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947059626574628,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970151603920385,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970192952168873,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996223299196572,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975017743272474,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980218067430542,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983664975079591,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998927123764588,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990109881728131,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991535865574406,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993296123193431,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9912483744265046,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916435057821218,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923406311427243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994128311634995,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952435296145268,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953209938539658,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969110778911272,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286335446872,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972990020178258,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974499639647547,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974876438791398,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976576824410586,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979693320783554,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981310617731651,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988565697785816,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990829429698351,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991081247353577,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995378834355506,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997747411516684,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9892762480885722,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895454781362787,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912471331772394,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991794113710057,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947041008272208,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951274986669887,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958551824238384,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972624051006278,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975140962487785,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973148557328386,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976522701763315,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998632437454944,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988308300817152,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992609197433922,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993038972388604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994499729345989,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998017110610817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9899887884967029,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904003122937866,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915672297938727,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936683645646553,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944629863894079,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945710314495955,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955734857358038,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956779086787719,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961270387721015,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962668074440444,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971315091243014,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974506823491538,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997335236883373,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976795297843637,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985245664138347,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988046068392578,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987181324831909,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995058817348763,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996684330690186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9844154579914175,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847926673828624,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871780377579853,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879374770680442,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921912606514525,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928218296554405,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938397765217815,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959715837030672,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963449053175282,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960351230402011,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965353266888997,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997980712352728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982754946904606,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989119462479721,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989720682242478,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991756232702755,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997108341558487,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9940207607578486,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951273926417343,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978095898550237,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977955239373841,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976957584294723,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979161136798211,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977876108750934,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997963167646958,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997377075618715,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973086231620982,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998610572751204,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987351718518767,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986403053189861,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987657150850282,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992435346975981,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994336164018023,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992440667119808,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998596248324247,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998452412955885,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9953385644475929,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954606430546846,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961155957134906,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963309457525611,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976867814257275,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978648535179673,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981546685303329,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988047819279018,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989133847047924,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988297178279026,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989727466163458,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994054905910161,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994889207046072,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996792593592545,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996966957805853,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997519157095667,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9999101926136973,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.983027494745329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988349880441092,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933699457033072,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933736611856148,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932925261673518,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959597528359154,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934077207872178,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996139597526053,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955893133592326,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955878545151791,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972478003473952,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997818165429635,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973545127431862,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977891548696789,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987297143379692,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999152827916987,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987308212730568,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997582192772825,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996598286616063,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9925510261964519,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992728061741218,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993811263149837,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941331359441392,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962669986125547,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965560747805284,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970157300849678,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980933387705591,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982620093869627,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981101373850834,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983399104385171,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990414018138836,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991743572754785,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994895925538003,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995117338021373,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995983660337515,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998637763601437,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9853996349847876,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894411026616581,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904139981372282,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927006095531397,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937793352000881,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939195910119452,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951347391761374,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953175328264479,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995725197615684,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959138984268066,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967400987807196,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969654745509615,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969755911588436,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997284421682707,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982928356475895,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986039540162892,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985298825704376,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994038183504017,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996078892836522,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9796033757738769,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801610637223348,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834316545748152,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844096631277353,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898302512592636,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906708221533336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99199359229533,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947536016406957,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952311203232966,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948356513341423,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954958254238591,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973706452437909,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977587403773214,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985758317998261,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986605149169918,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989306384450174,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996214626262372,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9842456089681946,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847515997826122,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862019201391377,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989355675119441,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918334566464182,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919354065787047,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939575470634736,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941131755767856,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950063232099637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995209871471161,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958428621175699,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962687853985699,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962730693951016,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966743008699268,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979651567628025,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982993875746615,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998358772485517,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992254695025622,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995699401733873,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.974171947222203,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748882604762912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789550169371068,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801530737895519,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871387034072541,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881842365139164,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898216370493174,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933750616037287,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939624306280166,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934718561416958,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994298821548,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966810355253983,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99716517124034,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982046157965669,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983098571392475,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998642562663008,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995165277887281,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9817969363648444,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826170109445229,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842383282957599,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987987891305238,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902089073439129,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905349539476447,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925528938765638,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929857852112036,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941390909953043,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943753193656448,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953846742282622,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957563256757567,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958568579750136,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962565110181458,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977382431097794,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981046210305067,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982187150235404,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990283438819461,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995301622984698,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9699697830947116,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707644692389295,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758064048364758,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772977469256148,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850295406067744,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862511573592201,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882942370604724,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922574244847056,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929685922397766,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924017082084902,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933631024032366,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961383405752713,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966958504082868,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979089612534153,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980363582653808,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984643486241112,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994418839887658,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9762045053066686,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977105101919733,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792947630630806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844983145594597,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880177224404179,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882629308849573,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916761830099858,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919323883077595,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927965526585467,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931856998300646,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940749995876104,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945208916324191,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947839853120968,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952225348097272,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970656104414957,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598896999145,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978074637183454,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986692166348803,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994058420306828,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9653462056303397,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966274723643437,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718257325002924,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734515016898513,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827394402818754,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841620186925866,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863689955091104,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910840421216562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918851483380422,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912260650889948,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923439234553371,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955300039000576,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996181691181846,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975773043261142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977210198558168,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981782444083365,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993462364582228,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9896986646344885,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898966330802068,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959875158820068,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959419664373854,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958830689574825,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994527042581467,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958289333299035,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945835779071786,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961835095891729,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961431270785397,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976346265902976,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984772361494834,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976802616729401,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998478227716987,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987404754647287,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989050509684603,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987427304295124,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999758682492029,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996692690583586,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.990279221732635,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905298211961053,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920168473036028,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924921841884498,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951802256982774,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459609918762,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961894396110438,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975046989129623,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977252853277605,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975598426244687,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978572657200857,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987597939252737,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989334050405887,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993266929595848,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993670692056185,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994898942059081,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998092828209337,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9863239590195008,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868265291443095,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921606028510723,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919705191277899,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919389690621756,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992855364602292,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919611498771701,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929512560192961,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936951973941177,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936885720526334,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966282595996745,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969246424443554,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966480978473555,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969086252094712,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981692790461238,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984889770348673,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981699131167261,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995570989412954,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999439229904965,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9847793944063596,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985123383696191,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875411554821767,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882662810268812,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923551889369264,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929447612666991,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939775222155731,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996076546041877,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964278059342178,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961227821040666,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965966620657127,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980314106360311,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983075839991216,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989473513051053,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989980212376395,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991948430506454,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999716363789048,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9758020673179999,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766555116511881,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786176779307425,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845216747489758,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873013857286423,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874568734667264,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911378039978445,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912875468144193,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920827563619241,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928605046588928,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936999409110285,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940654404636007,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942948371754028,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947681912162807,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968999695556704,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974256125715328,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977206327166641,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986770139730652,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993911658057186,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9619943019933999,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630233785137534,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9692408089758828,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710238851839676,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810652892338112,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826576914638281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851201827987097,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902692650794052,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911376674135681,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990367868449539,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916139839915559,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950910237093922,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958211112534627,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973492388962768,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974947760347277,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979947993706446,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992910219734767,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9732593579683453,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9740604794351384,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758571569109336,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981850401032716,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858934246003628,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859436851111241,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901865926804021,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901829248992726,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916712933336385,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991981672210386,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932556867133826,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936436033167411,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937982890987769,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943627052416559,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966125944047235,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971977410459658,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975617044838145,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998476260661846,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993510511812929,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9590617874637246,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9601758192293346,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669503723271191,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689558737445623,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794857824454084,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812142355367541,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839909761212766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894312581745908,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903528640279546,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895940163987689,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909067719127052,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946883149386849,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954700223461259,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997104218302411,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972932919627056,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997851840693329,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992196869279724,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9674572005169466,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686749550746754,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713187958113849,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791675666347146,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834151559043676,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835956042516045,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892363072722219,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894226833130233,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904058027314022,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911344906722661,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915743695746642,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922297771845479,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926136705034878,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931706467177719,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959881600225344,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965825928520644,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972190943808528,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980772634007735,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992562690567866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9578170392196625,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9590193158946931,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663224390242249,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685161245288327,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788233999861404,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806237743468955,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836092637851834,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889325238764286,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051793083549,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892134199035354,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906160493264906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944984828180168,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953196261485573,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969970610691234,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997200449812226,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978280807699775,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991946576228656,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9639496663585305,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654018925502896,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478466277942,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765551248565316,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993222054094,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812793986639008,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867310639237985,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869522373774089,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879012388410047,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883594005950727,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903323815669864,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910267862142064,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912635658401996,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921826082281768,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951284613925964,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960675196198281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962311515701003,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977348528045695,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990211970216478,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9561710001435131,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574084184132516,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9650795814814046,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674192051170394,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779465935425833,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798049030359834,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829885881626979,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884562431252562,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895629284437746,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887702904525213,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902152858558111,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942774242081214,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995120030187536,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968639670987613,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970916908496292,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977569706679787,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991551881867053,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9918902807403356,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919913714402355,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941640416509472,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994114655040903,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943733899563085,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995450929418439,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945171528088395,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954713895567693,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960881019214867,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961418636376038,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970261527196271,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997912951730541,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969646203535376,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978629015240585,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983468192367582,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987805694254348,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983534378916374,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996761174661515,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995716315679601,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9872070293640718,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875441331532784,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897534946794622,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904376343474723,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936092627176549,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941051893692929,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950599781586789,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966741525713587,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969771623436827,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967607702419627,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971554788498906,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983539172244491,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985822996895877,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999102601341292,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999159805731324,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993414144846611,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997426053332674,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.979622381972149,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979827641043812,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915289613127243,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915508964331821,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917960677703377,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99220766252256,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919130146445241,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920798306411598,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899292784975842,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899077572044916,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952535879274365,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955962051753886,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952810344693717,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956986843317281,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968346282839775,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975842618150637,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968424138351111,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993985197106667,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992879528981575,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9795986886601895,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800954010570422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836475889314897,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847203819663264,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897230201167986,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905139947659336,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920523978071287,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946800437464844,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951683254330419,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947878780949395,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954185747192241,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973539462371264,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977197637344943,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985762029973557,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998654469876783,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989491958876897,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996122916345485,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9591868193820119,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9606455501634628,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963689126772806,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9717553314985707,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779619486071169,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790570001350716,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839622974977829,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852984003955498,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869856234290637,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874973931582645,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891311423270963,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899767622118816,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899715750943869,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910780027857982,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943656724644825,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953498630784452,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956311181304045,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976474752329523,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988193828030489,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9518720533233136,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9533123800065368,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9613713058643043,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9637906108982861,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758038044674322,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977887489949353,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812068799510598,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873612260562368,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885673976968974,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876663720351644,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892762696254067,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937061189557426,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946411842829548,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965408335992834,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967898455652175,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974644881876884,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999061718754092,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9581109315622598,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596416996791959,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630198783706874,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733511302620173,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787798321340233,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791236850433052,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867767564137466,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872424571658485,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881223333068192,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891258522984572,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893496042350307,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899621973163448,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904887008597143,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912755576951895,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994794098805869,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956135836691828,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996461629998521,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975838983955327,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990680614864687,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.949402768863365,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9508629126939923,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9593443237245083,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619821181986481,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744504160480574,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766377885825932,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980175971868448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865899439901114,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878867646912113,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869770252262242,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886702921357937,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933583022502717,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943471373990178,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996348384549492,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966181806812529,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973575899493881,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999004362798587,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9531727249268442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9553546705283225,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9591875285841525,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703965260414407,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756896772887558,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762924946844578,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842725213966332,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850752123165876,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985899701656308,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863111876766197,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987529011850711,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885381097556092,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890030458336696,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899866348132491,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938212371780537,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994938170624664,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954652949527372,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972972613177262,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988004061815445,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9480401042383164,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949517953209579,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9579005774576217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9605092275887728,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739315117476508,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761737691005692,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796711904928088,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861559904529713,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875617006327957,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866939535131678,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884115278837271,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931988872704096,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942056277068332,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962179714348167,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965195794648025,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972543855546974,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988709868557635,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9528840403072536,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9542537578381598,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580731589812785,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668758142506704,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9747469594003633,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754544387105852,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98189686704427,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828780224779621,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843287441181019,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857511474983767,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870679471641779,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881277907988988,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880640183691867,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891430772840977,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934388622350525,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947039625258185,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948006026097573,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971943795535481,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985773553344188,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9452786394394934,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9468083463143557,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9557567997835577,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585219516884536,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725894997827709,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748451801715419,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785158903105184,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856567675014958,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869675827212632,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860588647425175,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878197305952199,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928987653984223,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939260903047398,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960985295183491,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963896564295283,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529023896437,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989376296944101,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9910135175450705,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912824938655831,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941338914213702,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941643851052504,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945628636050969,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953863266273402,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948620661452878,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955346850911155,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961977519269567,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962104187288787,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976131010480458,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973233057680773,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976833624823485,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997326233657077,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980814326991094,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988377549670986,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980935500643682,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996226447292429,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994861224859051,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9842658781562932,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846874004579149,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872097237966955,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880227182293311,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921422847255599,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927411952521652,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993845232820604,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959024451673031,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962649137887638,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960189153935062,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964987884595757,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997976030492282,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982532844805974,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988919206953142,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989660839673888,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991774850786896,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996749793717754,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9837653411086649,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840333891916089,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904419900849462,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904393116594292,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907222538604401,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885966366273351,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906790258246474,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888943562982604,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991739244927885,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916874898481183,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955156600044575,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954305627325084,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955371092073619,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360554984305,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968435579503421,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997513074951712,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968456693313783,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993884474752122,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992451430443907,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9759478892665356,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765194484498352,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803963751764968,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815706800436601,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878923632204533,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888132131309249,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904801220982336,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937392670253757,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994295743497787,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938581893220544,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99459478398785,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996881335915532,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973090803541709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983202805378824,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984141130771604,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987336630001664,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995366168823239,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9556966400705278,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9570334849413484,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.960812549572438,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703220267547294,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772858648793772,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778754758881405,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854066136176698,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862237990018912,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869342013844289,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879776879679412,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887068463722244,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893193228635937,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898968231282197,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162148156203,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942090424301568,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953937472309917,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957477512361947,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974927010043757,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988711644837167,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.939760722219944,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9414845332503319,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9511478268541396,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.954194939462468,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698484404943883,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972340663196519,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763197798747569,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841194238979369,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855774453608319,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846697235479951,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866198089439422,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921901000780053,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933257188240532,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956658300361596,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960375506052515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968587130861124,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998800835244765,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9503053929656744,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9525580331683159,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565810647327453,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682650562608615,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738911159802228,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746871701208875,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981898903963156,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829692101338878,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843669487163424,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848410444683395,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867640393786132,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879159886040725,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881031874101609,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892810968449339,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934559455141425,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945060422469396,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950925561715849,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970777546841418,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986730054151849,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9339546884875745,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9359053233638406,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9467246429994702,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9503237595781684,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668553009396419,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696119821164757,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741377918981016,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823939295019954,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840543393511325,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831261212239042,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985276190796867,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913982859579846,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926482754817698,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951997506432235,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956310951092746,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966006540635135,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986620795898489,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.950924233533442,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526217842940241,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9560005511157215,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651925785001367,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.974756367970258,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754410153254867,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821089534088969,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829862234182656,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833339191973209,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847710862522945,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871181106427684,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879768814425915,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882259144214913,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893726909067482,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922168490593322,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943546110007446,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933991891448386,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969799995742505,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998169852187857,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.930072165094316,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9321989654563367,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9437624660786241,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9476719659287483,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649747095536441,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678829013137147,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972721746424213,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812747784890234,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830624930327758,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821682657347992,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844429356744513,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909043678781018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992228524555685,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948874444817193,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953900648106355,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996433951149811,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998560123873176,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9506571341771632,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95194994029589,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554108332376927,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965399626409635,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973783713998273,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9746870229719207,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815001173410565,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827296736184508,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836740818573162,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849110426730476,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869808834628202,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881130746216513,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988304317521397,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895301525830291,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934029994474258,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946216194075532,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949501673690975,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971529373142403,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986130048564519,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9300565491430461,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9322172533720732,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9436244373209774,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475068859755993,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649612298235297,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679478236939758,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727855251403525,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812858692603186,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830675406847149,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821449116570875,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844449243973941,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908643537783064,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922332215355709,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948491686081979,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995345929695759,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963671152072493,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985312398639508,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9852219514432363,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854540597880259,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992581088328734,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925188558117952,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992044290585909,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908394656376913,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921488391701132,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909024515654892,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931627179321367,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933414816041477,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962164734170074,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975688391277799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962678278825479,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975411284103757,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981369049564819,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986740430613281,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981451820058282,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850489922304,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994992236806866,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9810774835059419,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816042414167896,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984690910903737,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857331512612291,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99059497367125,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913256106083281,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926997422298882,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950385903939605,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954885762417689,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952267833868973,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958167897420935,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975691078143427,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979071016423404,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986456794285914,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987542949966155,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990193766789162,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995884474592458,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9801707121077925,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803194139385596,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864167625200935,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861038540257141,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867335963645019,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873976144008338,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986821249302011,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876039975206368,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904251811094582,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903880580095574,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943270806106739,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950134730897844,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943478996865451,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949842338974122,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966566005459754,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976610257581342,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966578336025123,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993154952098848,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991263385672937,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9730628487886861,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973774635582231,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779872408835217,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792550611309707,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865127976518124,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875629763700999,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893352640210651,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929699986823834,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993587943376042,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931577415263746,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939938187017106,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965159249695716,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970039997715503,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980992393029737,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982241835823515,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985617383063072,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99946317839931,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.960547131486237,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619710550177842,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641266402322799,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726306177908555,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774892054265365,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978482106118463,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831622139317915,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843430254259147,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861415453488007,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869873017305508,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988334433583077,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893150111893192,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892438768292777,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901899283868261,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938217146846,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951323153509293,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948282100085635,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976286789024016,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985810204962036,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9314752910286188,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9334497996605933,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442711595911533,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477534026373178,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658016683533788,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686134352814406,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731169363949448,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820591834140942,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836634104140103,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826249615289271,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984836339019239,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911455910187215,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924397784052417,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951095145952422,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954999905312434,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996416717855027,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986326706202817,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9617549669928849,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633937487378716,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656938512343913,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973594733630307,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796587142627686,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803864565910771,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848732578684576,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858064573490992,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872101178625599,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877110663801432,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895211174734868,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901978281559423,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903222138527781,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910594234825112,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943284161563497,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953842611867003,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952440769411623,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976959823688958,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986929383812821,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9317440665327013,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9337658795993775,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9442870973143727,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9475885890424252,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661285383626819,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968816627166234,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731064232764766,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823057807516307,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838297074311413,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828014142112806,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849381670355797,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912480839411728,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925001342198811,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951820038841106,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955530187871773,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964022201456828,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986571829067543,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9678770737955347,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691678545204923,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709918311564252,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765152925392613,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807526028016582,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820293172961101,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841885701171122,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855642172624357,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866925096139312,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870485090650618,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897773859556764,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902223890530877,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902153389411978,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909679422853515,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942741809645668,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955411965493113,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948284650454298,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980486317217583,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986889937572414,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.932304578833282,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342344424221665,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444406803231686,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.947586020687595,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662447790615261,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689184916205704,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731233624042943,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98242219875101,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839446639525704,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828673212323338,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849894531071186,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912908738479018,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925277467991691,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952316170674749,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955659814586397,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964031476119999,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986658465059008,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9655442168004811,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671692294068635,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697359376586974,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761799619300291,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803578312275931,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814343429170549,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843596651917323,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858793318271637,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869456980959512,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874889963539317,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896778290858492,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908777676173486,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990540920291096,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991681206272915,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948772302595899,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958305972686503,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956192086392548,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997941504814662,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988134019586141,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9325818351935595,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9344900948926806,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443744430318475,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474421949125826,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663861433509737,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690310031874105,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731040432816371,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824838677886873,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839952855254523,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829381878953427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850510018295608,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913280457840301,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925565869780257,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952473715238739,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995588112360565,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963884983881144,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986808212925098,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9875456623267382,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879995033843443,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943620861449745,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943356119911186,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945071052061394,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966131598775974,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945579385675956,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965798364137299,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963930066151079,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963805679144571,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978544423211133,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978882907453226,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979100275522796,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979497697204351,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985688193410169,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992296196833195,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985732258646749,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997000065504835,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999592912312437,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9806502945721149,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981183686060831,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839521977119148,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848100668168627,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903757648426108,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911127613158897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922769031254575,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950082659197506,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954419361311011,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951236607448664,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957144900981802,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975175310974009,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978603233030299,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986405047020526,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987238197063562,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989435812094598,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995850031682494,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9871479421271943,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880682986695319,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990131174272392,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898401049431413,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900419003679417,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925147893663961,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990109168633353,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992998635803815,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936714109790046,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937381937343162,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962826371920528,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965584015008062,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964808065269608,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966537497966783,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974247746868059,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984650467449683,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99742937242263,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999517957230637,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992533045333403,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9704249275382608,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9712161585921422,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754329854622483,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766340904170647,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985306327464059,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864371244912036,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881206953432411,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924467977543827,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930931831186172,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925579508999363,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934620868298225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962237248546444,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967452616401715,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979627527136472,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998072977818083,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983912546158535,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994254562079732,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.965649431804195,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668996206019074,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694499158067629,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9755590457934886,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807646207045764,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981554796686396,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854566292488016,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863431870471686,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881037988234311,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988736093393527,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904858563677408,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914403361617588,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915653750649653,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924774839309976,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99530891442555,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961586743447697,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996117347778636,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978859801776707,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989366781519493,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9328135452233255,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9347493099048734,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9444518794771284,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474378717131913,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966479453491047,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691691853804514,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731421418255195,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982578118913807,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840653012506664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829857222503051,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851169427274726,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913530095946044,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925886914716102,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952522270032205,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956026887812186,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963736499339575,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830660418491,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9632973142433912,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647458797553554,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677944149589166,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744596001692116,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805283090099692,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815037929220125,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855152769596316,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868621388450265,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877731337910518,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881437398144044,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905192241421901,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911876061523799,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915461755299475,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924024375504814,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948597592592705,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961402852204628,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957100535684731,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979259906467632,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988766744427267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9317238796502352,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336495161987841,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9432325733359903,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462370390538126,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965948719996959,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686132016358897,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725569972069934,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822160669136792,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837689126143232,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827277816366404,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848514216719195,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912281962460838,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924635672650766,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951937775185797,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955459379707463,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996324696301599,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986840636047418,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9624556459020823,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9638149088714272,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966128898318857,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731651132460684,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979888558271341,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807631041621789,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854432169813663,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865111434482969,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881467132945545,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988405313459225,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900051496224478,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905749676981941,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908825725433417,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917469431529753,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947028355381917,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959433214535238,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956959496485069,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977895863121375,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998879095823213,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.9317085896618664,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9336435773875564,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.942958232248202,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9459079855587333,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658660412533209,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9685534046730027,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97245246428065,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821948111057281,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837224487564526,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826810664962977,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848097579088062,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912046356475912,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924372929672245,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951681817183271,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995533375418745,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962979012634605,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986440870707156,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9656216644216329,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668171582743526,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708404617849737,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773957233410329,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818287189118564,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825881774304435,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866229476756416,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877469159546308,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876828042906709,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885670749936253,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910485846921802,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914193961885758,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918927509279456,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924896143202204,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954261311504524,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962042814731831,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963586331723491,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980747035369859,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989914739744563,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.9323020433075726,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342701844871044,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9433167146053165,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462174526415765,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660997575847432,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687749480362982,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725526751717553,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822096759453416,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837591436225921,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828054906101897,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849168861983344,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991255592321977,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924872927949764,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951640699291602,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955584222916514,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962992245709756,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986363174612052,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.self_attn": [
+ {
+ "accuracy": 0.9923371153709013,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925224412581883,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954915028938558,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99549823399866,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995459749334259,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971324702346465,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955590646713972,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972384700377006,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975309232977452,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975821622356307,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985752828288241,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984370510574081,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985908498783829,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998461567243794,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989729408916901,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991191346489359,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989771689652116,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997115908463456,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999673805286875,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.mlp": [
+ {
+ "accuracy": 0.9811925254762173,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817058588378131,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842266714549623,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849823992699385,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905871613882482,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913147779298015,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923701590159908,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951182099757716,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955421296763234,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952246638713405,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958023339568172,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975680393690709,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979028400339303,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986676175176399,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987487139806035,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989479207215481,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999586516083582,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.self_attn": [
+ {
+ "accuracy": 0.9863162519177422,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870703057968058,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923058476706501,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921633902995382,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924568198912311,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943161772680469,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924855433346238,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943068775464781,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946824459475465,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947726883983705,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960416847025044,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970286915777251,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961076912295539,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971158314438071,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978602468036115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983802583883516,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978632671263767,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995351644083712,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993034406106744,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.mlp": [
+ {
+ "accuracy": 0.9722781821619719,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730187952518463,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761316905496642,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97704425919801,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861467212904245,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872288115439005,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885241388110444,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927656080690213,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933978671906516,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929643408977427,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993827001657337,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964197074586991,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969165127258748,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980382031499175,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981734739849344,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984235907904804,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994161145659746,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.self_attn": [
+ {
+ "accuracy": 0.9635075002443045,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647748144343495,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678804749855772,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975613204529509,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811677185352892,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817737329285592,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870748340035789,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877749991719611,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888431992731057,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889247854589485,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905630798893981,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912089991848916,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916505919245537,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922629974607844,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952286969055422,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961338660214096,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963223397062393,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978658634063322,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989551416583708,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.mlp": [
+ {
+ "accuracy": 0.9316645334474742,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9335778222884983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9425450335256755,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455189639702439,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656224818900228,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683304415084422,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721890660002828,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981844296795316,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834737943019718,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825274200411513,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846752261510119,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911125443177298,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923583892232273,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950715682061855,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954821281717159,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996249489224283,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985835553889046,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.self_attn": [
+ {
+ "accuracy": 0.9537865724414587,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9551741087343544,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607197451405227,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698350551771,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976477846968919,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769113931106403,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844105805968866,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849680608604103,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986211190931499,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871965564088896,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881930081755854,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889599440502934,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989799489849247,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907162202289328,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940077341743745,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952460718923248,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952964097028598,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973116889304947,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988119364206796,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.mlp": [
+ {
+ "accuracy": 0.9297301913611591,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9317178723867983,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.941267877118662,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443992015440017,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644244713708758,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9672436950495467,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714020473184064,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810993310529739,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98275462386664,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818051372421905,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839897501515225,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906999649247155,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919934530335013,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948506485670805,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995264163473621,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960826287424425,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985817404231057,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.self_attn": [
+ {
+ "accuracy": 0.9438093651551753,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9455973405856639,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9558804116677493,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643303193151951,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721492730313912,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9696818670490757,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780592895112932,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975161383044906,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798537619644776,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800844814162701,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868916404084302,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878041589981876,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882374077569693,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889709630515426,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932704607781488,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943467889679596,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945205381663982,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971068996092072,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984773485775804,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.mlp": [
+ {
+ "accuracy": 0.922890292480588,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.924928605556488,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9353087733034045,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9386197202838957,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604559869039804,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964205636177212,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687150722602382,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781123218126595,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808429721742868,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800167196663097,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823547832202166,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989655573212076,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911466648918577,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942131606221665,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947176255809609,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99570402916288,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983965002611512,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.self_attn": [
+ {
+ "accuracy": 0.9432861453387886,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456116510555148,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9537060365546495,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966851421049796,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733642505016178,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735991606721655,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824671322712675,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828924887115136,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846992893726565,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985385547101032,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866445885854773,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875469116959721,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884863024926744,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894941137754358,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993721309438115,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994685380050214,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952394235297106,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971018662472488,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986625081146485,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.mlp": [
+ {
+ "accuracy": 0.9337531251367182,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9352742773480713,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434063888620585,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9460821493994445,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660537266172469,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689954988425598,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726249100640416,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820060149068013,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838739162078127,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826479388866574,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848665873287246,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910833079484291,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924003609921783,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950582988094538,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954360906558577,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962620872247498,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985231003520312,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.self_attn": [
+ {
+ "accuracy": 0.9973379723232938,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975171713740565,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982815929397475,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984281775759882,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986830925190588,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987098783676629,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991939260071376,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992102088763204,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992937228125811,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993120388971874,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993242765995092,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993784863581823,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995262923948758,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995612997481658,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997043807688897,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997337334916665,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997336823016667,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998307124706116,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998758041801921,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.mlp": [
+ {
+ "accuracy": 0.9914991960686166,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917268468707334,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927488385292236,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930720798729453,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958052360889269,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961346215859521,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966010905336589,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978082937304862,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980109936295776,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978642705682432,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981226952077122,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988933841959806,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990413547820935,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993695393313828,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999404157664685,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994923167796514,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997564339473684,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.self_attn": [
+ {
+ "accuracy": 0.9942247387953103,
+ "total_bits": 89665536,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946865537785925,
+ "total_bits": 92221440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962510646437295,
+ "total_bits": 95758848,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964844116329914,
+ "total_bits": 112272384,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972275005711708,
+ "total_bits": 132913152,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972892192599829,
+ "total_bits": 132980224,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998286331188865,
+ "total_bits": 169613312,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984212848285097,
+ "total_bits": 169745920,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985290603726753,
+ "total_bits": 171195392,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985686516301939,
+ "total_bits": 173563904,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986273442555103,
+ "total_bits": 174923264,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987223440039088,
+ "total_bits": 175750144,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990466599265346,
+ "total_bits": 179253248,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991211750748334,
+ "total_bits": 181592064,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994395356698078,
+ "total_bits": 220469248,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995100941032433,
+ "total_bits": 223535104,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995127596866951,
+ "total_bits": 253499392,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997207773012633,
+ "total_bits": 265838592,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998063149869267,
+ "total_bits": 337385472,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.mlp": [
+ {
+ "accuracy": 0.9895866647129878,
+ "total_bits": 395461696,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898669680696912,
+ "total_bits": 409224256,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911745040444657,
+ "total_bits": 457272320,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915538832719903,
+ "total_bits": 512977920,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948009337531403,
+ "total_bits": 578397280,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952090648002923,
+ "total_bits": 592872448,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957429091446102,
+ "total_bits": 637454432,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973041417542845,
+ "total_bits": 728741472,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975289096473716,
+ "total_bits": 739476480,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997346305361134,
+ "total_bits": 751543392,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976660298998468,
+ "total_bits": 766018560,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986318852752447,
+ "total_bits": 924689504,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988157128464081,
+ "total_bits": 939164672,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992340586177306,
+ "total_bits": 1069524064,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992737183492864,
+ "total_bits": 1110384896,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993743035811349,
+ "total_bits": 1209999616,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997262309279904,
+ "total_bits": 1415520512,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "last_module_idx": 98
+}
\ No newline at end of file
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd50792103e113781267be8add7e594d9a147499
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,442 @@
+{
+ "metadata": {
+ "total_size": 24952840192
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00006-of-00006.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.norm.weight": "model-00005-of-00006.safetensors"
+ }
+}
diff --git a/mtbench-comparison.png b/mtbench-comparison.png
new file mode 100644
index 0000000000000000000000000000000000000000..87fb2c26715940a222e99a74c5562402dca6d40f
Binary files /dev/null and b/mtbench-comparison.png differ
diff --git a/needle-in-a-haystack.txt b/needle-in-a-haystack.txt
new file mode 100644
index 0000000000000000000000000000000000000000..43b6cd4182b1d1ffafd0cae6ea2a51af9579becf
--- /dev/null
+++ b/needle-in-a-haystack.txt
@@ -0,0 +1,898 @@
+what is the random number?
+```
+May 2006 (This essay is derived from a keynote at Xtech.) Could you reproduce Silicon Valley elsewhere, or is there something
+unique about it?It wouldn't be surprising if it were hard to reproduce in other
+countries, because you couldn't reproduce it in most of the US
+either. What does it take to make a silicon valley even here?What it takes is the right people. If you could get the right ten
+thousand people to move from Silicon Valley to Buffalo, Buffalo
+would become Silicon Valley.
+[1]That's a striking departure from the past. Up till a couple decades
+ago, geography was destiny for cities. All great cities were located
+on waterways, because cities made money by trade, and water was the
+only economical way to ship.Now you could make a great city anywhere, if you could get the right
+people to move there. So the question of how to make a silicon
+valley becomes: who are the right people, and how do you get them
+to move?Two TypesI think you only need two kinds of people to create a technology
+hub: rich people and nerds. They're the limiting reagents in the
+reaction that produces startups, because they're the only ones
+present when startups get started. Everyone else will move.Observation bears this out: within the US, towns have become startup
+hubs if and only if they have both rich people and nerds. Few
+startups happen in Miami, for example, because although it's full
+of rich people, it has few nerds. It's not the kind of place nerds
+like.Whereas Pittsburgh has the opposite problem: plenty of nerds, but
+no rich people. The top US Computer Science departments are said
+to be MIT, Stanford, Berkeley, and Carnegie-Mellon. MIT yielded
+Route 128. Stanford and Berkeley yielded Silicon Valley. But
+Carnegie-Mellon? The record skips at that point. Lower down the
+list, the University of Washington yielded a high-tech community
+in Seattle, and the University of Texas at Austin yielded one in
+Austin. But what happened in Pittsburgh? And in Ithaca, home of
+Cornell, which is also high on the list?I grew up in Pittsburgh and went to college at Cornell, so I can
+answer for both. The weather is terrible, particularly in winter,
+and there's no interesting old city to make up for it, as there is
+in Boston. Rich people don't want to live in Pittsburgh or Ithaca.
+So while there are plenty of hackers who could start startups,
+there's no one to invest in them.Not BureaucratsDo you really need the rich people? Wouldn't it work to have the
+government invest in the nerds? No, it would not. Startup investors
+are a distinct type of rich people. They tend to have a lot of
+experience themselves in the technology business. This (a) helps
+them pick the right startups, and (b) means they can supply advice
+and connections as well as money. And the fact that they have a
+personal stake in the outcome makes them really pay attention.Bureaucrats by their nature are the exact opposite sort of people
+from startup investors. The idea of them making startup investments
+is comic. It would be like mathematicians running Vogue-- or
+perhaps more accurately, Vogue editors running a math journal.
+[2]Though indeed, most things bureaucrats do, they do badly. We just
+don't notice usually, because they only have to compete against
+other bureaucrats. But as startup investors they'd have to compete
+against pros with a great deal more experience and motivation.Even corporations that have in-house VC groups generally forbid
+them to make their own investment decisions. Most are only allowed
+to invest in deals where some reputable private VC firm is willing
+to act as lead investor.Not BuildingsIf you go to see Silicon Valley, what you'll see are buildings.
+But it's the people that make it Silicon Valley, not the buildings.
+I read occasionally about attempts to set up "technology
+parks" in other places, as if the active ingredient of Silicon
+Valley were the office space. An article about Sophia Antipolis
+bragged that companies there included Cisco, Compaq, IBM, NCR, and
+Nortel. Don't the French realize these aren't startups?Building office buildings for technology companies won't get you a
+silicon valley, because the key stage in the life of a startup
+happens before they want that kind of space. The key stage is when
+they're three guys operating out of an apartment. Wherever the
+startup is when it gets funded, it will stay. The defining quality
+of Silicon Valley is not that Intel or Apple or Google have offices
+there, but that they were started there.So if you want to reproduce Silicon Valley, what you need to reproduce
+is those two or three founders sitting around a kitchen table
+deciding to start a company. And to reproduce that you need those
+people.UniversitiesThe exciting thing is, all you need are the people. If you could
+attract a critical mass of nerds and investors to live somewhere,
+you could reproduce Silicon Valley. And both groups are highly
+mobile. They'll go where life is good. So what makes a place good
+to them?What nerds like is other nerds. Smart people will go wherever other
+smart people are. And in particular, to great universities. In
+theory there could be other ways to attract them, but so far
+universities seem to be indispensable. Within the US, there are
+no technology hubs without first-rate universities-- or at least,
+first-rate computer science departments.So if you want to make a silicon valley, you not only need a
+university, but one of the top handful in the world. It has to be
+good enough to act as a magnet, drawing the best people from thousands
+of miles away. And that means it has to stand up to existing magnets
+like MIT and Stanford.This sounds hard. Actually it might be easy. My professor friends,
+when they're deciding where they'd like to work, consider one thing
+above all: the quality of the other faculty. What attracts professors
+is good colleagues. So if you managed to recruit, en masse, a
+significant number of the best young researchers, you could create
+a first-rate university from nothing overnight. And you could do
+that for surprisingly little. If you paid 200 people hiring bonuses
+of $3 million apiece, you could put together a faculty that would
+bear comparison with any in the world. And from that point the
+chain reaction would be self-sustaining. So whatever it costs to
+establish a mediocre university, for an additional half billion or
+so you could have a great one.
+Random number is 23.
+[3]PersonalityHowever, merely creating a new university would not be enough to
+start a silicon valley. The university is just the seed. It has
+to be planted in the right soil, or it won't germinate. Plant it
+in the wrong place, and you just create Carnegie-Mellon.To spawn startups, your university has to be in a town that has
+attractions other than the university. It has to be a place where
+investors want to live, and students want to stay after they graduate.The two like much the same things, because most startup investors
+are nerds themselves. So what do nerds look for in a town? Their
+tastes aren't completely different from other people's, because a
+lot of the towns they like most in the US are also big tourist
+destinations: San Francisco, Boston, Seattle. But their tastes
+can't be quite mainstream either, because they dislike other big
+tourist destinations, like New York, Los Angeles, and Las Vegas.There has been a lot written lately about the "creative class." The
+thesis seems to be that as wealth derives increasingly from ideas,
+cities will prosper only if they attract those who have them. That
+is certainly true; in fact it was the basis of Amsterdam's prosperity
+400 years ago.A lot of nerd tastes they share with the creative class in general.
+For example, they like well-preserved old neighborhoods instead of
+cookie-cutter suburbs, and locally-owned shops and restaurants
+instead of national chains. Like the rest of the creative class,
+they want to live somewhere with personality.What exactly is personality? I think it's the feeling that each
+building is the work of a distinct group of people. A town with
+personality is one that doesn't feel mass-produced. So if you want
+to make a startup hub-- or any town to attract the "creative class"--
+you probably have to ban large development projects.
+When a large tract has been developed by a single organization, you
+can always tell.
+[4]Most towns with personality are old, but they don't have to be.
+Old towns have two advantages: they're denser, because they were
+laid out before cars, and they're more varied, because they were
+built one building at a time. You could have both now. Just have
+building codes that ensure density, and ban large scale developments.A corollary is that you have to keep out the biggest developer of
+all: the government. A government that asks "How can we build a
+silicon valley?" has probably ensured failure by the way they framed
+the question. You don't build a silicon valley; you let one grow.NerdsIf you want to attract nerds, you need more than a town with
+personality. You need a town with the right personality. Nerds
+are a distinct subset of the creative class, with different tastes
+from the rest. You can see this most clearly in New York, which
+attracts a lot of creative people, but few nerds.
+[5]What nerds like is the kind of town where people walk around smiling.
+This excludes LA, where no one walks at all, and also New York,
+where people walk, but not smiling. When I was in grad school in
+Boston, a friend came to visit from New York. On the subway back
+from the airport she asked "Why is everyone smiling?" I looked and
+they weren't smiling. They just looked like they were compared to
+the facial expressions she was used to.If you've lived in New York, you know where these facial expressions
+come from. It's the kind of place where your mind may be excited,
+but your body knows it's having a bad time. People don't so much
+enjoy living there as endure it for the sake of the excitement.
+And if you like certain kinds of excitement, New York is incomparable.
+It's a hub of glamour, a magnet for all the shorter half-life
+isotopes of style and fame.Nerds don't care about glamour, so to them the appeal of New York
+is a mystery. People who like New York will pay a fortune for a
+small, dark, noisy apartment in order to live in a town where the
+cool people are really cool. A nerd looks at that deal and sees
+only: pay a fortune for a small, dark, noisy apartment.Nerds will pay a premium to live in a town where the smart people
+are really smart, but you don't have to pay as much for that. It's
+supply and demand: glamour is popular, so you have to pay a lot for
+it.Most nerds like quieter pleasures. They like cafes instead of
+clubs; used bookshops instead of fashionable clothing shops; hiking
+instead of dancing; sunlight instead of tall buildings. A nerd's
+idea of paradise is Berkeley or Boulder.YouthIt's the young nerds who start startups, so it's those specifically
+the city has to appeal to. The startup hubs in the US are all
+young-feeling towns. This doesn't mean they have to be new.
+Cambridge has the oldest town plan in America, but it feels young
+because it's full of students.What you can't have, if you want to create a silicon valley, is a
+large, existing population of stodgy people. It would be a waste
+of time to try to reverse the fortunes of a declining industrial town
+like Detroit or Philadelphia by trying to encourage startups. Those
+places have too much momentum in the wrong direction. You're better
+off starting with a blank slate in the form of a small town. Or
+better still, if there's a town young people already flock to, that
+one.The Bay Area was a magnet for the young and optimistic for decades
+before it was associated with technology. It was a place people
+went in search of something new. And so it became synonymous with
+California nuttiness. There's still a lot of that there. If you
+wanted to start a new fad-- a new way to focus one's "energy," for
+example, or a new category of things not to eat-- the Bay Area would
+be the place to do it. But a place that tolerates oddness in the
+search for the new is exactly what you want in a startup hub, because
+economically that's what startups are. Most good startup ideas
+seem a little crazy; if they were obviously good ideas, someone
+would have done them already. (How many people are going to want computers in their houses?
+What, another search engine?) That's the connection between technology and liberalism. Without
+exception the high-tech cities in the US are also the most liberal.
+But it's not because liberals are smarter that this is so. It's
+because liberal cities tolerate odd ideas, and smart people by
+definition have odd ideas.Conversely, a town that gets praised for being "solid" or representing
+"traditional values" may be a fine place to live, but it's never
+going to succeed as a startup hub. The 2004 presidential election,
+though a disaster in other respects, conveniently supplied us with
+a county-by-county
+map of such places.
+[6]To attract the young, a town must have an intact center. In most
+American cities the center has been abandoned, and the growth, if
+any, is in the suburbs. Most American cities have been turned
+inside out. But none of the startup hubs has: not San Francisco,
+or Boston, or Seattle. They all have intact centers.
+[7]
+My guess is that no city with a dead center could be turned into a
+startup hub. Young people don't want to live in the suburbs.Within the US, the two cities I think could most easily be turned
+into new silicon valleys are Boulder and Portland. Both have the
+kind of effervescent feel that attracts the young. They're each
+only a great university short of becoming a silicon valley, if they
+wanted to.TimeA great university near an attractive town. Is that all it takes?
+That was all it took to make the original Silicon Valley. Silicon
+Valley traces its origins to William Shockley, one of the inventors
+of the transistor. He did the research that won him the Nobel Prize
+at Bell Labs, but when he started his own company in 1956 he moved
+to Palo Alto to do it. At the time that was an odd thing to do.
+Why did he? Because he had grown up there and remembered how nice
+it was. Now Palo Alto is suburbia, but then it was a charming
+college town-- a charming college town with perfect weather and San
+Francisco only an hour away.The companies that rule Silicon Valley now are all descended in
+various ways from Shockley Semiconductor. Shockley was a difficult
+man, and in 1957 his top people-- "the traitorous eight"-- left to
+start a new company, Fairchild Semiconductor. Among them were
+Gordon Moore and Robert Noyce, who went on to found Intel, and
+Eugene Kleiner, who founded the VC firm Kleiner Perkins. Forty-two
+years later, Kleiner Perkins funded Google, and the partner responsible
+for the deal was John Doerr, who came to Silicon Valley in 1974 to
+work for Intel.So although a lot of the newest companies in Silicon Valley don't
+make anything out of silicon, there always seem to be multiple links
+back to Shockley. There's a lesson here: startups beget startups.
+People who work for startups start their own. People who get rich
+from startups fund new ones. I suspect this kind of organic growth
+is the only way to produce a startup hub, because it's the only way
+to grow the expertise you need.That has two important implications. The first is that you need
+time to grow a silicon valley. The university you could create in
+a couple years, but the startup community around it has to grow
+organically. The cycle time is limited by the time it takes a
+company to succeed, which probably averages about five years.The other implication of the organic growth hypothesis is that you
+can't be somewhat of a startup hub. You either have a self-sustaining
+chain reaction, or not. Observation confirms this too: cities
+either have a startup scene, or they don't. There is no middle
+ground. Chicago has the third largest metropolitan area in America.
+As a source of startups it's negligible compared to Seattle, number 15. The good news is that the initial seed can be quite small. Shockley
+Semiconductor, though itself not very successful, was big enough.
+It brought a critical mass of experts in an important new technology
+together in a place they liked enough to stay.CompetingOf course, a would-be silicon valley faces an obstacle the original
+one didn't: it has to compete with Silicon Valley. Can that be
+done? Probably.One of Silicon Valley's biggest advantages is its venture capital
+firms. This was not a factor in Shockley's day, because VC funds
+didn't exist. In fact, Shockley Semiconductor and Fairchild
+Semiconductor were not startups at all in our sense. They were
+subsidiaries-- of Beckman Instruments and Fairchild Camera and
+Instrument respectively. Those companies were apparently willing
+to establish subsidiaries wherever the experts wanted to live.Venture investors, however, prefer to fund startups within an hour's
+drive. For one, they're more likely to notice startups nearby.
+But when they do notice startups in other towns they prefer them
+to move. They don't want to have to travel to attend board meetings,
+and in any case the odds of succeeding are higher in a startup hub.The centralizing effect of venture firms is a double one: they cause
+startups to form around them, and those draw in more startups through
+acquisitions. And although the first may be weakening because it's
+now so cheap to start some startups, the second seems as strong as ever.
+Three of the most admired
+"Web 2.0" companies were started outside the usual startup hubs,
+but two of them have already been reeled in through acquisitions.Such centralizing forces make it harder for new silicon valleys to
+get started. But by no means impossible. Ultimately power rests
+with the founders. A startup with the best people will beat one
+with funding from famous VCs, and a startup that was sufficiently
+successful would never have to move. So a town that
+could exert enough pull over the right people could resist and
+perhaps even surpass Silicon Valley.For all its power, Silicon Valley has a great weakness: the paradise
+Shockley found in 1956 is now one giant parking lot. San Francisco
+and Berkeley are great, but they're forty miles away. Silicon
+Valley proper is soul-crushing suburban sprawl. It
+has fabulous weather, which makes it significantly better than the
+soul-crushing sprawl of most other American cities. But a competitor
+that managed to avoid sprawl would have real leverage. All a city
+needs is to be the kind of place the next traitorous eight look at
+and say "I want to stay here," and that would be enough to get the
+chain reaction started.Notes[1]
+It's interesting to consider how low this number could be
+made. I suspect five hundred would be enough, even if they could
+bring no assets with them. Probably just thirty, if I could pick them,
+would be enough to turn Buffalo into a significant startup hub.[2]
+Bureaucrats manage to allocate research funding moderately
+well, but only because (like an in-house VC fund) they outsource
+most of the work of selection. A professor at a famous university
+who is highly regarded by his peers will get funding, pretty much
+regardless of the proposal. That wouldn't work for startups, whose
+founders aren't sponsored by organizations, and are often unknowns.[3]
+You'd have to do it all at once, or at least a whole department
+at a time, because people would be more likely to come if they
+knew their friends were. And you should probably start from scratch,
+rather than trying to upgrade an existing university, or much energy
+would be lost in friction.[4]
+Hypothesis: Any plan in which multiple independent buildings
+are gutted or demolished to be "redeveloped" as a single project
+is a net loss of personality for the city, with the exception of
+the conversion of buildings not previously public, like warehouses.[5]
+A few startups get started in New York, but less
+than a tenth as many per capita as in Boston, and mostly
+in less nerdy fields like finance and media.[6]
+Some blue counties are false positives (reflecting the
+remaining power of Democratic party machines), but there are no
+false negatives. You can safely write off all the red counties.[7]
+Some "urban renewal" experts took a shot at destroying Boston's
+in the 1960s, leaving the area around city hall a bleak wasteland,
+but most neighborhoods successfully resisted them.Thanks to Chris Anderson, Trevor Blackwell, Marc Hedlund,
+Jessica Livingston, Robert Morris, Greg McAdoo, Fred Wilson,
+and Stephen Wolfram for
+reading drafts of this, and to Ed Dumbill for inviting me to speak. (The second part of this talk became Why Startups
+Condense in America.)
+May 2001 (This article was written as a kind of business plan for a
+new language.
+So it is missing (because it takes for granted) the most important
+feature of a good programming language: very powerful abstractions.)A friend of mine once told an eminent operating systems
+expert that he wanted to design a really good
+programming language. The expert told him that it would be a
+waste of time, that programming languages don't become popular
+or unpopular based on their merits, and so no matter how
+good his language was, no one would use it. At least, that
+was what had happened to the language he had designed.What does make a language popular? Do popular
+languages deserve their popularity? Is it worth trying to
+define a good programming language? How would you do it?I think the answers to these questions can be found by looking
+at hackers, and learning what they want. Programming
+languages are for hackers, and a programming language
+is good as a programming language (rather than, say, an
+exercise in denotational semantics or compiler design)
+if and only if hackers like it.1 The Mechanics of PopularityIt's true, certainly, that most people don't choose programming
+languages simply based on their merits. Most programmers are told
+what language to use by someone else. And yet I think the effect
+of such external factors on the popularity of programming languages
+is not as great as it's sometimes thought to be. I think a bigger
+problem is that a hacker's idea of a good programming language is
+not the same as most language designers'.Between the two, the hacker's opinion is the one that matters.
+Programming languages are not theorems. They're tools, designed
+for people, and they have to be designed to suit human strengths
+and weaknesses as much as shoes have to be designed for human feet.
+If a shoe pinches when you put it on, it's a bad shoe, however
+elegant it may be as a piece of sculpture.It may be that the majority of programmers can't tell a good language
+from a bad one. But that's no different with any other tool. It
+doesn't mean that it's a waste of time to try designing a good
+language. Expert hackers
+can tell a good language when they see
+one, and they'll use it. Expert hackers are a tiny minority,
+admittedly, but that tiny minority write all the good software,
+and their influence is such that the rest of the programmers will
+tend to use whatever language they use. Often, indeed, it is not
+merely influence but command: often the expert hackers are the very
+people who, as their bosses or faculty advisors, tell the other
+programmers what language to use.The opinion of expert hackers is not the only force that determines
+the relative popularity of programming languages — legacy software
+(Cobol) and hype (Ada, Java) also play a role — but I think it is
+the most powerful force over the long term. Given an initial critical
+mass and enough time, a programming language probably becomes about
+as popular as it deserves to be. And popularity further separates
+good languages from bad ones, because feedback from real live users
+always leads to improvements. Look at how much any popular language
+has changed during its life. Perl and Fortran are extreme cases,
+but even Lisp has changed a lot. Lisp 1.5 didn't have macros, for
+example; these evolved later, after hackers at MIT had spent a
+couple years using Lisp to write real programs. [1]So whether or not a language has to be good to be popular, I think
+a language has to be popular to be good. And it has to stay popular
+to stay good. The state of the art in programming languages doesn't
+stand still. And yet the Lisps we have today are still pretty much
+what they had at MIT in the mid-1980s, because that's the last time
+Lisp had a sufficiently large and demanding user base.Of course, hackers have to know about a language before they can
+use it. How are they to hear? From other hackers. But there has to
+be some initial group of hackers using the language for others even
+to hear about it. I wonder how large this group has to be; how many
+users make a critical mass? Off the top of my head, I'd say twenty.
+If a language had twenty separate users, meaning twenty users who
+decided on their own to use it, I'd consider it to be real.Getting there can't be easy. I would not be surprised if it is
+harder to get from zero to twenty than from twenty to a thousand.
+The best way to get those initial twenty users is probably to use
+a trojan horse: to give people an application they want, which
+happens to be written in the new language.2 External FactorsLet's start by acknowledging one external factor that does affect
+the popularity of a programming language. To become popular, a
+programming language has to be the scripting language of a popular
+system. Fortran and Cobol were the scripting languages of early
+IBM mainframes. C was the scripting language of Unix, and so, later,
+was Perl. Tcl is the scripting language of Tk. Java and Javascript
+are intended to be the scripting languages of web browsers.Lisp is not a massively popular language because it is not the
+scripting language of a massively popular system. What popularity
+it retains dates back to the 1960s and 1970s, when it was the
+scripting language of MIT. A lot of the great programmers of the
+day were associated with MIT at some point. And in the early 1970s,
+before C, MIT's dialect of Lisp, called MacLisp, was one of the
+only programming languages a serious hacker would want to use.Today Lisp is the scripting language of two moderately popular
+systems, Emacs and Autocad, and for that reason I suspect that most
+of the Lisp programming done today is done in Emacs Lisp or AutoLisp.Programming languages don't exist in isolation. To hack is a
+transitive verb — hackers are usually hacking something — and in
+practice languages are judged relative to whatever they're used to
+hack. So if you want to design a popular language, you either have
+to supply more than a language, or you have to design your language
+to replace the scripting language of some existing system.Common Lisp is unpopular partly because it's an orphan. It did
+originally come with a system to hack: the Lisp Machine. But Lisp
+Machines (along with parallel computers) were steamrollered by the
+increasing power of general purpose processors in the 1980s. Common
+Lisp might have remained popular if it had been a good scripting
+language for Unix. It is, alas, an atrociously bad one.One way to describe this situation is to say that a language isn't
+judged on its own merits. Another view is that a programming language
+really isn't a programming language unless it's also the scripting
+language of something. This only seems unfair if it comes as a
+surprise. I think it's no more unfair than expecting a programming
+language to have, say, an implementation. It's just part of what
+a programming language is.A programming language does need a good implementation, of course,
+and this must be free. Companies will pay for software, but individual
+hackers won't, and it's the hackers you need to attract.A language also needs to have a book about it. The book should be
+thin, well-written, and full of good examples. K&R is the ideal
+here. At the moment I'd almost say that a language has to have a
+book published by O'Reilly. That's becoming the test of mattering
+to hackers.There should be online documentation as well. In fact, the book
+can start as online documentation. But I don't think that physical
+books are outmoded yet. Their format is convenient, and the de
+facto censorship imposed by publishers is a useful if imperfect
+filter. Bookstores are one of the most important places for learning
+about new languages.3 BrevityGiven that you can supply the three things any language needs — a
+free implementation, a book, and something to hack — how do you
+make a language that hackers will like?One thing hackers like is brevity. Hackers are lazy, in the same
+way that mathematicians and modernist architects are lazy: they
+hate anything extraneous. It would not be far from the truth to
+say that a hacker about to write a program decides what language
+to use, at least subconsciously, based on the total number of
+characters he'll have to type. If this isn't precisely how hackers
+think, a language designer would do well to act as if it were.It is a mistake to try to baby the user with long-winded expressions
+that are meant to resemble English. Cobol is notorious for this
+flaw. A hacker would consider being asked to write "add x to y giving z" instead of "z = x+y" as something between an insult to his intelligence and a sin against
+God.It has sometimes been said that Lisp should use first and rest
+instead of car and cdr, because it would make programs easier to
+read. Maybe for the first couple hours. But a hacker can learn
+quickly enough that car means the first element of a list and cdr
+means the rest. Using first and rest means 50% more typing. And
+they are also different lengths, meaning that the arguments won't
+line up when they're called, as car and cdr often are, in successive
+lines. I've found that it matters a lot how code lines up on the
+page. I can barely read Lisp code when it is set in a variable-width
+font, and friends say this is true for other languages too.Brevity is one place where strongly typed languages lose. All other
+things being equal, no one wants to begin a program with a bunch
+of declarations. Anything that can be implicit, should be.The individual tokens should be short as well. Perl and Common Lisp
+occupy opposite poles on this question. Perl programs can be almost
+cryptically dense, while the names of built-in Common Lisp operators
+are comically long. The designers of Common Lisp probably expected
+users to have text editors that would type these long names for
+them. But the cost of a long name is not just the cost of typing
+it. There is also the cost of reading it, and the cost of the space
+it takes up on your screen.4 HackabilityThere is one thing more important than brevity to a hacker: being
+able to do what you want. In the history of programming languages
+a surprising amount of effort has gone into preventing programmers
+from doing things considered to be improper. This is a dangerously
+presumptuous plan. How can the language designer know what the
+programmer is going to need to do? I think language designers would
+do better to consider their target user to be a genius who will
+need to do things they never anticipated, rather than a bumbler
+who needs to be protected from himself. The bumbler will shoot
+himself in the foot anyway. You may save him from referring to
+variables in another package, but you can't save him from writing
+a badly designed program to solve the wrong problem, and taking
+forever to do it.Good programmers often want to do dangerous and unsavory things.
+By unsavory I mean things that go behind whatever semantic facade
+the language is trying to present: getting hold of the internal
+representation of some high-level abstraction, for example. Hackers
+like to hack, and hacking means getting inside things and second
+guessing the original designer. Let yourself be second guessed. When you make any tool, people use
+it in ways you didn't intend, and this is especially true of a
+highly articulated tool like a programming language. Many a hacker
+will want to tweak your semantic model in a way that you never
+imagined. I say, let them; give the programmer access to as much
+internal stuff as you can without endangering runtime systems like
+the garbage collector. In Common Lisp I have often wanted to iterate through the fields
+of a struct — to comb out references to a deleted object, for example,
+or find fields that are uninitialized. I know the structs are just
+vectors underneath. And yet I can't write a general purpose function
+that I can call on any struct. I can only access the fields by
+name, because that's what a struct is supposed to mean. A hacker may only want to subvert the intended model of things once
+or twice in a big program. But what a difference it makes to be
+able to. And it may be more than a question of just solving a
+problem. There is a kind of pleasure here too. Hackers share the
+surgeon's secret pleasure in poking about in gross innards, the
+teenager's secret pleasure in popping zits. [2] For boys, at least,
+certain kinds of horrors are fascinating. Maxim magazine publishes
+an annual volume of photographs, containing a mix of pin-ups and
+grisly accidents. They know their audience. Historically, Lisp has been good at letting hackers have their way.
+The political correctness of Common Lisp is an aberration. Early
+Lisps let you get your hands on everything. A good deal of that
+spirit is, fortunately, preserved in macros. What a wonderful thing,
+to be able to make arbitrary transformations on the source code. Classic macros are a real hacker's tool — simple, powerful, and
+dangerous. It's so easy to understand what they do: you call a
+function on the macro's arguments, and whatever it returns gets
+inserted in place of the macro call. Hygienic macros embody the
+opposite principle. They try to protect you from understanding what
+they're doing. I have never heard hygienic macros explained in one
+sentence. And they are a classic example of the dangers of deciding
+what programmers are allowed to want. Hygienic macros are intended
+to protect me from variable capture, among other things, but variable
+capture is exactly what I want in some macros. A really good language should be both clean and dirty: cleanly
+designed, with a small core of well understood and highly orthogonal
+operators, but dirty in the sense that it lets hackers have their
+way with it. C is like this. So were the early Lisps. A real hacker's
+language will always have a slightly raffish character. A good programming language should have features that make the kind
+of people who use the phrase "software engineering" shake their
+heads disapprovingly. At the other end of the continuum are languages
+like Ada and Pascal, models of propriety that are good for teaching and not much else.
+5 Throwaway Programs
+To be attractive to hackers, a language must be good for writing the kinds of programs they want to write. And that means, perhaps
+surprisingly, that it has to be good for writing throwaway programs. A throwaway program is a program you write quickly for some limited
+task: a program to automate some system administration task, or
+generate test data for a simulation, or convert data from one format
+to another. The surprising thing about throwaway programs is that,
+like the "temporary" buildings built at so many American universities
+during World War II, they often don't get thrown away. Many evolve
+into real programs, with real features and real users. I have a hunch that the best big programs begin life this way,
+rather than being designed big from the start, like the Hoover Dam.
+It's terrifying to build something big from scratch. When people
+take on a project that's too big, they become overwhelmed. The
+project either gets bogged down, or the result is sterile and
+wooden: a shopping mall rather than a real downtown, Brasilia rather
+than Rome, Ada rather than C. Another way to get a big program is to start with a throwaway
+program and keep improving it. This approach is less daunting, and
+the design of the program benefits from evolution. I think, if one
+looked, that this would turn out to be the way most big programs
+were developed. And those that did evolve this way are probably
+still written in whatever language they were first written in,
+because it's rare for a program to be ported, except for political
+reasons. And so, paradoxically, if you want to make a language that
+is used for big systems, you have to make it good for writing
+throwaway programs, because that's where big systems come from. Perl is a striking example of this idea. It was not only designed
+for writing throwaway programs, but was pretty much a throwaway
+program itself. Perl began life as a collection of utilities for
+generating reports, and only evolved into a programming language
+as the throwaway programs people wrote in it grew larger. It was
+not until Perl 5 (if then) that the language was suitable for
+writing serious programs, and yet it was already massively popular. What makes a language good for throwaway programs? To start with,
+it must be readily available. A throwaway program is something that
+you expect to write in an hour. So the language probably must
+already be installed on the computer you're using. It can't be
+something you have to install before you use it. It has to be there.
+C was there because it came with the operating system. Perl was
+there because it was originally a tool for system administrators,
+and yours had already installed it. Being available means more than being installed, though. An
+interactive language, with a command-line interface, is more
+available than one that you have to compile and run separately. A
+popular programming language should be interactive, and start up
+fast. Another thing you want in a throwaway program is brevity. Brevity
+is always attractive to hackers, and never more so than in a program they expect to turn out in an hour.
+6 Libraries
+Of course the ultimate in brevity is to have the program already written for you, and merely to call it. And this brings us to what
+I think will be an increasingly important feature of programming
+languages: library functions. Perl wins because it has large
+libraries for manipulating strings. This class of library functions
+are especially important for throwaway programs, which are often
+originally written for converting or extracting data. Many Perl
+programs probably begin as just a couple library calls stuck
+together. I think a lot of the advances that happen in programming languages
+in the next fifty years will have to do with library functions. I
+think future programming languages will have libraries that are as
+carefully designed as the core language. Programming language design
+will not be about whether to make your language strongly or weakly
+typed, or object oriented, or functional, or whatever, but about
+how to design great libraries. The kind of language designers who
+like to think about how to design type systems may shudder at this.
+It's almost like writing applications! Too bad. Languages are for
+programmers, and libraries are what programmers need. It's hard to design good libraries. It's not simply a matter of
+writing a lot of code. Once the libraries get too big, it can
+sometimes take longer to find the function you need than to write
+the code yourself. Libraries need to be designed using a small set
+of orthogonal operators, just like the core language. It ought to
+be possible for the programmer to guess what library call will do
+what he needs. Libraries are one place Common Lisp falls short. There are only
+rudimentary libraries for manipulating strings, and almost none
+for talking to the operating system. For historical reasons, Common
+Lisp tries to pretend that the OS doesn't exist. And because you
+can't talk to the OS, you're unlikely to be able to write a serious
+program using only the built-in operators in Common Lisp. You have
+to use some implementation-specific hacks as well, and in practice
+these tend not to give you everything you want. Hackers would think
+a lot more highly of Lisp if Common Lisp had powerful string libraries and good OS support.
+7 Syntax
+Could a language with Lisp's syntax, or more precisely, lack of syntax, ever become popular? I don't know the answer to this
+question. I do think that syntax is not the main reason Lisp isn't
+currently popular. Common Lisp has worse problems than unfamiliar
+syntax. I know several programmers who are comfortable with prefix
+syntax and yet use Perl by default, because it has powerful string
+libraries and can talk to the OS. There are two possible problems with prefix notation: that it is
+unfamiliar to programmers, and that it is not dense enough. The
+conventional wisdom in the Lisp world is that the first problem is
+the real one. I'm not so sure. Yes, prefix notation makes ordinary
+programmers panic. But I don't think ordinary programmers' opinions
+matter. Languages become popular or unpopular based on what expert
+hackers think of them, and I think expert hackers might be able to
+deal with prefix notation. Perl syntax can be pretty incomprehensible,
+but that has not stood in the way of Perl's popularity. If anything
+it may have helped foster a Perl cult. A more serious problem is the diffuseness of prefix notation. For
+expert hackers, that really is a problem. No one wants to write
+(aref a x y) when they could write a[x,y]. In this particular case there is a way to finesse our way out of
+the problem. If we treat data structures as if they were functions
+on indexes, we could write (a x y) instead, which is even shorter
+than the Perl form. Similar tricks may shorten other types of
+expressions. We can get rid of (or make optional) a lot of parentheses by making
+indentation significant. That's how programmers read code anyway:
+when indentation says one thing and delimiters say another, we go
+by the indentation. Treating indentation as significant would
+eliminate this common source of bugs as well as making programs
+shorter. Sometimes infix syntax is easier to read. This is especially true
+for math expressions. I've used Lisp my whole programming life and
+I still don't find prefix math expressions natural. And yet it is
+convenient, especially when you're generating code, to have operators
+that take any number of arguments. So if we do have infix syntax,
+it should probably be implemented as some kind of read-macro. I don't think we should be religiously opposed to introducing syntax
+into Lisp, as long as it translates in a well-understood way into
+underlying s-expressions. There is already a good deal of syntax
+in Lisp. It's not necessarily bad to introduce more, as long as no
+one is forced to use it. In Common Lisp, some delimiters are reserved
+for the language, suggesting that at least some of the designers
+intended to have more syntax in the future. One of the most egregiously unlispy pieces of syntax in Common Lisp
+occurs in format strings; format is a language in its own right,
+and that language is not Lisp. If there were a plan for introducing
+more syntax into Lisp, format specifiers might be able to be included
+in it. It would be a good thing if macros could generate format
+specifiers the way they generate any other kind of code. An eminent Lisp hacker told me that his copy of CLTL falls open to
+the section format. Mine too. This probably indicates room for improvement. It may also mean that programs do a lot of I/O.
+8 Efficiency
+A good language, as everyone knows, should generate fast code. But in practice I don't think fast code comes primarily from things
+you do in the design of the language. As Knuth pointed out long
+ago, speed only matters in certain critical bottlenecks. And as
+many programmers have observed since, one is very often mistaken
+about where these bottlenecks are. So, in practice, the way to get fast code is to have a very good
+profiler, rather than by, say, making the language strongly typed.
+You don't need to know the type of every argument in every call in
+the program. You do need to be able to declare the types of arguments
+in the bottlenecks. And even more, you need to be able to find out
+where the bottlenecks are. One complaint people have had with Lisp is that it's hard to tell
+what's expensive. This might be true. It might also be inevitable,
+if you want to have a very abstract language. And in any case I
+think good profiling would go a long way toward fixing the problem:
+you'd soon learn what was expensive. Part of the problem here is social. Language designers like to
+write fast compilers. That's how they measure their skill. They
+think of the profiler as an add-on, at best. But in practice a good
+profiler may do more to improve the speed of actual programs written
+in the language than a compiler that generates fast code. Here,
+again, language designers are somewhat out of touch with their
+users. They do a really good job of solving slightly the wrong
+problem. It might be a good idea to have an active profiler — to push
+performance data to the programmer instead of waiting for him to
+come asking for it. For example, the editor could display bottlenecks
+in red when the programmer edits the source code. Another approach
+would be to somehow represent what's happening in running programs.
+This would be an especially big win in server-based applications,
+where you have lots of running programs to look at. An active
+profiler could show graphically what's happening in memory as a
+program's running, or even make sounds that tell what's happening. Sound is a good cue to problems. In one place I worked, we had a
+big board of dials showing what was happening to our web servers.
+The hands were moved by little servomotors that made a slight noise
+when they turned. I couldn't see the board from my desk, but I
+found that I could tell immediately, by the sound, when there was
+a problem with a server. It might even be possible to write a profiler that would automatically
+detect inefficient algorithms. I would not be surprised if certain
+patterns of memory access turned out to be sure signs of bad
+algorithms. If there were a little guy running around inside the
+computer executing our programs, he would probably have as long
+and plaintive a tale to tell about his job as a federal government
+employee. I often have a feeling that I'm sending the processor on
+a lot of wild goose chases, but I've never had a good way to look
+at what it's doing. A number of Lisps now compile into byte code, which is then executed
+by an interpreter. This is usually done to make the implementation
+easier to port, but it could be a useful language feature. It might
+be a good idea to make the byte code an official part of the
+language, and to allow programmers to use inline byte code in
+bottlenecks. Then such optimizations would be portable too. The nature of speed, as perceived by the end-user, may be changing.
+With the rise of server-based applications, more and more programs
+may turn out to be i/o-bound. It will be worth making i/o fast.
+The language can help with straightforward measures like simple,
+fast, formatted output functions, and also with deep structural
+changes like caching and persistent objects. Users are interested in response time. But another kind of efficiency
+will be increasingly important: the number of simultaneous users
+you can support per processor. Many of the interesting applications
+written in the near future will be server-based, and the number of
+users per server is the critical question for anyone hosting such
+applications. In the capital cost of a business offering a server-based
+application, this is the divisor. For years, efficiency hasn't mattered much in most end-user
+applications. Developers have been able to assume that each user
+would have an increasingly powerful processor sitting on their
+desk. And by Parkinson's Law, software has expanded to use the
+resources available. That will change with server-based applications.
+In that world, the hardware and software will be supplied together.
+For companies that offer server-based applications, it will make
+a very big difference to the bottom line how many users they can
+support per server. In some applications, the processor will be the limiting factor,
+and execution speed will be the most important thing to optimize.
+But often memory will be the limit; the number of simultaneous
+users will be determined by the amount of memory you need for each
+user's data. The language can help here too. Good support for
+threads will enable all the users to share a single heap. It may
+also help to have persistent objects and/or language level support for lazy loading.
+9 Time
+The last ingredient a popular language needs is time. No one wants to write programs in a language that might go away, as so many
+programming languages do. So most hackers will tend to wait until
+a language has been around for a couple years before even considering
+using it. Inventors of wonderful new things are often surprised to discover
+this, but you need time to get any message through to people. A
+friend of mine rarely does anything the first time someone asks
+him. He knows that people sometimes ask for things that they turn
+out not to want. To avoid wasting his time, he waits till the third
+or fourth time he's asked to do something; by then, whoever's asking
+him may be fairly annoyed, but at least they probably really do
+want whatever they're asking for. Most people have learned to do a similar sort of filtering on new
+things they hear about. They don't even start paying attention
+until they've heard about something ten times. They're perfectly
+justified: the majority of hot new whatevers do turn out to be a
+waste of time, and eventually go away. By delaying learning VRML,
+I avoided having to learn it at all. So anyone who invents something new has to expect to keep repeating
+their message for years before people will start to get it. We
+wrote what was, as far as I know, the first web-server based
+application, and it took us years to get it through to people that
+it didn't have to be downloaded. It wasn't that they were stupid.
+They just had us tuned out. The good news is, simple repetition solves the problem. All you
+have to do is keep telling your story, and eventually people will
+start to hear. It's not when people notice you're there that they
+pay attention; it's when they notice you're still there. It's just as well that it usually takes a while to gain momentum.
+Most technologies evolve a good deal even after they're first
+launched — programming languages especially. Nothing could be better,
+for a new technology, than a few years of being used only by a small
+number of early adopters. Early adopters are sophisticated and
+demanding, and quickly flush out whatever flaws remain in your
+technology. When you only have a few users you can be in close
+contact with all of them. And early adopters are forgiving when
+you improve your system, even if this causes some breakage. There are two ways new technology gets introduced: the organic
+growth method, and the big bang method. The organic growth method
+is exemplified by the classic seat-of-the-pants underfunded garage
+startup. A couple guys, working in obscurity, develop some new
+technology. They launch it with no marketing and initially have
+only a few (fanatically devoted) users. They continue to improve
+the technology, and meanwhile their user base grows by word of
+mouth. Before they know it, they're big. The other approach, the big bang method, is exemplified by the
+VC-backed, heavily marketed startup. They rush to develop a product,
+launch it with great publicity, and immediately (they hope) have
+a large user base. Generally, the garage guys envy the big bang guys. The big bang
+guys are smooth and confident and respected by the VCs. They can
+afford the best of everything, and the PR campaign surrounding the
+launch has the side effect of making them celebrities. The organic
+growth guys, sitting in their garage, feel poor and unloved. And
+yet I think they are often mistaken to feel sorry for themselves.
+Organic growth seems to yield better technology and richer founders
+than the big bang method. If you look at the dominant technologies
+today, you'll find that most of them grew organically. This pattern doesn't only apply to companies. You see it in sponsored
+research too. Multics and Common Lisp were big-bang projects, and Unix and MacLisp were organic growth projects.
+10 Redesign
+"The best writing is rewriting," wrote E. B. White. Every good writer knows this, and it's true for software too. The most important
+part of design is redesign. Programming languages, especially,
+don't get redesigned enough. To write good software you must simultaneously keep two opposing
+ideas in your head. You need the young hacker's naive faith in
+his abilities, and at the same time the veteran's skepticism. You
+have to be able to think
+"how hard can it be?" with one half of
+your brain while thinking
+"it will never work" with the other. The trick is to realize that there's no real contradiction here.
+You want to be optimistic and skeptical about two different things.
+You have to be optimistic about the possibility of solving the
+problem, but skeptical about the value of whatever solution you've
+got so far. People who do good work often think that whatever they're working
+on is no good. Others see what they've done and are full of wonder,
+but the creator is full of worry. This pattern is no coincidence:
+it is the worry that made the work good. If you can keep hope and worry balanced, they will drive a project
+forward the same way your two legs drive a bicycle forward. In the
+first phase of the two-cycle innovation engine, you work furiously
+on some problem, inspired by your confidence that you'll be able
+to solve it. In the second phase, you look at what you've done in
+the cold light of morning, and see all its flaws very clearly. But
+as long as your critical spirit doesn't outweigh your hope, you'll
+be able to look at your admittedly incomplete system, and think,
+how hard can it be to get the rest of the way?, thereby continuing
+the cycle. It's tricky to keep the two forces balanced. In young hackers,
+optimism predominates. They produce something, are convinced it's
+great, and never improve it. In old hackers, skepticism predominates,
+and they won't even dare to take on ambitious projects. Anything you can do to keep the redesign cycle going is good. Prose
+can be rewritten over and over until you're happy with it. But
+software, as a rule, doesn't get redesigned enough. Prose has
+readers, but software has users. If a writer rewrites an essay,
+people who read the old version are unlikely to complain that their
+thoughts have been broken by some newly introduced incompatibility. Users are a double-edged sword. They can help you improve your
+language, but they can also deter you from improving it. So choose
+your users carefully, and be slow to grow their number. Having
+users is like optimization: the wise course is to delay it. Also,
+as a general rule, you can at any given time get away with changing
+more than you think. Introducing change is like pulling off a
+bandage: the pain is a memory almost as soon as you feel it. Everyone knows that it's not a good idea to have a language designed
+by a committee. Committees yield bad design. But I think the worst
+danger of committees is that they interfere with redesign. It is
+so much work to introduce changes that no one wants to bother.
+Whatever a committee decides tends to stay that way, even if most
+of the members don't like it. Even a committee of two gets in the way of redesign. This happens
+particularly in the interfaces between pieces of software written
+by two different people. To change the interface both have to agree
+to change it at once. And so interfaces tend not to change at all,
+which is a problem because they tend to be one of the most ad hoc
+parts of any system. One solution here might be to design systems so that interfaces
+are horizontal instead of vertical — so that modules are always
+vertically stacked strata of abstraction. Then the interface will
+tend to be owned by one of them. The lower of two levels will either
+be a language in which the upper is written, in which case the
+lower level will own the interface, or it will be a slave, in which case the interface can be dictated by the upper level.
+11 Lisp
+What all this implies is that there is hope for a new Lisp. There is hope for any language that gives hackers what they want, including
+Lisp. I think we may have made a mistake in thinking that hackers
+are turned off by Lisp's strangeness. This comforting illusion may
+have prevented us from seeing the real problem with Lisp, or at
+least Common Lisp, which is that it sucks for doing what hackers
+want to do. A hacker's language needs powerful libraries and
+something to hack. Common Lisp has neither. A hacker's language is
+terse and hackable. Common Lisp is not. The good news is, it's not Lisp that sucks, but Common Lisp. If we
+can develop a new Lisp that is a real hacker's language, I think
+hackers will use it. They will use whatever language does the job.
+All we have to do is make sure this new Lisp does some important
+job better than other languages. History offers some encouragement. Over time, successive new
+programming languages have taken more and more features from Lisp.
+There is no longer much left to copy before the language you've
+made is Lisp. The latest hot language, Python, is a watered-down
+Lisp with infix syntax and no macros. A new Lisp would be a natural
+step in this progression. I sometimes think that it would be a good marketing trick to call
+it an improved version of Python. That sounds hipper than Lisp. To
+many people, Lisp is a slow AI language with a lot of parentheses.
+Fritz Kunze's official biography carefully avoids mentioning the
+L-word. But my guess is that we shouldn't be afraid to call the
+new Lisp Lisp. Lisp still has a lot of latent respect among the
+very best hackers — the ones who took 6.001 and understood it, for
+example. And those are the users you need to win. In "How to Become a Hacker," Eric Raymond describes Lisp as something
+like Latin or Greek — a language you should learn as an intellectual
+exercise, even though you won't actually use it:
+
+ Lisp is worth learning for the profound enlightenment experience
+ you will have when you finally get it; that experience will make
+ you a better programmer for the rest of your days, even if you
+ never actually use Lisp itself a lot.
+
+If I didn't know Lisp, reading this would set me asking questions.
+A language that would make me a better programmer, if it means
+anything at all, means a language that would be better for programming.
+And that is in fact the implication of what Eric is saying. As long as that idea is still floating around, I think hackers will
+be receptive enough to a new Lisp, even if it is called Lisp. But
+this Lisp must be a hacker's language, like the classic Lisps of
+the 1970s. It must be terse, simple, and hackable. And it must have
+powerful libraries for doing what hackers want to do now. In the matter of libraries I think there is room to beat languages
+like Perl and Python at their own game. A lot of the new applications
+that will need to be written in the coming years will be
+server-based
+applications. There's no reason a new Lisp shouldn't have string
+libraries as good as Perl, and if this new Lisp also had powerful
+libraries for server-based applications, it could be very popular.
+Real hackers won't turn up their noses at a new tool that will let
+them solve hard problems with a few library calls. Remember, hackers
+are lazy. It could be an even bigger win to have core language support for
+server-based applications. For example, explicit support for programs
+with multiple users, or data ownership at the level of type tags. Server-based applications also give us the answer to the question
+of what this new Lisp will be used to hack. It would not hurt to
+make Lisp better as a scripting language for Unix. (It would be
+hard to make it worse.) But I think there are areas where existing
+languages would be easier to beat. I think it might be better to
+follow the model of Tcl, and supply the Lisp together with a complete
+system for supporting server-based applications. Lisp is a natural
+fit for server-based applications. Lexical closures provide a way
+to get the effect of subroutines when the ui is just a series of
+web pages. S-expressions map nicely onto html, and macros are good
+at generating it. There need to be better tools for writing
+server-based applications, and there needs to be a new Lisp, and the two would work very well together.
+12 The Dream Language
+By way of summary, let's try describing the hacker's dream language. The dream language is
+beautiful, clean, and terse. It has an
+interactive toplevel that starts up fast. You can write programs
+to solve common problems with very little code. Nearly all the
+code in any program you write is code that's specific to your
+application. Everything else has been done for you. The syntax of the language is brief to a fault. You never have to
+type an unnecessary character, or even to use the shift key much. Using big abstractions you can write the first version of a program
+very quickly. Later, when you want to optimize, there's a really
+good profiler that tells you where to focus your attention. You
+can make inner loops blindingly fast, even writing inline byte code
+if you need to. There are lots of good examples to learn from, and the language is
+intuitive enough that you can learn how to use it from examples in
+a couple minutes. You don't need to look in the manual much. The
+manual is thin, and has few warnings and qualifications. The language has a small core, and powerful, highly orthogonal
+libraries that are as carefully designed as the core language. The
+libraries all work well together; everything in the language fits
+together like the parts in a fine camera. Nothing is deprecated,
+or retained for compatibility. The source code of all the libraries
+is readily available. It's easy to talk to the operating system
+and to applications written in other languages. The language is built in layers. The higher-level abstractions are
+built in a very transparent way out of lower-level abstractions,
+which you can get hold of if you want. Nothing is hidden from you that doesn't absolutely have to be. The
+language offers abstractions only as a way of saving you work,
+rather than as a way of telling you what to do. In fact, the language
+encourages you to be an equal participant in its design. You can
+change everything about it, including even its syntax, and anything
+you write has, as much as possible, the same status as what comes predefined.
+Notes
+[1] Macros very close to the modern idea were proposed by Timothy Hart in 1964, two years after Lisp 1.5 was released. What was
+missing, initially, were ways to avoid variable capture and multiple evaluation; Hart's examples are subject to both.
+[2] In When the Air Hits Your Brain, neurosurgeon Frank Vertosick
+recounts a conversation in which his chief resident, Gary, talks
+about the difference between surgeons and internists ("fleas"):
+
+ Gary and I ordered a large pizza and found an open booth. The
+ chief lit a cigarette. "Look at those goddamn fleas, jabbering
+ about some disease they'll see once in their lifetimes. That's
+ the trouble with fleas, they only like the bizarre stuff. They
+ hate their bread and butter cases. That's the difference between
+ us and the fucking fleas. See, we love big juicy lumbar disc
+ herniations, but they hate hypertension...."
+
+It's hard to think of a lumbar disc herniation as juicy (except
+literally). And yet I think I know what they mean. I've often had
+a juicy bug to track down. Someone who's not a programmer would
+find it hard to imagine that there could be pleasure in a bug.
+Surely it's better if everything just works. In one way, it is.
+And yet there is undeniably a grim satisfaction in hunting down
+certain sorts of bugs.
+```
\ No newline at end of file
diff --git a/out_tensor/lm_head.safetensors b/out_tensor/lm_head.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b2f3def64018af66316434d76dc7bd38e8f6b37
--- /dev/null
+++ b/out_tensor/lm_head.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e83ee9ad17b69f0006c79a0db328747dd20fc7a99f2527e44e47b7961132b69e
+size 103953008
diff --git a/out_tensor/model.layers.0.mlp.down_proj.safetensors b/out_tensor/model.layers.0.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ffe422666a6d76157c5da6ea3632b06e0d35464d
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58b753a71e9cbb8edeee01f0f74a633fc14fe4247f8d9470d2f431e715dabe86
+size 59008184
diff --git a/out_tensor/model.layers.0.mlp.gate_proj.safetensors b/out_tensor/model.layers.0.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..61326e6715ec2c899293931a8a4864947daac19b
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bbb48147fe352e5d4839aa006d60153bcb0324af3c5bb99d7f4189804f7a55f
+size 58966744
diff --git a/out_tensor/model.layers.0.mlp.up_proj.safetensors b/out_tensor/model.layers.0.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2870643479eecde3bc1431ce9da18a3528b89cc
--- /dev/null
+++ b/out_tensor/model.layers.0.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c4dd82135f6e65d0fb8efff27d45d256518b12969a5545eeb3ea8f5fdeb6e92
+size 58966728
diff --git a/out_tensor/model.layers.0.self_attn.k_proj.safetensors b/out_tensor/model.layers.0.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..703f003bf5b63d338a9562f385cbd06f0888d99e
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b648ecc13d09154b96fdd9d90051c9a04ca824d20971dcfa0c2dbace8fa659d
+size 4227800
diff --git a/out_tensor/model.layers.0.self_attn.o_proj.safetensors b/out_tensor/model.layers.0.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c313507fce73c89c44a731b8de322f5c31b58a57
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c596236068604d0c4bb1ae70a9ea53028c2d6a0ece26a7346bfd1fe29d6fb9ce
+size 16859872
diff --git a/out_tensor/model.layers.0.self_attn.q_proj.safetensors b/out_tensor/model.layers.0.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..880e1351fc04504db509b81797fb6def807437a0
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:937ac10f7c027aa74a5a993ede3bc8528178a8ba925c73793c48fca572427e0f
+size 16859872
diff --git a/out_tensor/model.layers.0.self_attn.v_proj.safetensors b/out_tensor/model.layers.0.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1d4ffe6a2ddd007f4f769202e3668314fb0f87e
--- /dev/null
+++ b/out_tensor/model.layers.0.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadd9c3ee54460202627ca5139ecbe9e885eebbc12e5eaf9b5988e0a1df80ca4
+size 4227800
diff --git a/out_tensor/model.layers.1.mlp.down_proj.safetensors b/out_tensor/model.layers.1.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..77b792322bebba2355cc6d3b9f7d70a100b135d0
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e4c9a98433ac336c7da610adf2b9f23b4fe9f63536506a03ce8f4173d72770b
+size 59008184
diff --git a/out_tensor/model.layers.1.mlp.gate_proj.safetensors b/out_tensor/model.layers.1.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b939cb73aaac3583ca5b227e1e4d6edf119cec3c
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84a7e2ac594e4c85c42bde532feb4059a98b4e1f81f5d311e18468d4deede457
+size 58966744
diff --git a/out_tensor/model.layers.1.mlp.up_proj.safetensors b/out_tensor/model.layers.1.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..984f72a6e87cab7130a325114eb0561dba665737
--- /dev/null
+++ b/out_tensor/model.layers.1.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:509af50ac331eb7d333249885a66436202fef7b7f50e0c9817c8ce37d51056cf
+size 58966728
diff --git a/out_tensor/model.layers.1.self_attn.k_proj.safetensors b/out_tensor/model.layers.1.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..067de6a6ab042d85374b1f67e10d66f6c7e82ec7
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f102631c5753effe17e396aa1bb8f09b5f7727af8d078b8f45028ed696419d00
+size 4227800
diff --git a/out_tensor/model.layers.1.self_attn.o_proj.safetensors b/out_tensor/model.layers.1.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8256bb02bd48a47329b5c4ccec08122c5738c679
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52bd7ccbd8b4cb551ed0e2fdb93ddaa626dd67c72f5625897abeac865596be8d
+size 16859872
diff --git a/out_tensor/model.layers.1.self_attn.q_proj.safetensors b/out_tensor/model.layers.1.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d860f397045c0bd12648cf949a30f6d1861d3dc2
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aacb3dc932fc4c0e18503f37e7ff81b2aebd1d4e4db1dfd078b7c5949c416cf5
+size 16859872
diff --git a/out_tensor/model.layers.1.self_attn.v_proj.safetensors b/out_tensor/model.layers.1.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9702b863af47cc8e4ba45d8ab48095373646eac0
--- /dev/null
+++ b/out_tensor/model.layers.1.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e47471ee6cf191c4ed397002ad071209df09f15f8b87a3cd73a812f5d4b381de
+size 4227800
diff --git a/out_tensor/model.layers.10.mlp.down_proj.safetensors b/out_tensor/model.layers.10.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2d1d205c7635642d42f043805ab84b26e0c25af
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f863b6d9f46e5b6c60f2f538b89847a115d92e1e06b05ccc5c0cbd556ed3ac70
+size 59008192
diff --git a/out_tensor/model.layers.10.mlp.gate_proj.safetensors b/out_tensor/model.layers.10.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f886ba48016337497adfa7ab9d8627c6d375178b
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82eabda5dfaa8dc4c30e8f653408e7e3d01d2fcbf1bcd2cf09a591708eef9916
+size 58966744
diff --git a/out_tensor/model.layers.10.mlp.up_proj.safetensors b/out_tensor/model.layers.10.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d0700db39214135117e562440809c17fe3210095
--- /dev/null
+++ b/out_tensor/model.layers.10.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71966f0ef3456624c615909999369a7caf3b5c9de2a6f5aebdbec25d52ee2221
+size 58966736
diff --git a/out_tensor/model.layers.10.self_attn.k_proj.safetensors b/out_tensor/model.layers.10.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d1e9df0dc0e1c505893c2503c7b08d5ffd2015cd
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fad96aa9e520072456007d81da58816242764edd90eacfbaff657966a6b4e79
+size 3228960
diff --git a/out_tensor/model.layers.10.self_attn.o_proj.safetensors b/out_tensor/model.layers.10.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6085846c9a6330c29797b33f9e300078e389eb60
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29184abc9a15291f76e4907ff653a1550da4bbe273bc28e0ae9fc9a440fd16ec
+size 12862760
diff --git a/out_tensor/model.layers.10.self_attn.q_proj.safetensors b/out_tensor/model.layers.10.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..745c54433f708539eff3302fc4766b51ec6dbd9a
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b89fd52d1bc8de6dfb73642230d7dd152a5bbb6cf54ae1bfc3903cb7a0c99584
+size 12862760
diff --git a/out_tensor/model.layers.10.self_attn.v_proj.safetensors b/out_tensor/model.layers.10.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9281577bdfb4709a27dc022c87b69c7b714568e4
--- /dev/null
+++ b/out_tensor/model.layers.10.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a48605e596dabe902ff98039a73bfa1fe13f2fcd88fbeb13547e4821376595e
+size 4277536
diff --git a/out_tensor/model.layers.11.mlp.down_proj.safetensors b/out_tensor/model.layers.11.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e1b427089e215168ae3b61db02b3a621ccc6c7f4
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17a61ddc6f0678e896cf52ccbc8a432d74803c3d01aa06379e991b11c91d65b2
+size 59008192
diff --git a/out_tensor/model.layers.11.mlp.gate_proj.safetensors b/out_tensor/model.layers.11.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c6eae05444f02385837ba4b9744ca21ce66e623b
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff54e50b631a4086f367d8218c6656c41a0fb69cfd7c00099c5cc8a89f57dcdd
+size 58966744
diff --git a/out_tensor/model.layers.11.mlp.up_proj.safetensors b/out_tensor/model.layers.11.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1868f9af2d49ecd96df1aafbc897e433f92dbe62
--- /dev/null
+++ b/out_tensor/model.layers.11.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ae0ebb419ff3efca4e0810fbc77f9ede58eb58ef9535746ada2c7abe15f21b5
+size 58966736
diff --git a/out_tensor/model.layers.11.self_attn.k_proj.safetensors b/out_tensor/model.layers.11.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3bdcfcb65812836319891810cd82b94321a309fa
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa55accca8bc2457a8eeb5748c28fe04b706298438192341d327f94f70dcf5f
+size 3228960
diff --git a/out_tensor/model.layers.11.self_attn.o_proj.safetensors b/out_tensor/model.layers.11.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e799b1648734f92921567772645c67701267c803
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ee4581744296fe89ce897037879eba1611845889966c7c9a25352225d0d344
+size 12862760
diff --git a/out_tensor/model.layers.11.self_attn.q_proj.safetensors b/out_tensor/model.layers.11.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01b190d224930edd222719cde0332f6e8e979090
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d51541b3a96aa41df3375312ebf56a5cd583ed39bcd439311b29bb22a292b4d
+size 12862760
diff --git a/out_tensor/model.layers.11.self_attn.v_proj.safetensors b/out_tensor/model.layers.11.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0014357ffc70a849e864ac4ec35c2c9574625a9
--- /dev/null
+++ b/out_tensor/model.layers.11.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c1683279c8a302ef40142fce030a358a855ac025485a30612f0835a79f35c1a
+size 4277536
diff --git a/out_tensor/model.layers.12.mlp.down_proj.safetensors b/out_tensor/model.layers.12.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..09c56fd08c55a263b97c2acb38f9a3dc401e42d1
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96a40603ee96fe2f7c03b25fd31d52026c8bfe453bdfaeb12a216699fb244620
+size 59008192
diff --git a/out_tensor/model.layers.12.mlp.gate_proj.safetensors b/out_tensor/model.layers.12.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d1220b50801c3bae1fed6fd7e7707ac2c44ca29
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5526c9ccea1f8253c76a8865fb720fd14260672ba743338fab2f45e6ca38304
+size 58966744
diff --git a/out_tensor/model.layers.12.mlp.up_proj.safetensors b/out_tensor/model.layers.12.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..28983a96972176e9e2b7e275dda7ae2950fd1b9a
--- /dev/null
+++ b/out_tensor/model.layers.12.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6097378e665a6a7084648e99386f31c05c672a21ba28a84cd79d1b5d95f5a919
+size 58966736
diff --git a/out_tensor/model.layers.12.self_attn.k_proj.safetensors b/out_tensor/model.layers.12.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..53f593be570df972e4c4cb63ccb478032fb14dfb
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b45f1fdb613a1afa4dfdd94c11d1f36ed870a8dd4739e9a3edfada57a6bd7d3
+size 4227808
diff --git a/out_tensor/model.layers.12.self_attn.o_proj.safetensors b/out_tensor/model.layers.12.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c3c42ad91cd665ffff60a56a9c4f5e818e76f767
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eac9cf397880af4c6cb35a91efacfc40f0c6e7dbf49e8b18e82ff8ab391e371
+size 16859880
diff --git a/out_tensor/model.layers.12.self_attn.q_proj.safetensors b/out_tensor/model.layers.12.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c59bdf5a9a9a8ea38bbea869b292cf6f431cc5c1
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e3e7f6d25e0e9f3acb2262fa26d50f9fd426b5fc832fa319538c48d81994854
+size 16859880
diff --git a/out_tensor/model.layers.12.self_attn.v_proj.safetensors b/out_tensor/model.layers.12.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bea98efcdd7d835cbc8e1912becd18be92574df9
--- /dev/null
+++ b/out_tensor/model.layers.12.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:893c79c250a02a7ef526359aa8a1306f1f64a8abe64f4e3c09ad5906e8f37b83
+size 4227808
diff --git a/out_tensor/model.layers.13.mlp.down_proj.safetensors b/out_tensor/model.layers.13.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..277cadceb32bac3e72bc50839de02fced4a02151
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89ed950d55c7b603a99889aee7cd5aa35690c0f4cceccc2d98f14118f44dfa38
+size 59008192
diff --git a/out_tensor/model.layers.13.mlp.gate_proj.safetensors b/out_tensor/model.layers.13.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ef4b42cbc7f7cc43e34260ef8282de533dfdaad
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5283de009b90803d91a1662dd04d9ab185fa2b6ad2f2593b6add69127b68284b
+size 58966744
diff --git a/out_tensor/model.layers.13.mlp.up_proj.safetensors b/out_tensor/model.layers.13.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c668f83b6112292bdf9d69878b8b0ee26c1ff9e5
--- /dev/null
+++ b/out_tensor/model.layers.13.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:900a712455150eb07e780ab69817069f6798da3beef20f0495b9a2d9d2294823
+size 58966736
diff --git a/out_tensor/model.layers.13.self_attn.k_proj.safetensors b/out_tensor/model.layers.13.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa2abe77f33c2354c1b660ea9037697da8a92ac5
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0baaa095e481486b0a418f3f54a564985397568156e00e22eb24fd2f551e8220
+size 4227808
diff --git a/out_tensor/model.layers.13.self_attn.o_proj.safetensors b/out_tensor/model.layers.13.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..748babb84363ff60b8d6ca1d82dafdd437d48716
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bb0d4aca2f52f5559853f7b786485fb4074a94a152af6065b7bdc8b7259f0df
+size 16859880
diff --git a/out_tensor/model.layers.13.self_attn.q_proj.safetensors b/out_tensor/model.layers.13.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..856847d6b0359d346906d8cf1b64b46291e26b8c
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3ad3a5b8f877b31b930b10e4b54c3ca0aa88f16526f12407a64073c15b90f30
+size 16859880
diff --git a/out_tensor/model.layers.13.self_attn.v_proj.safetensors b/out_tensor/model.layers.13.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b00ca15ffcc17d07236d71bf547e0f67cdc6cdf5
--- /dev/null
+++ b/out_tensor/model.layers.13.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e9349c4c1fef8ed17739c765ac59a49ac96cd4dca641d0de3b2276f97facd47
+size 4227808
diff --git a/out_tensor/model.layers.14.mlp.down_proj.safetensors b/out_tensor/model.layers.14.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..519d66f7957ec37907a197495117c3d0088f5b46
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19e28748d93fca74d80fe3b3c048f0b3a62481a37f6937bd1534646d76643ed5
+size 59008192
diff --git a/out_tensor/model.layers.14.mlp.gate_proj.safetensors b/out_tensor/model.layers.14.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ffd85a9a92bace3103193b01e38ee305b2709d3c
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b2fcfab871f96ee66638c563e136dbeb649148fd3963c2492d19947493c59fc
+size 58966744
diff --git a/out_tensor/model.layers.14.mlp.up_proj.safetensors b/out_tensor/model.layers.14.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..820f21411050eb9b010e4193c5a9780b6129f781
--- /dev/null
+++ b/out_tensor/model.layers.14.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bb2bda7116f01685cce7b6f4186196a698a800b3a384c464d00851d17464b65
+size 58966736
diff --git a/out_tensor/model.layers.14.self_attn.k_proj.safetensors b/out_tensor/model.layers.14.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8e5c937d4dc56d51db4ac0446ae32f49d86d5c62
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dee5b6e16406e94db33848773de25960cd8cf1e7e4263d4de3c6d1ec11d26ae7
+size 4227808
diff --git a/out_tensor/model.layers.14.self_attn.o_proj.safetensors b/out_tensor/model.layers.14.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..181f45468932f4c305132f2937cfe7b7fb3a83de
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2238b14fecde21125a8be974d23c850e54db7621cbe1e834d5d43a3011d4d82f
+size 16859880
diff --git a/out_tensor/model.layers.14.self_attn.q_proj.safetensors b/out_tensor/model.layers.14.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5671faaba0c17c62f640e66b4d9fbd5a3e1a652e
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c778f2fc4d3e6d9be2708efea0bd01a1828ffde0a3ce9da6f1564d460327e3da
+size 16859880
diff --git a/out_tensor/model.layers.14.self_attn.v_proj.safetensors b/out_tensor/model.layers.14.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..616d588a586129928107036455002ffd59af6ca4
--- /dev/null
+++ b/out_tensor/model.layers.14.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f27d5b08c853d653594ac22a74dc4442691075189ab0a64cc510e84f1a172f6
+size 4227808
diff --git a/out_tensor/model.layers.15.mlp.down_proj.safetensors b/out_tensor/model.layers.15.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41106fd73c1eb7cfae29e95d1fddb4686253820f
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2570066c0145807854b37e0c79e89098238f2ab11c2c6ae3aa30e97fb0c304
+size 59008192
diff --git a/out_tensor/model.layers.15.mlp.gate_proj.safetensors b/out_tensor/model.layers.15.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..63e97e7362a2d0c15377849ad4835b9ee8190e41
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1de4820453bfbd34b02a13731c749bf147dadfbca563d10b9265c00d6395aa3
+size 58966744
diff --git a/out_tensor/model.layers.15.mlp.up_proj.safetensors b/out_tensor/model.layers.15.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e98dfea380db4111479854efa5859b6be87793e1
--- /dev/null
+++ b/out_tensor/model.layers.15.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0daaa3c67a22f9a597b5e50457ccc7e12dcccd7a6eae6bf61ef6ff9e655f6110
+size 58966736
diff --git a/out_tensor/model.layers.15.self_attn.k_proj.safetensors b/out_tensor/model.layers.15.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f662b6138e5c2f7244315680da592d883de6422
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08596568db538887ec70bac0d2b6ec9b4e5cf72652a19f45fdb5992c2b88a545
+size 4227808
diff --git a/out_tensor/model.layers.15.self_attn.o_proj.safetensors b/out_tensor/model.layers.15.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..721b8248bfe3b5bf3220e89689cbac043d6bef66
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:608f3ab39b05e989b12b193ccadf7e730848cdee39fae2dddb2bcfe73d542247
+size 16859880
diff --git a/out_tensor/model.layers.15.self_attn.q_proj.safetensors b/out_tensor/model.layers.15.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f7becbae5b739b626450ed50c3341fe438a6c77
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddcbeed0990b7902420d6a3add8a7ab075e08268411f2f49899fa928c930aff5
+size 16859880
diff --git a/out_tensor/model.layers.15.self_attn.v_proj.safetensors b/out_tensor/model.layers.15.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f90914aa1563520b75d158cb8cb9e25f3fc60cb3
--- /dev/null
+++ b/out_tensor/model.layers.15.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7180cb21d479f916d572f20cd018b7d0eaba842f679a7c62139673f51ed748e
+size 4227808
diff --git a/out_tensor/model.layers.16.mlp.down_proj.safetensors b/out_tensor/model.layers.16.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d09212e144b236c4c1a4ac08b5824a105445b5c8
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:973a89e92f26ba8c3ab178bc51cffcb4656651ea832911e9d56f962b564745be
+size 59008192
diff --git a/out_tensor/model.layers.16.mlp.gate_proj.safetensors b/out_tensor/model.layers.16.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37dd12bb2684cab1c1fea9407aad0933bcf60822
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2236689151645c95484bbb52cac5582bb81dc0010077e4e799f8598d8f1b0556
+size 58966744
diff --git a/out_tensor/model.layers.16.mlp.up_proj.safetensors b/out_tensor/model.layers.16.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b07ff41ea0f166da39089f05bb82075947c7460f
--- /dev/null
+++ b/out_tensor/model.layers.16.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a916c859567e42d2c2a7e4de02c0c5a1da69cb1fa6f8d628f6b7497e24a1c7
+size 58966736
diff --git a/out_tensor/model.layers.16.self_attn.k_proj.safetensors b/out_tensor/model.layers.16.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..958a11d5c752081f547ff8adda001ee517e180d3
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d9b037f2a0b8d9cb0bee2de5ca0cade176dc3c8c090ebd0e4abd133e13dbe8
+size 3228960
diff --git a/out_tensor/model.layers.16.self_attn.o_proj.safetensors b/out_tensor/model.layers.16.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bad4af0c7e66b9d9e5bdb0a4622a18bc219fe5fb
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e25e36971cfe98a11b5ee3e9daafb7d4407eca2cdf0169b5b019e9d5e464049
+size 12862760
diff --git a/out_tensor/model.layers.16.self_attn.q_proj.safetensors b/out_tensor/model.layers.16.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dd2798cecce273dad53ad0061b8ab62ae9b05cc7
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40bae9459fb072768bb75e90234b215840164f8c868c5d8aaf594387d058dddd
+size 12862760
diff --git a/out_tensor/model.layers.16.self_attn.v_proj.safetensors b/out_tensor/model.layers.16.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..35a5ad919070de15d527824db952fea83a910111
--- /dev/null
+++ b/out_tensor/model.layers.16.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a66768606d90ab950f230efebc4e641ced36a39f78cd5160bb9d5e334159d87
+size 4277536
diff --git a/out_tensor/model.layers.17.mlp.down_proj.safetensors b/out_tensor/model.layers.17.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a9cd5f1836af4bb1c040af0611066aeeecd45167
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:348ace0a816945b111729743456568a238f676ffe1dabc2d73ff44413a7d68dd
+size 59008192
diff --git a/out_tensor/model.layers.17.mlp.gate_proj.safetensors b/out_tensor/model.layers.17.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ed17aa90f68829df0a0c683118b0ee251197cd20
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fe849c924319c8fa932d1f66747facbb2b67a029851bad0d99e2307a7bb1977
+size 58966744
diff --git a/out_tensor/model.layers.17.mlp.up_proj.safetensors b/out_tensor/model.layers.17.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01fc512adff1659afe098dfb0db19d5fbee4e00e
--- /dev/null
+++ b/out_tensor/model.layers.17.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0c645ff1e94579ac6f702d1016d10e46f1a299ab45cbe38663146f3c7bb46cf
+size 58966736
diff --git a/out_tensor/model.layers.17.self_attn.k_proj.safetensors b/out_tensor/model.layers.17.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..971dc4f5e79555bc11cff2a11416e1a87014eb4a
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7c3812d7b182f4b0d8c7f53d1fd22d5beebd0d313cd9d135dc98e5af0f13877
+size 3228960
diff --git a/out_tensor/model.layers.17.self_attn.o_proj.safetensors b/out_tensor/model.layers.17.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..427c062cfee285f7f864d5b95f95ebdba2d435ab
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96547719f8d729dbe0c022ee3eb9ebc7153f3b035a66061248051fd0338c3c85
+size 12862760
diff --git a/out_tensor/model.layers.17.self_attn.q_proj.safetensors b/out_tensor/model.layers.17.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1d32bc86061d3b3d222c7976f9e3e01bd6edeac
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:978084f9b14fe5fb4b2570c8ba295273b9452459ea71fdea5fb0367eb47f3225
+size 12862760
diff --git a/out_tensor/model.layers.17.self_attn.v_proj.safetensors b/out_tensor/model.layers.17.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..12fc7502dd716e2b61bd99f82bd5ead760354937
--- /dev/null
+++ b/out_tensor/model.layers.17.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84ccc5d7f0a1684f7d3f90603ac224c75ccc24f0ff4e453afdc7108fbda2b9e5
+size 4277536
diff --git a/out_tensor/model.layers.18.mlp.down_proj.safetensors b/out_tensor/model.layers.18.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..53259ab758776228a4b71b70c2f70399cacbb32b
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcc60c9d625ce13a5a32c68c9dbcb55b6c7e49c77f98276469510b48f1dbfc10
+size 59008192
diff --git a/out_tensor/model.layers.18.mlp.gate_proj.safetensors b/out_tensor/model.layers.18.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..94bc2aa15b2875ebb26e48b350eaec24d33365e0
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a7908ea0e885bc4d47c53638f001230e2f027568e2f89ae5e4a15d7f00bc14
+size 58966744
diff --git a/out_tensor/model.layers.18.mlp.up_proj.safetensors b/out_tensor/model.layers.18.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..56fcd2fb726d884f4bfd6d99c0cecdab361376b7
--- /dev/null
+++ b/out_tensor/model.layers.18.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c69e0bbd03f62153be1148827bb54622781ed675e21fe2ee096daf2242a88afa
+size 58966736
diff --git a/out_tensor/model.layers.18.self_attn.k_proj.safetensors b/out_tensor/model.layers.18.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..23b99d241612d47af14072f4477bb63f4039cc6c
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1797338432b2681c3f87a1cf8d3405dc384b785214b1684d483a650b39e65333
+size 4227808
diff --git a/out_tensor/model.layers.18.self_attn.o_proj.safetensors b/out_tensor/model.layers.18.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4e804636707b0e71ce610498cdc93fb60f7c4bc9
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ac8befa8b202354ed1f743377ec4cbba75815b332b185f12e31fffd7536f10a
+size 16859880
diff --git a/out_tensor/model.layers.18.self_attn.q_proj.safetensors b/out_tensor/model.layers.18.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa13c1350b46a5f50400cf904327de201cb5e765
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52bf522f9ff902d54fb04b6fd1a8ab6902426235c14b51217bb718190c5df824
+size 16859880
diff --git a/out_tensor/model.layers.18.self_attn.v_proj.safetensors b/out_tensor/model.layers.18.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9a3a9670270eaba45c5addba874f75a0700716e0
--- /dev/null
+++ b/out_tensor/model.layers.18.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4901f7854a8e11cec2ab78f7d2af6b92817134f760a0abbf6deb8cfdbb3171b1
+size 4227808
diff --git a/out_tensor/model.layers.19.mlp.down_proj.safetensors b/out_tensor/model.layers.19.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c4ac15e8532bd537239e783ede52b603df3c8934
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4aca8504ecc7a7dc3d2a4d68f954a8d8882ef7332fbcaa1d8c4beb25035d3123
+size 59008192
diff --git a/out_tensor/model.layers.19.mlp.gate_proj.safetensors b/out_tensor/model.layers.19.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6d40e86102e835082d1b5417ee7f3af864e4a266
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e74e7bdec9025513a8226a795a333704dc44cff9b3649f484ac465ac02f80597
+size 58966744
diff --git a/out_tensor/model.layers.19.mlp.up_proj.safetensors b/out_tensor/model.layers.19.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a5d5544e27fbdbb1b5e96131f2b6ddace7321cad
--- /dev/null
+++ b/out_tensor/model.layers.19.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c31dfa3b393155e3e62b7ee992380741c438c90b7970a8707515f3a90b4231c
+size 58966736
diff --git a/out_tensor/model.layers.19.self_attn.k_proj.safetensors b/out_tensor/model.layers.19.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b2103e60fbb130866536562e1a11083e45cf089
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885a09433bfb856c15de16a818709590d5b66ff03415a48bbf6abbf88c934d0d
+size 4227808
diff --git a/out_tensor/model.layers.19.self_attn.o_proj.safetensors b/out_tensor/model.layers.19.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7373ccb845b4b0afbab63bb6bb47c74b24bc778f
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eed312ffcd7f02a45b2137bfa49afee9ab7c7d57d598f298300e58742248fd3e
+size 16859880
diff --git a/out_tensor/model.layers.19.self_attn.q_proj.safetensors b/out_tensor/model.layers.19.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0e9198be6bd0f17da3d3efd8e51ea66537346a1b
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:762494babdb644a46d50d29a141e93368e885eef4cb1834540e0478095f140fe
+size 16859880
diff --git a/out_tensor/model.layers.19.self_attn.v_proj.safetensors b/out_tensor/model.layers.19.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a3f49ed1c5a85d9f262ca08c7235d4bbcc569a7b
--- /dev/null
+++ b/out_tensor/model.layers.19.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b101650c7ca4d978707051020fcd42ec1b2631a1678b9f2e3b756d90233f55b
+size 4227808
diff --git a/out_tensor/model.layers.2.mlp.down_proj.safetensors b/out_tensor/model.layers.2.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..015dea6231fde675ee2df11088fa7de40b8bdcab
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99b8b2dfe3d9064a8f4b48c163f8af5e2fd135453b4bcd02cad08fecc224ba95
+size 59008184
diff --git a/out_tensor/model.layers.2.mlp.gate_proj.safetensors b/out_tensor/model.layers.2.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..caa0bd2b7b0234352b383cb8270278a15618157c
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f92d474c0505b2847ed6fc3289a2be965e8ff587b27b511dbd17dd4210e95975
+size 58966744
diff --git a/out_tensor/model.layers.2.mlp.up_proj.safetensors b/out_tensor/model.layers.2.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ac13d5ea0f54fdb233030cf3bd9882848c68615
--- /dev/null
+++ b/out_tensor/model.layers.2.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b841f42ed19f870ff0ddf52b9576625caeb07e1ab3180caa84cab4d0382fcf87
+size 58966728
diff --git a/out_tensor/model.layers.2.self_attn.k_proj.safetensors b/out_tensor/model.layers.2.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa769cf6391fbbb9e0ddb8904b3853caebd158fb
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72616d2d555449d6375e6f56a3d3c4c9391ea587ece519429ab7497154e8c0dd
+size 4227800
diff --git a/out_tensor/model.layers.2.self_attn.o_proj.safetensors b/out_tensor/model.layers.2.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc6070b83241bbd049381c80fde30836e3e157e0
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b68d8bbcaccf03e8bfd88753bb5b2f861fbde1b11c4b2d8c1abe9609254a8b40
+size 16859872
diff --git a/out_tensor/model.layers.2.self_attn.q_proj.safetensors b/out_tensor/model.layers.2.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7495f0ca1ac5f8e8c3565c6b28c549eeea9840f
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edcc33dc039a596dd4920637a895152385034b78341af68cfbc5e4ff63054977
+size 16859872
diff --git a/out_tensor/model.layers.2.self_attn.v_proj.safetensors b/out_tensor/model.layers.2.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..12a17cf75c09f8be4c90062649ea7199e7869ceb
--- /dev/null
+++ b/out_tensor/model.layers.2.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e3f61ee88ab815c1495da4d7bf90162ac2719f4bcef8da5454f99c75d1bc2c
+size 4227800
diff --git a/out_tensor/model.layers.20.mlp.down_proj.safetensors b/out_tensor/model.layers.20.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..632f059ac9ea037a5feb0def03ff68cec194d97d
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c60db21c611fdca5576031d4797c73feed08a6ee5c5899fc01af483104664db2
+size 59008192
diff --git a/out_tensor/model.layers.20.mlp.gate_proj.safetensors b/out_tensor/model.layers.20.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ced7454db5940fc9a0ed2c9b5a8564e39bc22379
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:354cd53ce45cf36b6d9744923551b472f46aa7fa6884f777e07c38f2763cb904
+size 58966744
diff --git a/out_tensor/model.layers.20.mlp.up_proj.safetensors b/out_tensor/model.layers.20.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4af665939f978899ef7e8200868a1e59a96ee89
--- /dev/null
+++ b/out_tensor/model.layers.20.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a77ad44e781d72f6cb91c8aaea6b66617a824dfa1522011ea2dfc950b27794f
+size 58966736
diff --git a/out_tensor/model.layers.20.self_attn.k_proj.safetensors b/out_tensor/model.layers.20.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5768e88944c78b3aa0a11645f3c7d7908b0aefe0
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0be441efd101f7a12f4fb9b300b786d9b6a25c4f1aa3b5e7935387c57b2dc03e
+size 4227808
diff --git a/out_tensor/model.layers.20.self_attn.o_proj.safetensors b/out_tensor/model.layers.20.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4294f6c085b711d5c013c66d207ec1e964e6e131
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b16391878711b46ee9040ccf0a247d2dbbb935752b5de5c96081265fe8a12dd9
+size 16859880
diff --git a/out_tensor/model.layers.20.self_attn.q_proj.safetensors b/out_tensor/model.layers.20.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1baf01188faa6c98a37b8800e1e4e65db6e1f7a0
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bcfbbc7289b1bb7934400c87fde08eb18d845af63045f59ff5729cd37abbc60
+size 16859880
diff --git a/out_tensor/model.layers.20.self_attn.v_proj.safetensors b/out_tensor/model.layers.20.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90251976d09aceeb5a4c5a36b98cebbffbdb7723
--- /dev/null
+++ b/out_tensor/model.layers.20.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aa9212ff12f6ea99ba57519adeb596f987d5c375ffaada83181da430836e453
+size 4227808
diff --git a/out_tensor/model.layers.21.mlp.down_proj.safetensors b/out_tensor/model.layers.21.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2e50e31e31273cf723b0166bf2158b90f09a799c
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e351305cb3c46b4772fe706cdf6c85e27a73fe30a3dfa67c175faed0ec5a235
+size 59008192
diff --git a/out_tensor/model.layers.21.mlp.gate_proj.safetensors b/out_tensor/model.layers.21.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a7a7acd9fb1fed6a0940011934736f5f5c710bd
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b1f109b7cca2002be3cc25fa8ddb690a9640c415f5caa76bb4d70bcbda3830b
+size 58966744
diff --git a/out_tensor/model.layers.21.mlp.up_proj.safetensors b/out_tensor/model.layers.21.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..26d78a4fce05bc91f2cb33b44db9997e136b6df4
--- /dev/null
+++ b/out_tensor/model.layers.21.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68c39801b0b062f5ba29d20ae793b118024ccef1bcf09e6986ac46289fe7043d
+size 58966736
diff --git a/out_tensor/model.layers.21.self_attn.k_proj.safetensors b/out_tensor/model.layers.21.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..968536fbabf902a159cd08e06d19931c12216740
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc97e8d77e8a312ec9213c72c63ea12a0d1909f7a88b93134286aa511cba9b09
+size 4227808
diff --git a/out_tensor/model.layers.21.self_attn.o_proj.safetensors b/out_tensor/model.layers.21.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3586668710f08ef3b4d50d096e9de180848a0d6f
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc3ed6ed538f42ab1ea43e2602b082e8ec371398d15ba7de3572523274d83e91
+size 16859880
diff --git a/out_tensor/model.layers.21.self_attn.q_proj.safetensors b/out_tensor/model.layers.21.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16ba22c37422ef91ac256777e054f6cbd362238f
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25cd0fc6a190c875d7dd543f011b7c128855bd7cc8b59d8269437f6472346e23
+size 16859880
diff --git a/out_tensor/model.layers.21.self_attn.v_proj.safetensors b/out_tensor/model.layers.21.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b7017c4ca93925402ed8dfb9eae22779401ebb99
--- /dev/null
+++ b/out_tensor/model.layers.21.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae465f75b8e91c148f3330f7645ff3db3d3c1a1d1b668cb2ef5dd1e1ee1466cf
+size 4227808
diff --git a/out_tensor/model.layers.22.mlp.down_proj.safetensors b/out_tensor/model.layers.22.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a877ee2f3a0f6dffe65ab61c314bd501b20735b
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d96b2edef5155ebd990d60868057075decb7f1c117978473d95251b61b848beb
+size 59008192
diff --git a/out_tensor/model.layers.22.mlp.gate_proj.safetensors b/out_tensor/model.layers.22.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2977b88532a6ef87dc1f4f2c5920daa3e535ae0
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:720ea9ed6f9703d548825e59b0a0b8ae0c1c233458240a1c8538d5c433297751
+size 58966744
diff --git a/out_tensor/model.layers.22.mlp.up_proj.safetensors b/out_tensor/model.layers.22.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..59dfb3a8cf817306a35f50bb344f46d1cd916fc2
--- /dev/null
+++ b/out_tensor/model.layers.22.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d562e1f1fe7386e27ded55d3150a9c866dd4cdb350cdde4313b02fbbf02ecdd3
+size 58966736
diff --git a/out_tensor/model.layers.22.self_attn.k_proj.safetensors b/out_tensor/model.layers.22.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d5ba2728658131a0493cb8e68fe11494efe422e9
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f49053c494a42065960cadc72466257c252e9d5912c5f5461734cee04f742f5e
+size 3228960
diff --git a/out_tensor/model.layers.22.self_attn.o_proj.safetensors b/out_tensor/model.layers.22.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..023a802ab8724c25c4b023ca333907dba309fcdc
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb06d9cfbf7f830689f6e68af8ce75de11ad65cdf0c89149f37eaa8f1e5cf2ad
+size 12862760
diff --git a/out_tensor/model.layers.22.self_attn.q_proj.safetensors b/out_tensor/model.layers.22.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a9c16c755c4bfc9b693fe02240aef9bf4ec0f22
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81102b8c8b0be4277668e9a475a4974685f643c5d5310d415705e7d5f009309c
+size 12862760
diff --git a/out_tensor/model.layers.22.self_attn.v_proj.safetensors b/out_tensor/model.layers.22.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..18f1726197f156110b489cc6dcecb15117a4b1bb
--- /dev/null
+++ b/out_tensor/model.layers.22.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd847f563a09c44b94687ad22d9095b6ffdd4e792c1c5771bfa2d6309a659a88
+size 4277536
diff --git a/out_tensor/model.layers.23.mlp.down_proj.safetensors b/out_tensor/model.layers.23.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bd39e2cd66d3becfb32979ba90342e10a234e206
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:544ffbf2f59371b8e0417b4ec0b17b4a8cbc4886260a45c62ec18a4e8e048ac6
+size 59008192
diff --git a/out_tensor/model.layers.23.mlp.gate_proj.safetensors b/out_tensor/model.layers.23.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d642782c2ee66502ac9e7951302aa72123444952
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e68ca4e37d40152dbbf5ae363d569f00cccec2b326c28cbf38baf1c70832fc2f
+size 58966744
diff --git a/out_tensor/model.layers.23.mlp.up_proj.safetensors b/out_tensor/model.layers.23.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ea7299b9cba3af3d7520926e97f191af1f7d6ab
--- /dev/null
+++ b/out_tensor/model.layers.23.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dab558d286a9a04fd2b6617784d870a306afaccedbd9adbc6ef1f56ff07bb28b
+size 58966736
diff --git a/out_tensor/model.layers.23.self_attn.k_proj.safetensors b/out_tensor/model.layers.23.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3d470e6b2b6e41e3438514e4362731a80cb46dd
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c96a38cbb3b5a2754f6056469310ee34a8382d1703cb79e9992ebe6b28ff7386
+size 3228960
diff --git a/out_tensor/model.layers.23.self_attn.o_proj.safetensors b/out_tensor/model.layers.23.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbb37e5fb99a109fa94f26c873c2934b0200bf72
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b37cfa56d554afa128ed6d9acbd580f3adb882262cbf84a6f7d2102bc4108ec2
+size 12862760
diff --git a/out_tensor/model.layers.23.self_attn.q_proj.safetensors b/out_tensor/model.layers.23.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02d3f3c5b602c4db87648a4bc74ab723a44991d5
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa86450a48afdd8c53fe0cbfbd2e60d52a7ab54e05219c7f2976f903adb6524c
+size 12862760
diff --git a/out_tensor/model.layers.23.self_attn.v_proj.safetensors b/out_tensor/model.layers.23.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2138cc73c2a2eada8d59adf8fe2bf3ecd058c09f
--- /dev/null
+++ b/out_tensor/model.layers.23.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0d8f2f8695ac78dcd9c1c42ac59f320ab6c9c09847486da7a5407ee73bfcb50
+size 4277536
diff --git a/out_tensor/model.layers.24.mlp.down_proj.safetensors b/out_tensor/model.layers.24.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f037d13996a636769c55301f05121dc78ebcbe30
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a1d7f783fcc1c2ac7044dc53485ce967036328c49531dc3846571302217c23
+size 59008192
diff --git a/out_tensor/model.layers.24.mlp.gate_proj.safetensors b/out_tensor/model.layers.24.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..220993cb079ddcb7bb5ded4a18316549c65a01e7
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0fe7fd718cc7232855a8f73d70f3597d5c89c994eea3563e20ed70f56c18420
+size 58966744
diff --git a/out_tensor/model.layers.24.mlp.up_proj.safetensors b/out_tensor/model.layers.24.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..885d7d79abb80e6216d141e0bc0bae0252722c5c
--- /dev/null
+++ b/out_tensor/model.layers.24.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d23d1f1617a36e9071fae3660351d863b2e605b0ff9d9e14ad722f4950f83f2
+size 58966736
diff --git a/out_tensor/model.layers.24.self_attn.k_proj.safetensors b/out_tensor/model.layers.24.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..054a322184149a68175f048ef12e740f434ad61a
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eeed41c1ced28f47c9a331abb726f4253356c60f94b376b8e00c9363a9a45b0
+size 4227808
diff --git a/out_tensor/model.layers.24.self_attn.o_proj.safetensors b/out_tensor/model.layers.24.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d651eb777f9b038ec41599fc6a2b1c3e9767912d
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3518f477cfd8274a38888ac69adb55fa2b407060feeca121a63e5d49a3fcc118
+size 16859880
diff --git a/out_tensor/model.layers.24.self_attn.q_proj.safetensors b/out_tensor/model.layers.24.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fba3af0bcd3a9c7567384f6fcfd22a3083912c01
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bab9e464c6a4d983603699c17a58eb271300e429c7e674cc286f5911093fa54
+size 16859880
diff --git a/out_tensor/model.layers.24.self_attn.v_proj.safetensors b/out_tensor/model.layers.24.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8cc7c2e4b9fe235f074238eecf776bf7ba4b09c0
--- /dev/null
+++ b/out_tensor/model.layers.24.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8ca0774f2f3a6ee2947cb1e76995fe743e947119dfce07305933bb3b3564b36
+size 4227808
diff --git a/out_tensor/model.layers.25.mlp.down_proj.safetensors b/out_tensor/model.layers.25.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97e444ad66c5464c8b8d6f6003c98bd4eca4f804
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49e352a957850da951c12dab5b63b8a76a0604b90724be9bc7cb5b7db7d07252
+size 59008192
diff --git a/out_tensor/model.layers.25.mlp.gate_proj.safetensors b/out_tensor/model.layers.25.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cea4d5aa3e334e4e1fcbf579fb637c023aaa7ed8
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01f7bdf6731444863a5dbcbfc7671fdaece90f83632564cadbda1cb25a8e9883
+size 58966744
diff --git a/out_tensor/model.layers.25.mlp.up_proj.safetensors b/out_tensor/model.layers.25.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6abc98c311fe88437b4219000247b91e750d8943
--- /dev/null
+++ b/out_tensor/model.layers.25.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13047d795602f3e907362670d3625e168d6c2e45bd38bf8f400bbc7b34302340
+size 58966736
diff --git a/out_tensor/model.layers.25.self_attn.k_proj.safetensors b/out_tensor/model.layers.25.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ea96f84ebecd962565fd04f68d17ba3ff23d442
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b8547a944a9dcf0bc2c8cd1283158ae03457fd03f4adeef595b9ed9f31c2056
+size 4227808
diff --git a/out_tensor/model.layers.25.self_attn.o_proj.safetensors b/out_tensor/model.layers.25.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..df71b2aeb07bddb0d99524e0b53c8c22154106db
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677786ff8e23db3ef14ce3141a43d5d33556d47952dbd56802791772a07c36be
+size 16859880
diff --git a/out_tensor/model.layers.25.self_attn.q_proj.safetensors b/out_tensor/model.layers.25.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..263e085de002d3650da4047f6db13827db17929b
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b021db68f4766d28822e064f565cea9fbe8fe5655fc24b8624b5bbcd7d5bdc33
+size 16859880
diff --git a/out_tensor/model.layers.25.self_attn.v_proj.safetensors b/out_tensor/model.layers.25.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1905efa3e95fa4f148b333554716765fa78287e2
--- /dev/null
+++ b/out_tensor/model.layers.25.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c00554838e10d0d2217a98a7e3b06fdcfe454e7bfb1b2ca7bdaacf8436e2b55
+size 4227808
diff --git a/out_tensor/model.layers.26.mlp.down_proj.safetensors b/out_tensor/model.layers.26.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0476a0a079dd5fe4a450ca34653826f736ac4f57
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5197e65a4164b34beb0da69fa05edff85530420ad9585ab58d899fdea405375a
+size 59008192
diff --git a/out_tensor/model.layers.26.mlp.gate_proj.safetensors b/out_tensor/model.layers.26.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..741ce6c5b6133bcc56727404f8c3b3b8eb4942ff
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce278b180009220c1994f9ef595dfb771e2efcf2947cbb6a0e12cc5be94a5ca3
+size 58966744
diff --git a/out_tensor/model.layers.26.mlp.up_proj.safetensors b/out_tensor/model.layers.26.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f165186938b82e129571282e4053cc07348ba347
--- /dev/null
+++ b/out_tensor/model.layers.26.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0edc4ba2b6143ef7149bcedb9bd1709070ccfcd51a50640d77a56dc7efb6987
+size 58966736
diff --git a/out_tensor/model.layers.26.self_attn.k_proj.safetensors b/out_tensor/model.layers.26.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..687b8a7c69af1f7e729c5907fc80be5e531f956d
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:438c74ff4ae76884abf78cfd07976cb038da6b9e1c39f4cbf9c6a3906697468f
+size 4227808
diff --git a/out_tensor/model.layers.26.self_attn.o_proj.safetensors b/out_tensor/model.layers.26.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9ae1dd552fc744f5549ca807a83e6a13bdb0dae
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a737033ffad7729dde809e4b89da0e8568803e04fcb8dfda6cef75af53c54d13
+size 16859880
diff --git a/out_tensor/model.layers.26.self_attn.q_proj.safetensors b/out_tensor/model.layers.26.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..93993a60d242fe57c865d5a63c829bb400630864
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36d00d3140253c2c8f9f6af6c4c817c5cc65fd3dc748e604dc401d25c2095fb1
+size 16859880
diff --git a/out_tensor/model.layers.26.self_attn.v_proj.safetensors b/out_tensor/model.layers.26.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ccc9e33272aa152b3a04e80a3a9fb6cc78f563a6
--- /dev/null
+++ b/out_tensor/model.layers.26.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:911762bbc0f3b34c99bd22f2c51d0a25a3193628b16d36f36f55d4c359730b2a
+size 4227808
diff --git a/out_tensor/model.layers.27.mlp.down_proj.safetensors b/out_tensor/model.layers.27.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a1647eedbde9e979f53a979a0a0a6ade1a45c439
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74b7b4c3d30f9c524e943af42a73787bf461dd1ad62c271e846372db773fa2e1
+size 59008192
diff --git a/out_tensor/model.layers.27.mlp.gate_proj.safetensors b/out_tensor/model.layers.27.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..83a8b1b9776cecd442d49e397c3ab458f5921e58
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfae02c974eab4fd24078b30e3198c62bcd4470e92cffde27cf429931a9c64c6
+size 58966744
diff --git a/out_tensor/model.layers.27.mlp.up_proj.safetensors b/out_tensor/model.layers.27.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa26bdd332852b2abc26402e7d9190ab6af41b03
--- /dev/null
+++ b/out_tensor/model.layers.27.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9960e13d7db1218be090dc71bf9ca40fdefea04fb3592187daec00019d2c8431
+size 58966736
diff --git a/out_tensor/model.layers.27.self_attn.k_proj.safetensors b/out_tensor/model.layers.27.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ee8c5130c7bab01a13644ead290605405514a1f8
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8b379b9ffc9d2ca353ec6999a160492275988ff71293014e54a61e0e9cbf1a
+size 4227808
diff --git a/out_tensor/model.layers.27.self_attn.o_proj.safetensors b/out_tensor/model.layers.27.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3b18d51b2c21ce34b54410b32750d9478e0e031b
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12c9cad415b207dea2c291286f745a735e9566c8918af0c985d28777d1b6a817
+size 16859880
diff --git a/out_tensor/model.layers.27.self_attn.q_proj.safetensors b/out_tensor/model.layers.27.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f4afbe2375daabbc92e54f0e5f8e0a33d5a9689f
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08547aab0144d71a62d021c5a93241090cd514ff7c12afc911a21e0cf8ecb25e
+size 16859880
diff --git a/out_tensor/model.layers.27.self_attn.v_proj.safetensors b/out_tensor/model.layers.27.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..996924ea4c5638b51827b9afbafd352ffd6d7243
--- /dev/null
+++ b/out_tensor/model.layers.27.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6522fd938aba1511d3947b5f879af64fb35786594892b4f6d0d06ed8baf92d6c
+size 4227808
diff --git a/out_tensor/model.layers.28.mlp.down_proj.safetensors b/out_tensor/model.layers.28.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..798d294d3b23ad1d433ee5ec0a3bba836c767e50
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f76b01f5635ce8daa2b05e4df1fbcf1509e1f3de3dcc363d8a4437bf29d2d84
+size 59008192
diff --git a/out_tensor/model.layers.28.mlp.gate_proj.safetensors b/out_tensor/model.layers.28.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9199a8ad46e967852c816a48ec80d29fc63d15c1
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf2d88f2418a94e701b588b7fe42a8d94b8ac8263d6c025b08c77e28a927f991
+size 58966744
diff --git a/out_tensor/model.layers.28.mlp.up_proj.safetensors b/out_tensor/model.layers.28.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e193c1873b488d6f39f60132db9c10a668a9a68e
--- /dev/null
+++ b/out_tensor/model.layers.28.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cf48e0f8e899b3c7fb1ab9757749746ee7aed8f13603f73e3a339200d7ddb9a
+size 58966736
diff --git a/out_tensor/model.layers.28.self_attn.k_proj.safetensors b/out_tensor/model.layers.28.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f5a8146d52208e945de8028e30357ad49c07524b
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a43fc7af495cf8fcc03ab3b27cadec940c7256cd85761ef2f6ee7624f3a10d2a
+size 3228960
diff --git a/out_tensor/model.layers.28.self_attn.o_proj.safetensors b/out_tensor/model.layers.28.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..809bec0b4b4b4861964249dd4fa7aa83a5607e1f
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c77d055d800105b0e7e7261fe8c1d958a0ab9a3830898dbd3614b3be0094a4
+size 12862760
diff --git a/out_tensor/model.layers.28.self_attn.q_proj.safetensors b/out_tensor/model.layers.28.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..104f462f53c237171953574be00175b1a3bcda31
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6777aa1d7eb8fae4280e4616c0f702e747169140bf86866db2cd6469d96787c
+size 12862760
diff --git a/out_tensor/model.layers.28.self_attn.v_proj.safetensors b/out_tensor/model.layers.28.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5ab7400f262aa9705725eaaaf6dad973c5e01858
--- /dev/null
+++ b/out_tensor/model.layers.28.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b9db68f20f20a81b9b14514c0e6db15d977067575959e339208426397515235
+size 4277536
diff --git a/out_tensor/model.layers.29.mlp.down_proj.safetensors b/out_tensor/model.layers.29.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fb4fb93b32993d1586641654dc8351be5d941771
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44084cccb17ef7bf59ce0e3fea9b699422ea64ca0740430e0f7f59c9aa4e9547
+size 59008192
diff --git a/out_tensor/model.layers.29.mlp.gate_proj.safetensors b/out_tensor/model.layers.29.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ea12383cef458b2b6c0a90654f1a33ddc7d4f1c
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25592245970f47f4427fbbbef6e134612389dcd4663a6231f9df67aa2e9f8f23
+size 58966744
diff --git a/out_tensor/model.layers.29.mlp.up_proj.safetensors b/out_tensor/model.layers.29.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0676b00a7317148f60f068d1d5918899fbc7b1b0
--- /dev/null
+++ b/out_tensor/model.layers.29.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ef922b9926ba2525d85bdadb83bc28d693fcdf4e007a55fa555399078624ac7
+size 58966736
diff --git a/out_tensor/model.layers.29.self_attn.k_proj.safetensors b/out_tensor/model.layers.29.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff4fa95c8c7b2e70ae05e2143387f78f4ae435ab
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48148ec73f77c58592f3b68589fa3e2411911790605dc7351c49998968013f5d
+size 3228960
diff --git a/out_tensor/model.layers.29.self_attn.o_proj.safetensors b/out_tensor/model.layers.29.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a08b6f95e69b71601585e7c5cdc47eace3aafaa3
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a509aed47e1f38f47f044bdb5b98760d222fe8a048e23476c28b68256052df3
+size 12862760
diff --git a/out_tensor/model.layers.29.self_attn.q_proj.safetensors b/out_tensor/model.layers.29.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dd26f9da26ff1ebb23e3af0d8634044d62946020
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b079e60bba4a52b5eeed3c9a023974c8cac232d8ad0df66778ab4e506a2536b
+size 12862760
diff --git a/out_tensor/model.layers.29.self_attn.v_proj.safetensors b/out_tensor/model.layers.29.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0e86fe27cc6a6ae17f73a1cb2f476bd8502f5316
--- /dev/null
+++ b/out_tensor/model.layers.29.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b60a4633e85d640110222515f6a06c1b31cfc9a58acc65068a4cc1bd6377275
+size 4277536
diff --git a/out_tensor/model.layers.3.mlp.down_proj.safetensors b/out_tensor/model.layers.3.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9ef76d205117e1da5032b7b099e8ec9a5b1ace0e
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10b334a3dc1e463a41285182bcb9baf63b02178a7cd8deae4aa5dafbdf598c80
+size 59008184
diff --git a/out_tensor/model.layers.3.mlp.gate_proj.safetensors b/out_tensor/model.layers.3.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..07eb284f3eb08867571701c6de73f0037c5b2eb4
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e32c48e360f01b4cf527746d8f1248b3a8d9677f32188e5acf0a1308b84fc09
+size 58966744
diff --git a/out_tensor/model.layers.3.mlp.up_proj.safetensors b/out_tensor/model.layers.3.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6be3e1019a5266c44daca6913aa2ec1beb2b3887
--- /dev/null
+++ b/out_tensor/model.layers.3.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6fc32df4d0edb3a8f0b48f36d4937b00fd961f2d5bf07e9571eb78bb6fa4a00
+size 58966728
diff --git a/out_tensor/model.layers.3.self_attn.k_proj.safetensors b/out_tensor/model.layers.3.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da28a8a62140cff39ed2cf7da845fcdfe8952919
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5af541e865d4290357adff5aa7f3c7fee804c23bf5c262fcd066a18b1f12fbb6
+size 4227800
diff --git a/out_tensor/model.layers.3.self_attn.o_proj.safetensors b/out_tensor/model.layers.3.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f103458bc394006ba7667d0b7d8481bdf8178027
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56418ec1f8bc1463a01907cc329fbd4c4706a87b3eb6bee02891c66b5ee35ce6
+size 16859872
diff --git a/out_tensor/model.layers.3.self_attn.q_proj.safetensors b/out_tensor/model.layers.3.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b51fde46cae66071b754e976b9addb1525f7768d
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3f209bfbaf12680605eb0be70d6695742c72d52e024286b3abfacc220f59a3e
+size 16859872
diff --git a/out_tensor/model.layers.3.self_attn.v_proj.safetensors b/out_tensor/model.layers.3.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..309dafc727251b6471618398e4520856d3c7a83d
--- /dev/null
+++ b/out_tensor/model.layers.3.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d047472ce0bbd57057a6e91a5e6ab5a460d6c66c44506ee6e7f6e15abe86066
+size 4227800
diff --git a/out_tensor/model.layers.30.mlp.down_proj.safetensors b/out_tensor/model.layers.30.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..002413fc9aaf568e6e5203026acc96054185a6eb
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a51bffd5fb690291603783b444260002794b2abd6837a6549da85fbe90f4f5f9
+size 59008192
diff --git a/out_tensor/model.layers.30.mlp.gate_proj.safetensors b/out_tensor/model.layers.30.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..19f830e672e4d4e018e7b12d5aa02e7b5c52ccd5
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:077bf47e76e32dc44cc92a29b4fa809380a2198971d83bc85d88bccb4606b22a
+size 58966744
diff --git a/out_tensor/model.layers.30.mlp.up_proj.safetensors b/out_tensor/model.layers.30.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aef808bc8238f7a745e26c057b27b9fffee082b3
--- /dev/null
+++ b/out_tensor/model.layers.30.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0639f4afc9f9c51e2ac74ef5ecefc7c5621cf59985c01ce7b85686c57a8e56b
+size 58966736
diff --git a/out_tensor/model.layers.30.self_attn.k_proj.safetensors b/out_tensor/model.layers.30.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..777d2bfd551d8d4a6d1b1d1c1d6578560cf83ad3
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0691893223f3cd81311ea934a1b1ce517d2d115de94024d30d787aa22167bdae
+size 4227808
diff --git a/out_tensor/model.layers.30.self_attn.o_proj.safetensors b/out_tensor/model.layers.30.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..00b595939ba9f3fe2f6ccec82a81c2ab228493dc
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2752b035ff54392d4fc5adc294cf90d298bb4a6c5f6aee591b77f5c9ed2dba46
+size 16859880
diff --git a/out_tensor/model.layers.30.self_attn.q_proj.safetensors b/out_tensor/model.layers.30.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5f6699912ee4bce64d3a1bd473c7b22dd89368a0
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a227ac755a9ca05bcb563537c34347861142923581664dc3567ef950ab8dc7c4
+size 16859880
diff --git a/out_tensor/model.layers.30.self_attn.v_proj.safetensors b/out_tensor/model.layers.30.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc5b703784abcc6ec2a810f56a76aab1b442e3c8
--- /dev/null
+++ b/out_tensor/model.layers.30.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6230c339ac417e55a95002dfa498174ba1067005846e12666df72bb5668a4c9
+size 4227808
diff --git a/out_tensor/model.layers.31.mlp.down_proj.safetensors b/out_tensor/model.layers.31.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff5d9b7c81a3286ccb1517dc42c2ab2f83c93c54
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff5dfb79ba2a710c45177ee18d08adf7e46b5ca0ab9f195521c4c67b4d641936
+size 59008192
diff --git a/out_tensor/model.layers.31.mlp.gate_proj.safetensors b/out_tensor/model.layers.31.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..526ed85bd4514260496e42a391df8f48e7394638
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47b667d0efb9f7926f48f6114412e3de271c45b011c52eecc10fd38e088fa96e
+size 58966744
diff --git a/out_tensor/model.layers.31.mlp.up_proj.safetensors b/out_tensor/model.layers.31.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff256f417f7b7c5a745480e1691baf8387b17ec0
--- /dev/null
+++ b/out_tensor/model.layers.31.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91b4771324771f12df134f5fcd8d2c797c6ae8e2ecb79c50d40f91b02d483c37
+size 58966736
diff --git a/out_tensor/model.layers.31.self_attn.k_proj.safetensors b/out_tensor/model.layers.31.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4e0b03c41767c44c28d1508907154015c6b22a54
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcfe2ecbf6a756a8a69738a61d62834966e12af50a888afa14ce3491721fd02d
+size 4227808
diff --git a/out_tensor/model.layers.31.self_attn.o_proj.safetensors b/out_tensor/model.layers.31.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..016476c1779a39d8fa2fc2c660176a807adc3a57
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:139532783087b1d3260aaa21b69d35b80c60db1fc842e36defcd0d324cc0a5c8
+size 16859880
diff --git a/out_tensor/model.layers.31.self_attn.q_proj.safetensors b/out_tensor/model.layers.31.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4929d379dde6f79503240b2dd57ab99cd17b0708
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e7058f29d0d5d05789957aa9ba34ecf16ad2b97c5afbe1913a0bc5c6c14f03d
+size 16859880
diff --git a/out_tensor/model.layers.31.self_attn.v_proj.safetensors b/out_tensor/model.layers.31.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..592b5ae1c659c2eb8989fec25259f820cda0dc37
--- /dev/null
+++ b/out_tensor/model.layers.31.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d542287db2c5fd30d6453583b0aed8ac864647d321e4ab47071bcef9c602fb40
+size 4227808
diff --git a/out_tensor/model.layers.32.mlp.down_proj.safetensors b/out_tensor/model.layers.32.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49d15676af0550460604ff9ca95c6f394674d9d6
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85039faf2db4d0afc4afc4b04862afd09ee5dc086acfaf77d31d045c2df99488
+size 59008192
diff --git a/out_tensor/model.layers.32.mlp.gate_proj.safetensors b/out_tensor/model.layers.32.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..deee0b3a563a1fbce9d2171f50d34f3ca411cc6c
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7429c558bb4b72471a7a712856d91df664f9d1068a3e3ebcc817a11e92a584c
+size 58966744
diff --git a/out_tensor/model.layers.32.mlp.up_proj.safetensors b/out_tensor/model.layers.32.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3e19889ea64353b9db35ce14a0a8da9517c83886
--- /dev/null
+++ b/out_tensor/model.layers.32.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3ab5465c115432c6dc2672f68dd8ac683b4433ffbe283b90f9648bda2bf8c2e
+size 58966736
diff --git a/out_tensor/model.layers.32.self_attn.k_proj.safetensors b/out_tensor/model.layers.32.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..24f46b0bb8226b243c854cd3e0733483370f6eed
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1612bf30dbd5b75615bb30c771bf4193d511121ba9f8a54aa0fc7a8d28f49dd6
+size 4227808
diff --git a/out_tensor/model.layers.32.self_attn.o_proj.safetensors b/out_tensor/model.layers.32.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5485746ee9a4fab265f4ebcb81edec9f5a422853
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1eb35aa7cf60a8aad2000d572091df8cb58f300729fdfd4a089ce708175489
+size 16859880
diff --git a/out_tensor/model.layers.32.self_attn.q_proj.safetensors b/out_tensor/model.layers.32.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d6b7dbfa102c290ac6677a6ac7e276d8e610bda1
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99392388ba46c56d0d99a2d2a7fda2007a7689cf4b5c9d93b3da158e87b982a2
+size 16859880
diff --git a/out_tensor/model.layers.32.self_attn.v_proj.safetensors b/out_tensor/model.layers.32.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b3860840c419e5c1188ae457b929e41a335036e
--- /dev/null
+++ b/out_tensor/model.layers.32.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5098b672104df4b14a8ee1d72e706c7fa44e9eb2768bcd0e8faf891a085edbc4
+size 4227808
diff --git a/out_tensor/model.layers.33.mlp.down_proj.safetensors b/out_tensor/model.layers.33.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..986e4968b0f2c4c2116187d0b948bb73e4721c6d
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b610f2e255ef2b174cb9d1e2b93fd81ce6b72e54de9d39455653814243deda45
+size 59008192
diff --git a/out_tensor/model.layers.33.mlp.gate_proj.safetensors b/out_tensor/model.layers.33.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..763142d2eaa6706b292e43d1054c4f8a390d0745
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3e5fcf4c590107843bc2a8cd29454745821fdef151285c16b94bc75a095891a
+size 58966744
diff --git a/out_tensor/model.layers.33.mlp.up_proj.safetensors b/out_tensor/model.layers.33.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42a439de0c5301d21d764cff8ba0423ee2f5106b
--- /dev/null
+++ b/out_tensor/model.layers.33.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b715caa7d13150bf07cbb30817651388f5f0d8994f922cc162cecb2b746835a2
+size 58966736
diff --git a/out_tensor/model.layers.33.self_attn.k_proj.safetensors b/out_tensor/model.layers.33.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ce7de4be150694f87f2b4f2b183bfeb43f505334
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c3a227f689ebda40b108e0dadc7b0265ea4431ac482a4ed7c447bcd1901f8f3
+size 4227808
diff --git a/out_tensor/model.layers.33.self_attn.o_proj.safetensors b/out_tensor/model.layers.33.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ee0b089774ab53ee9ba1e81de2b79c76243e028e
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4d9c046710664307a1b6f233f0e07da552f3951c75773de4d34815ca12c3ede
+size 16859880
diff --git a/out_tensor/model.layers.33.self_attn.q_proj.safetensors b/out_tensor/model.layers.33.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f97ccb2bc1d53433bc8de005f0a6b197c7d1b60
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52226a0f90a1771c6f03a5e18f275e02bfffe8bc13bd040d00ea3b2ed3e10c78
+size 16859880
diff --git a/out_tensor/model.layers.33.self_attn.v_proj.safetensors b/out_tensor/model.layers.33.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8992007d1d2ef4ec581c35126be87fe30ba4581
--- /dev/null
+++ b/out_tensor/model.layers.33.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7887bb75bee7592259818d488f4b8cd3685d5159e01f8196c2d3721ddf1ca5c
+size 4227808
diff --git a/out_tensor/model.layers.34.mlp.down_proj.safetensors b/out_tensor/model.layers.34.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1005b9129661fbe4da9607053fc0e10afea66b0c
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da4868bf16b823d2bce55e40cdabb1b050c74dc9e5b8e2df27a9995fc65decf7
+size 59008192
diff --git a/out_tensor/model.layers.34.mlp.gate_proj.safetensors b/out_tensor/model.layers.34.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7af5346d129b316c05d1069a29f035324f9ae934
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7896f5749826cbeb8e9fd783e0485acbd464a1db7b5505f15b482048c9f129b4
+size 58966744
diff --git a/out_tensor/model.layers.34.mlp.up_proj.safetensors b/out_tensor/model.layers.34.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e3ae84cbf7e4e4137c6ac8be39045ba3a838bc6a
--- /dev/null
+++ b/out_tensor/model.layers.34.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd00bea5bb53e35029a9a953ff18f2c42cffd486a2594a8db343d0dff3f5fdb8
+size 58966736
diff --git a/out_tensor/model.layers.34.self_attn.k_proj.safetensors b/out_tensor/model.layers.34.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..45427696ebfa5c14d26ef8f976694b52b17c1983
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d759a387e5143c3111d31dab80463c578260b556b2e2857d87fc45c810f5500
+size 3228960
diff --git a/out_tensor/model.layers.34.self_attn.o_proj.safetensors b/out_tensor/model.layers.34.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2c45ab5e72fac9cdb83b79fcbe6f944aa6d15358
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95325119c5587fd6083b40f3eb0c58b917faa1727442c4211f75a93458be6219
+size 12862760
diff --git a/out_tensor/model.layers.34.self_attn.q_proj.safetensors b/out_tensor/model.layers.34.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4a9af11e44e2a67257bace9f80dc65a492fb3f9
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c19af4d9bedb6187f6b33dffb5fa9621d16c00673c97a693ff1b522fcf76cc38
+size 12862760
diff --git a/out_tensor/model.layers.34.self_attn.v_proj.safetensors b/out_tensor/model.layers.34.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..213f796e55bc43b22b3f965975cb7a5bdf76da35
--- /dev/null
+++ b/out_tensor/model.layers.34.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0755452c8cfe65df22ed7696e06173792768ca5d322a64ad0086f52b673412c
+size 4277536
diff --git a/out_tensor/model.layers.35.mlp.down_proj.safetensors b/out_tensor/model.layers.35.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ed06a903bfab0c6bee0194a7a6a9fddb50c7c77
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40baf03e6893e08a096abfeb47873c346f9230f634eed0c3061e2abbba544098
+size 59008192
diff --git a/out_tensor/model.layers.35.mlp.gate_proj.safetensors b/out_tensor/model.layers.35.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8a658b7680d312cf9b3eb0fc649afbdeb4c2ec1
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2e04f1269f705dec7a26340bf96bd97097f3cbde661684bd426d13aaa4958b
+size 58966744
diff --git a/out_tensor/model.layers.35.mlp.up_proj.safetensors b/out_tensor/model.layers.35.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a1e6b6bda2ddec5266fc27c9d5eb04f41fad002f
--- /dev/null
+++ b/out_tensor/model.layers.35.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83814b6781e7e3a9c3102c7544da5399ad5cb915ee9825ce2b8d9296c1c2bf49
+size 58966736
diff --git a/out_tensor/model.layers.35.self_attn.k_proj.safetensors b/out_tensor/model.layers.35.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0cffe7932c0a6431a399535041a85fe00fea49c2
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65c57437b76580b9ec3105dd05d41dd2421786d722900a3f853c9021ddcfd6e3
+size 3228960
diff --git a/out_tensor/model.layers.35.self_attn.o_proj.safetensors b/out_tensor/model.layers.35.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..79303a773749cca660d5353ebcee493473031128
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eeb32e0ca08bb9063800c3feba71850b53a25a9a973fa34749187b9d3ab57cd8
+size 12862760
diff --git a/out_tensor/model.layers.35.self_attn.q_proj.safetensors b/out_tensor/model.layers.35.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4c09a413cb11dd21c14eb0c2f743a3eb9285839
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71263681ef5d684f4e6d1705c6b4a15aa6ca32279ffd4f797764a3c15e760151
+size 12862760
diff --git a/out_tensor/model.layers.35.self_attn.v_proj.safetensors b/out_tensor/model.layers.35.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eca95b1ccd6d4f1d3472968fd27ee43724592157
--- /dev/null
+++ b/out_tensor/model.layers.35.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0413c725efc7fea89f0c8c3f0a82e205af2d19742ab66ba5c96a33f0031b9a0f
+size 4277536
diff --git a/out_tensor/model.layers.36.mlp.down_proj.safetensors b/out_tensor/model.layers.36.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41c90959634b47f3e00f665e06ba26289ac3d99a
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d0805c58149d3852adb73cee64e415e5ac599140d05acdc20291b0524b25a3
+size 59008192
diff --git a/out_tensor/model.layers.36.mlp.gate_proj.safetensors b/out_tensor/model.layers.36.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e56937fc4e112fe1a230ec0eb45c12fd79a4542
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71b0c6a9b29f9785481db364f133b224caa848ee25ce95145bc6c999465b9914
+size 58966744
diff --git a/out_tensor/model.layers.36.mlp.up_proj.safetensors b/out_tensor/model.layers.36.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9e79dfda7771974ca4f0ea3867b8f884e03f918b
--- /dev/null
+++ b/out_tensor/model.layers.36.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24de6d901c024cf244a5c579add8cc810624cb9b1a2a7f5cbfef965a9152d5e2
+size 58966736
diff --git a/out_tensor/model.layers.36.self_attn.k_proj.safetensors b/out_tensor/model.layers.36.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..672ba4d5b4b5ef41a0cdb1961c31c935ad95e78c
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:144205fb7e4bcc113b54ba7e4ca6feef9448badd77c14607e6553e83b6ee7137
+size 4227808
diff --git a/out_tensor/model.layers.36.self_attn.o_proj.safetensors b/out_tensor/model.layers.36.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6223028c6735b0b1fb7691c75118cfd1dcf6df76
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:299234f6ca1a485dbcf722ebbadcac89ac51dc2b31c2783d554ba5f610b2c5cf
+size 16859880
diff --git a/out_tensor/model.layers.36.self_attn.q_proj.safetensors b/out_tensor/model.layers.36.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..745fec33dfe54f11b4dd7ef6dda0f87533e9e6c6
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1efd6256519115fbd6aecc953ca3fb529a77027c461444b4731a76d9a33a346
+size 16859880
diff --git a/out_tensor/model.layers.36.self_attn.v_proj.safetensors b/out_tensor/model.layers.36.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e1831ddc9296c84c8342b096aeb11b182444be76
--- /dev/null
+++ b/out_tensor/model.layers.36.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef51dee26fca6eedf2e4af62d62496e560e094058491e194e2b6d5ebc5f18a4
+size 4227808
diff --git a/out_tensor/model.layers.37.mlp.down_proj.safetensors b/out_tensor/model.layers.37.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fd8d76e053b3d172f61409ce4470994da05e2f47
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a877d8d7d639a9368eb2751c25c311e168cc6311aee0a099985d57b37be88aa2
+size 59008192
diff --git a/out_tensor/model.layers.37.mlp.gate_proj.safetensors b/out_tensor/model.layers.37.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..60db9d82fbf07dadd28545380330423632237acf
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe8b1a5f072c07db034ec75b87e7286c7d180e4e57f2dae0b0bdd840e541287d
+size 58966744
diff --git a/out_tensor/model.layers.37.mlp.up_proj.safetensors b/out_tensor/model.layers.37.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..168d75157bbd1b075619f828323b1832a388fe68
--- /dev/null
+++ b/out_tensor/model.layers.37.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:206bf36b50984e147ce768f337714e149fa8240e6897d0555b6e24dd76188770
+size 58966736
diff --git a/out_tensor/model.layers.37.self_attn.k_proj.safetensors b/out_tensor/model.layers.37.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef9dd897d03189d02eef781fc2353b712b012bb6
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b70579ad20e0a6146742eaf705fcf06de553b031085bc7eea632069089037911
+size 4227808
diff --git a/out_tensor/model.layers.37.self_attn.o_proj.safetensors b/out_tensor/model.layers.37.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9af1e1e2a4fba048deae1aed6badfee600d9c803
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32aee0ad361fbca70bfca70e015f4483534b013113c38f0e21303689e44e9bf7
+size 16859880
diff --git a/out_tensor/model.layers.37.self_attn.q_proj.safetensors b/out_tensor/model.layers.37.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..205f3a19de124fc23336f53f4292c12b0b06ca4d
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:124a6d307400fefbc29b0884e5e2c86fccb114762a727c287e07314290d28392
+size 16859880
diff --git a/out_tensor/model.layers.37.self_attn.v_proj.safetensors b/out_tensor/model.layers.37.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..07f3884d36b32a53dffb6b79859f7e87712a1a7e
--- /dev/null
+++ b/out_tensor/model.layers.37.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73825ef0bd6763c38d12f844507f44379e9a1d08a7339d8836f15e64d49a1fa6
+size 4227808
diff --git a/out_tensor/model.layers.38.mlp.down_proj.safetensors b/out_tensor/model.layers.38.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..be7bcdb7436eda38b67b010405af8b42655c6ff6
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d56638f40012b2da5a0f40106ed7563f3a936d375f2956c6e5e8b11a7c5570
+size 59008192
diff --git a/out_tensor/model.layers.38.mlp.gate_proj.safetensors b/out_tensor/model.layers.38.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f09adb0800bd0b7963adbb844bff48c299a8b911
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2949dc7826e5648b48fa834764c4e16cf6d79c7d318bc09e8ee1e0d5242f81ba
+size 58966744
diff --git a/out_tensor/model.layers.38.mlp.up_proj.safetensors b/out_tensor/model.layers.38.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4eabb108b161706f6e3623d9324fd69ba80e92d
--- /dev/null
+++ b/out_tensor/model.layers.38.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:331c8693cc71b2c88c2efbb23251a1d4d0915cf711faaaae7985fac19db4ec65
+size 58966736
diff --git a/out_tensor/model.layers.38.self_attn.k_proj.safetensors b/out_tensor/model.layers.38.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7df0d50335647dcebdef2f5f31b7ac83536d1dd4
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5a25ec61988412ef9a27f37da2b204c07c608782c29b547506fb73b3947478
+size 4227808
diff --git a/out_tensor/model.layers.38.self_attn.o_proj.safetensors b/out_tensor/model.layers.38.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8714aeb46668924d008c07a8d8abf9c2a22c84e
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:844752fd63667ea4fb37f2b59ee24d1b71c440faafb89410aa1f1fbfb178980b
+size 16859880
diff --git a/out_tensor/model.layers.38.self_attn.q_proj.safetensors b/out_tensor/model.layers.38.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1aed3d476624ba0846baa0573c37a11cd080653c
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2fceac380c363c28c1d045df645e48390fc01d443cfce508d9e23d9264e7774
+size 16859880
diff --git a/out_tensor/model.layers.38.self_attn.v_proj.safetensors b/out_tensor/model.layers.38.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..694979a6a8772af2d18965cbc8b320ee5540c2a5
--- /dev/null
+++ b/out_tensor/model.layers.38.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:673fe90f447d8ae96081826db3586b08998d9b492bdbef5b2c2b7272e57a872b
+size 4227808
diff --git a/out_tensor/model.layers.39.mlp.down_proj.safetensors b/out_tensor/model.layers.39.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ce6c6dfc2039507572d459bdc172ed1331d25336
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e8f2c9f91470b5b01c12962b011542d6449e638288e269da730d3dd62e50331
+size 59008192
diff --git a/out_tensor/model.layers.39.mlp.gate_proj.safetensors b/out_tensor/model.layers.39.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90087cde818b55cce1d5cd025e5aaf34259d4fdf
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:358bf9ba48e9e66e3614cedbbd4b6e72c7c2168c5aaba9f5d8ac75a3d906ea14
+size 58966744
diff --git a/out_tensor/model.layers.39.mlp.up_proj.safetensors b/out_tensor/model.layers.39.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f8517da308b30f32f1f8cafcf014252a149f3be7
--- /dev/null
+++ b/out_tensor/model.layers.39.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14aec99b5e6f9fd657967d335573ff16829f62c49d8ab6da0fdb7f6faf8cc0d2
+size 58966736
diff --git a/out_tensor/model.layers.39.self_attn.k_proj.safetensors b/out_tensor/model.layers.39.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0004d5e4074df89c98588da39ce57ccb31b42b01
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28bff62fa71bf0297dab289586c3f0a0e3ad5f446b8da27b6458379988a8a715
+size 4227808
diff --git a/out_tensor/model.layers.39.self_attn.o_proj.safetensors b/out_tensor/model.layers.39.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..93e5fae6f76a92804fc832b9ade8100bd061afaa
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:578ada8e30507f1009769ce7d8d455be06bee0fc9c36adeb1cd72c1a13e758d8
+size 16859880
diff --git a/out_tensor/model.layers.39.self_attn.q_proj.safetensors b/out_tensor/model.layers.39.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c821923b0d775e8b946711cd26ba5c8743faca07
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38fdfe5b6c9d31350043755b0c4f502e7005d131d35733c4af6b7703772a3730
+size 16859880
diff --git a/out_tensor/model.layers.39.self_attn.v_proj.safetensors b/out_tensor/model.layers.39.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cfe2992d72a8396dfdf70cb797635187c89266ea
--- /dev/null
+++ b/out_tensor/model.layers.39.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f0a169cad5b86453ccd6bbbd0225dcd36fc432b4ea9a35ecf97138a800a22dd
+size 4227808
diff --git a/out_tensor/model.layers.4.mlp.down_proj.safetensors b/out_tensor/model.layers.4.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ad4270f00fed13c52adb7e2e893b083f1edeef0
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b114a7a371256457669e61debadbdf6b12ef5ea462dda32d1ec1fa0542034692
+size 59008184
diff --git a/out_tensor/model.layers.4.mlp.gate_proj.safetensors b/out_tensor/model.layers.4.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d91c07e011fe79c66071d3c77141e0506fc2045a
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6252528c06afb0872a92e5f25c61201d4c4d3dcadb0d42be1f990da4689fe750
+size 58966744
diff --git a/out_tensor/model.layers.4.mlp.up_proj.safetensors b/out_tensor/model.layers.4.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d35b463d96891411bbffcdcf53eb9530deab5e6
--- /dev/null
+++ b/out_tensor/model.layers.4.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de8fad0694cf7231bb1335c48ad8c50ce22b1d7f631f4bb9685fee9d2f3c255b
+size 58966728
diff --git a/out_tensor/model.layers.4.self_attn.k_proj.safetensors b/out_tensor/model.layers.4.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d68bdbfa9c6be0478bd28cd607f8432078cc51c
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a053b26f1f3b033082e36b112dc43e16310dd44706d3e5526ee48a36daaa4032
+size 3228960
diff --git a/out_tensor/model.layers.4.self_attn.o_proj.safetensors b/out_tensor/model.layers.4.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5a65d8b47a9c2121ae966b16a37e7dd0b4262d3
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf365b6f5d96d80168df4e7ae32a38de4d2280d7d4f16b4dc97857c7c8a163c4
+size 12862760
diff --git a/out_tensor/model.layers.4.self_attn.q_proj.safetensors b/out_tensor/model.layers.4.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49c326323bdace3d28a49f817c244969703f45aa
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27b3a4136e5fbfe3f58083d4be8e9767599b079e80097796246f6566e9a49e37
+size 12862760
diff --git a/out_tensor/model.layers.4.self_attn.v_proj.safetensors b/out_tensor/model.layers.4.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..73ad8fcdf2c229e2231224d9739744fcbe2ba68f
--- /dev/null
+++ b/out_tensor/model.layers.4.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d467ef776e7b76708e3a67067b2a42ed2d6091d691c8ef948edca8807c19f146
+size 4277536
diff --git a/out_tensor/model.layers.40.mlp.down_proj.safetensors b/out_tensor/model.layers.40.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4a979b98a88285e94592d85228cd75bfc70e11c1
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3127914a95cedd9459d26ddfda951815469d0b3fac9bd55a22511d945471b25
+size 59008192
diff --git a/out_tensor/model.layers.40.mlp.gate_proj.safetensors b/out_tensor/model.layers.40.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2011fd7fb840882cb1bae1798c08b238bf7fdba
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e74caf2e838f2cebb9cc75e5d95f3dcafbc970e77e5f03824f1ee7d7e70b2653
+size 58966744
diff --git a/out_tensor/model.layers.40.mlp.up_proj.safetensors b/out_tensor/model.layers.40.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..408208ee7e59d8ecd8044966a34dd7c579829501
--- /dev/null
+++ b/out_tensor/model.layers.40.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3d5afc5dd47652ad3e7ecf14aacd08fc2d1ea562ece718cdd9f06db1d147ac8
+size 58966736
diff --git a/out_tensor/model.layers.40.self_attn.k_proj.safetensors b/out_tensor/model.layers.40.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..585e95682878fe248be2818f29a14f3170aa24d8
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ee2ad30dc9c2c302176c6cb8a2674420d34f3b5ed9276248d734a921dcb79b9
+size 3228960
diff --git a/out_tensor/model.layers.40.self_attn.o_proj.safetensors b/out_tensor/model.layers.40.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc307093e1031809b70cdcb253126eb381ce9794
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f520e18b364e6c5f4815fb86a0f841db725fe5cca49c433227373c28018bcb8
+size 12862760
diff --git a/out_tensor/model.layers.40.self_attn.q_proj.safetensors b/out_tensor/model.layers.40.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d65917c741bd766f1768869bf32ee1faae962471
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc0451d69085c40f17d30031a82f01d93ab774a77a255c0fa4d38b761f29ef2d
+size 12862760
diff --git a/out_tensor/model.layers.40.self_attn.v_proj.safetensors b/out_tensor/model.layers.40.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8b324068357adbd135aa9a597a43ddbc397d6b9
--- /dev/null
+++ b/out_tensor/model.layers.40.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3aeedb134b945691def774aad3837b5e8219114b4a9dac5a492a01e5bd47942d
+size 4277536
diff --git a/out_tensor/model.layers.41.mlp.down_proj.safetensors b/out_tensor/model.layers.41.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7546264e8db0ac3b3c11dba43b83ec2bf401645b
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6380781f228a3551dcaec5336a5eff099cede6def47ffa5c4f94dc6b0844b210
+size 59008192
diff --git a/out_tensor/model.layers.41.mlp.gate_proj.safetensors b/out_tensor/model.layers.41.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c46f09dcff4efe113d198b4e37c932663497eefd
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69781f909c6ba0f53ebfa2cdf937f3840861ddbeb9c151364a096cbe588e9703
+size 58966744
diff --git a/out_tensor/model.layers.41.mlp.up_proj.safetensors b/out_tensor/model.layers.41.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a72f7215439ce48cd7355802f4a795634720b67
--- /dev/null
+++ b/out_tensor/model.layers.41.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58c2bd22b85640ab9441d87b83fc32cd8e3e8f9ed8f01c9160dd59a07978888
+size 58966736
diff --git a/out_tensor/model.layers.41.self_attn.k_proj.safetensors b/out_tensor/model.layers.41.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a1eface6bfa36841b9793f3170d371780b0175ef
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b944becf24f91794da5100470f67307d87ab3afe497c16a2164deb47a8dc5ad
+size 3228960
diff --git a/out_tensor/model.layers.41.self_attn.o_proj.safetensors b/out_tensor/model.layers.41.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc7fd5efe9c48e339766416a7a7e43c10243f20e
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:449985c27cd8538912f8a868ade83630bf68ae15fff1bedb3238358b465b63fe
+size 12862760
diff --git a/out_tensor/model.layers.41.self_attn.q_proj.safetensors b/out_tensor/model.layers.41.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f45409e90aa76750abeb35cfa06dc46e3a000be
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd2ad654a91b9e6f9a4c766777c3f124fcd4689df30e7e63afd0d663ac65bb81
+size 12862760
diff --git a/out_tensor/model.layers.41.self_attn.v_proj.safetensors b/out_tensor/model.layers.41.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1739ca5ce0e81a22d6383abc4963abcfff41c6c
--- /dev/null
+++ b/out_tensor/model.layers.41.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ac3472aab796f132af748eb6cc6e463a6aa536b1e4d28238e113eaa9627b8cf
+size 4277536
diff --git a/out_tensor/model.layers.42.mlp.down_proj.safetensors b/out_tensor/model.layers.42.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbef98f0ad6bc760e1ff11b1f45ad535a8912832
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d89855f9e452af379a9e1f172003c2f407d876885733b43e235dc39e0e03b060
+size 59008192
diff --git a/out_tensor/model.layers.42.mlp.gate_proj.safetensors b/out_tensor/model.layers.42.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3474980cc19c6bf91ddb0c9274d71bf77d3ec0b0
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97267df460d2a5f213ba49c1f49d885df258e68549556c0bf0552c85f157e36f
+size 58966744
diff --git a/out_tensor/model.layers.42.mlp.up_proj.safetensors b/out_tensor/model.layers.42.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6ec3d29606baf46df931d69d562195334d1fcc52
--- /dev/null
+++ b/out_tensor/model.layers.42.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6485caac2c2892ab2180b6a860c8aaeed080674de7f32602256b918a9db3173b
+size 58966736
diff --git a/out_tensor/model.layers.42.self_attn.k_proj.safetensors b/out_tensor/model.layers.42.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..65d880a3a98439b95785fc496d5362c13e528724
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db61cb861fd57e99bf0481bb06065ac8abff0a345507acd4f7ecdbd39079c77
+size 4227808
diff --git a/out_tensor/model.layers.42.self_attn.o_proj.safetensors b/out_tensor/model.layers.42.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7f85451245b917f924b6e4cd8e9db66e84938903
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd617398ac22423f6aa98f21481fa8555b6e546305b3692247815c712b3044ff
+size 16859880
diff --git a/out_tensor/model.layers.42.self_attn.q_proj.safetensors b/out_tensor/model.layers.42.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49f1ac70e2808aa1e3f507c0ab29d63f9adc4c5f
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa2bd65b76505ba44c230be175bdd604402121470eac75012382e70cc5bdc8b5
+size 16859880
diff --git a/out_tensor/model.layers.42.self_attn.v_proj.safetensors b/out_tensor/model.layers.42.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae66b74e9a737dc425ab83581c2e0119520b5819
--- /dev/null
+++ b/out_tensor/model.layers.42.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2802e12265095012bd6dec9afbfbb8b2db6cee4ab2c2c4d722a47b2552e52e76
+size 4227808
diff --git a/out_tensor/model.layers.43.mlp.down_proj.safetensors b/out_tensor/model.layers.43.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0804ac675c0db4897b5602c0676078eca7daf599
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c12f5a8c5d6a6e7163ac58ac848d5e05e70420d31484841e0ed0dd7e9ddf26
+size 59008192
diff --git a/out_tensor/model.layers.43.mlp.gate_proj.safetensors b/out_tensor/model.layers.43.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa326ff7c641b0ad32773b849e9f60b16b132fb6
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecd0ea386a61c0bf0f18f4582dfaf653dd57f8ba2895955c662fd6b168afe212
+size 58966744
diff --git a/out_tensor/model.layers.43.mlp.up_proj.safetensors b/out_tensor/model.layers.43.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6fab3d780252611716d264568ca8c67e7838552e
--- /dev/null
+++ b/out_tensor/model.layers.43.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef482e14a151b42f37f688d5f4a2548f74bb621c022aaec73f122a5e6447a0c1
+size 58966736
diff --git a/out_tensor/model.layers.43.self_attn.k_proj.safetensors b/out_tensor/model.layers.43.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d9053eca206190dd70164f40bfea3fd1fb3bab4
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6304c06b6c9c1404e2cc999e4090c4d8575f7d717203570e95bdb862c4ef02
+size 4227808
diff --git a/out_tensor/model.layers.43.self_attn.o_proj.safetensors b/out_tensor/model.layers.43.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..67f3eac097c789ce77fae01b7a299087563c8fe3
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cefe9ada31f3514be1e2451d05989a60b919ca9f1b2756f9db50bfbabbf7841d
+size 16859880
diff --git a/out_tensor/model.layers.43.self_attn.q_proj.safetensors b/out_tensor/model.layers.43.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4c0831f8f02aa39f1fd20a8129fcfedad82c5e2f
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92877deeff1fd63637b06b39ca2312137c7825c1102dbc55cc4c4b6d980773e2
+size 16859880
diff --git a/out_tensor/model.layers.43.self_attn.v_proj.safetensors b/out_tensor/model.layers.43.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..653347b0ea46b51070323e35817b82b7140b1d5e
--- /dev/null
+++ b/out_tensor/model.layers.43.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed1dbf11417a972c1467f40b5c4cc7f40a6ab64e3c6b68379790747049f9941f
+size 4227808
diff --git a/out_tensor/model.layers.44.mlp.down_proj.safetensors b/out_tensor/model.layers.44.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b84a7b8658ba4cd6501667e8f6b555e282b36cab
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3973bdf1263b834c704b77bd3c6e7d2871809890260c991915faf88814fff03c
+size 59008192
diff --git a/out_tensor/model.layers.44.mlp.gate_proj.safetensors b/out_tensor/model.layers.44.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b84e70aca2e1bca9c780e756cf94e8533eda68e9
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0325e6e16950216dc9c8d81f0e0478d637e96d1e37b11fa5b8bc664b0dbd286e
+size 58966744
diff --git a/out_tensor/model.layers.44.mlp.up_proj.safetensors b/out_tensor/model.layers.44.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29121fbd0cc1fdf81a0944a775c8f1a533af7370
--- /dev/null
+++ b/out_tensor/model.layers.44.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8de9ff3a08456f86c1b9edcd5fa2eb7591676423cf0adf28a2fba4b6277e321d
+size 58966736
diff --git a/out_tensor/model.layers.44.self_attn.k_proj.safetensors b/out_tensor/model.layers.44.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6e0e2a659320aae4acf5663290d4e1618ccdb744
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:544e84f9fb7e705aa16e75e2ae6e03129802c3f85d56b7a35dd85432756263e9
+size 4227808
diff --git a/out_tensor/model.layers.44.self_attn.o_proj.safetensors b/out_tensor/model.layers.44.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8dcc1de8f265f3d8a07de11fe6604ef8c924c318
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3624852a4a6beaee2e00643ca4527b2d0dbb711b9e1fc028d72470c1b1e7f8ef
+size 16859880
diff --git a/out_tensor/model.layers.44.self_attn.q_proj.safetensors b/out_tensor/model.layers.44.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa15b118af9bd2512bc4210e3471d4680d397a61
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b366c7d803bd0ad8b47a179188a3493c0dc03a30d633478b0d9d9fa75acbb5b0
+size 16859880
diff --git a/out_tensor/model.layers.44.self_attn.v_proj.safetensors b/out_tensor/model.layers.44.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..675aed10ebb40e3cb6a920224a53701558d6ae2f
--- /dev/null
+++ b/out_tensor/model.layers.44.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81753ffd31c6d95fd9ace15131456dbbbde9deca1c6b57c25e0d04c095ed220b
+size 4227808
diff --git a/out_tensor/model.layers.45.mlp.down_proj.safetensors b/out_tensor/model.layers.45.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a62b39b80c16519af67666152f984addf5601cce
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48b6f605a1b36a4b03a6a3658d044276c1e0c44e427ff72a9ea9baf6c872d03d
+size 59008192
diff --git a/out_tensor/model.layers.45.mlp.gate_proj.safetensors b/out_tensor/model.layers.45.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9c486f627e221b10dfe73ee7372aea15844879f
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19dd56e0218441a2ab3df6d6f5def75d0d0d8a156fc0b5458aefd97f746dfed9
+size 58966744
diff --git a/out_tensor/model.layers.45.mlp.up_proj.safetensors b/out_tensor/model.layers.45.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..605b74fc4fb50dd4895e36b3f98019062b54d9d8
--- /dev/null
+++ b/out_tensor/model.layers.45.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:637e728e42bec2887b3361adb492cb863bd03c229dbc2a672fd542995b9e90cd
+size 58966736
diff --git a/out_tensor/model.layers.45.self_attn.k_proj.safetensors b/out_tensor/model.layers.45.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cdab964349fff648c9ea32e94d8ee2523d1a1dc1
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b5f5bd41312060d97d25b8b5b959f07c1cd2a811d58914e4c1629bb5acba618
+size 4227808
diff --git a/out_tensor/model.layers.45.self_attn.o_proj.safetensors b/out_tensor/model.layers.45.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..40a66af9547d73cfab0b408318a4fb47ff229ae6
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a36ebfb70cf992f20951d3e5aa5d5a7516d797cbfd16b69009a869db84f70c0
+size 16859880
diff --git a/out_tensor/model.layers.45.self_attn.q_proj.safetensors b/out_tensor/model.layers.45.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..46d6c67edb9cb83dd27fa857977ca64c42455621
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54c8cc40fa5cb36a7a3f6f60ba215e3817838b46968620b2de732a44c112a03f
+size 16859880
diff --git a/out_tensor/model.layers.45.self_attn.v_proj.safetensors b/out_tensor/model.layers.45.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c2d29a308c75ee824760524bb11889631725ca9c
--- /dev/null
+++ b/out_tensor/model.layers.45.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b40a943ae4c9d6e0b0b209875d6a655f92029128ff76be0893401638721dcfe7
+size 4227808
diff --git a/out_tensor/model.layers.46.mlp.down_proj.safetensors b/out_tensor/model.layers.46.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bbb849dc6a6a61f01aa3892937c9149457325de
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:636cc5e2f70dc6eebbda9afefa03e1f2c1435e0069ba0f30d065515b3d8db5c3
+size 59008192
diff --git a/out_tensor/model.layers.46.mlp.gate_proj.safetensors b/out_tensor/model.layers.46.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a295d2ad3045c63cf1e97cdf2ffafe34c268448
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abc32a6eb3fc94d582209958df937625c7689070c9c32c56a3171d4bfe792f8e
+size 58966744
diff --git a/out_tensor/model.layers.46.mlp.up_proj.safetensors b/out_tensor/model.layers.46.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d90ef9bc5a37361daf6d5948f10f0ec1c0c92e3
--- /dev/null
+++ b/out_tensor/model.layers.46.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a6675568337d2ac82bfba7e8b5a4d565a83a3f3b3c3a13e912932a9b92b65ac
+size 58966736
diff --git a/out_tensor/model.layers.46.self_attn.k_proj.safetensors b/out_tensor/model.layers.46.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c061d9f037eb5bbc74f1021f7a6853c9c90101f
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c26de3b2f25cea14cb056e922c7fa67c0e15e669cf2b2ce642d03bb44d320e56
+size 4227808
diff --git a/out_tensor/model.layers.46.self_attn.o_proj.safetensors b/out_tensor/model.layers.46.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a06537584c353bd9791a938707b5b8440ccfa00a
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a60cced404fb4388690b3e3537fc284f885c61eea9fe573c490c9a4a976b622
+size 16859880
diff --git a/out_tensor/model.layers.46.self_attn.q_proj.safetensors b/out_tensor/model.layers.46.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8265a15769dd7889f789a9bf5f09f1bcec1d6ab3
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0af08a7dc16cf0b7f5a0a726e0bd47a139ba2d8cf37728f64f505aba99cde01e
+size 16859880
diff --git a/out_tensor/model.layers.46.self_attn.v_proj.safetensors b/out_tensor/model.layers.46.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f8e4fe024e8c3f5173fa0ae2fb09f39db26ab451
--- /dev/null
+++ b/out_tensor/model.layers.46.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e411ce135853a78198eac6f0771bccf62026e6f5992d8f52a4620c6d9799ea1b
+size 4227808
diff --git a/out_tensor/model.layers.47.mlp.down_proj.safetensors b/out_tensor/model.layers.47.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2cb325551e6f2d88cfd0b435bea09cae269b2bf4
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bee50b2d955f770fd4ded1b4fa640f8ac7c9647f95305cfe002bf97a96ca8fa9
+size 59008192
diff --git a/out_tensor/model.layers.47.mlp.gate_proj.safetensors b/out_tensor/model.layers.47.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d34af6243c5020b5e69ddb0d9d93da16eb0f538a
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f13541fa573ce733dca2846268dd7741ffdfb19faab7a8e8cbd4f129b4922126
+size 58966744
diff --git a/out_tensor/model.layers.47.mlp.up_proj.safetensors b/out_tensor/model.layers.47.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..decec9c2a814b7c760621cb928e47f4f7f7a0b7b
--- /dev/null
+++ b/out_tensor/model.layers.47.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:def7614bc9c0d102b26b44ac4a6ee8001632e0faa7e646f0eddc39a3a70ee19d
+size 58966736
diff --git a/out_tensor/model.layers.47.self_attn.k_proj.safetensors b/out_tensor/model.layers.47.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f7ba68a06062e432c1af48e0ede2d92923d22ea7
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e605d6ab171a577de0a9e32639452d98dd3663dcef2e6d807a0101ce632d8c0c
+size 4227808
diff --git a/out_tensor/model.layers.47.self_attn.o_proj.safetensors b/out_tensor/model.layers.47.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..70b3535fa68d994db7afba323eda92eb9d51989c
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0255863140dca68872334a6c5b43aa6a669018427e63413ec6735678ecc643e
+size 16859880
diff --git a/out_tensor/model.layers.47.self_attn.q_proj.safetensors b/out_tensor/model.layers.47.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..60925108e4b693bdb653eb0bda3bc1e85985bfb3
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a00e467c3f5a4c6281347d75d71460a0e84b3ba6d9dc605972d6ae39056f22db
+size 16859880
diff --git a/out_tensor/model.layers.47.self_attn.v_proj.safetensors b/out_tensor/model.layers.47.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8562db09c6858b915d0bb7813299959c28738cad
--- /dev/null
+++ b/out_tensor/model.layers.47.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8be9fd350ec4cd7b2d3f93a997e1b76dcdba4b213e838b19432b179c9b98b64b
+size 4227808
diff --git a/out_tensor/model.layers.5.mlp.down_proj.safetensors b/out_tensor/model.layers.5.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7a7881a1ff01b066e1b6334b9c659146b26f05a
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f7d7799f60a5d1d4e14d36ab75ebe7f500f0be5b3fafb25e7d936e39515ccdc
+size 59008184
diff --git a/out_tensor/model.layers.5.mlp.gate_proj.safetensors b/out_tensor/model.layers.5.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..36ca9d18bfa33237b85707b6d6613ef9f4508f2f
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28b3d9bf9ed2b99a6a7aa5d52fb38dc0151fd5d34315372a30f3a494fec29945
+size 58966744
diff --git a/out_tensor/model.layers.5.mlp.up_proj.safetensors b/out_tensor/model.layers.5.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e3d18369bcbbecf99547dd5e976362e69d70e460
--- /dev/null
+++ b/out_tensor/model.layers.5.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:379934285f8cce3dc667a105912cfbb210766fc02b0b37b310db4797c12fb6c5
+size 58966728
diff --git a/out_tensor/model.layers.5.self_attn.k_proj.safetensors b/out_tensor/model.layers.5.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c178b73f5ac4a613ef097eba1da705170e6d339e
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d84a75cb937e02af484bafc44058649ca1969028669d862090376dd2a51736db
+size 3228960
diff --git a/out_tensor/model.layers.5.self_attn.o_proj.safetensors b/out_tensor/model.layers.5.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..adce28ab16b3755cee3a52ef49cb909b73d93efe
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d49afb03b22f964d943cfeee19b23d4ab0db155a1a0678467ea834d50447e8b4
+size 12862760
diff --git a/out_tensor/model.layers.5.self_attn.q_proj.safetensors b/out_tensor/model.layers.5.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9cb86b8c6b69b735294da6e681c7931de6046f89
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf2a9f254d98caa6468ed82ccf3ca2afb5863160a3d20823eb725e1f519ec0ba
+size 12862760
diff --git a/out_tensor/model.layers.5.self_attn.v_proj.safetensors b/out_tensor/model.layers.5.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cdf2f4e6af75d7ac98f11dda1500c69904b6de3d
--- /dev/null
+++ b/out_tensor/model.layers.5.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11b0e9f9e2e0973d1e8bafcf06527024a957a2f30e07863676aa8d7d341d0f69
+size 4277536
diff --git a/out_tensor/model.layers.6.mlp.down_proj.safetensors b/out_tensor/model.layers.6.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f63c9e432a3e9dc81212cdaa3c94ceca061e0f03
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e6bcadb3f3743adb25fae7743df8a455ad90ab7e142b8ee0b2ac968444361f8
+size 59008184
diff --git a/out_tensor/model.layers.6.mlp.gate_proj.safetensors b/out_tensor/model.layers.6.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8653bd44fbe5991567d52a8f3b3e43201a0f0c59
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:607047f54ca0423232aba46a4b2f02f3aefe0e924d601930b9bec3d4f29e9d79
+size 58966744
diff --git a/out_tensor/model.layers.6.mlp.up_proj.safetensors b/out_tensor/model.layers.6.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d156a5c74ac262ffdcb1c22e6a686a9f012cc83
--- /dev/null
+++ b/out_tensor/model.layers.6.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee6c875a9cf39a5ce78d9c7442e068d36db0f8c6e66d384efaedacca7bd636d3
+size 58966728
diff --git a/out_tensor/model.layers.6.self_attn.k_proj.safetensors b/out_tensor/model.layers.6.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2073d68fa3e75d89215df94e0e6b0f0793da1e7
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:959ded211e216a2097ddb3861db158eeedfd27d8113b846469ae363ea5afe3a2
+size 4227800
diff --git a/out_tensor/model.layers.6.self_attn.o_proj.safetensors b/out_tensor/model.layers.6.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29e38b7d02fd3944f0081b686faaef8d97c7c4c3
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c93b835aecc9c6485558bbad6ba6e7ac24d51e5763ef08f9da1e3521c7b2b58
+size 16859872
diff --git a/out_tensor/model.layers.6.self_attn.q_proj.safetensors b/out_tensor/model.layers.6.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c78ea6fcb0109c6a88abcbf591bbea2e1bc21039
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86ef28c9d4f8537622fe091d871e862e2fcbeaa4f232de1a46532b440cf87c0e
+size 16859872
diff --git a/out_tensor/model.layers.6.self_attn.v_proj.safetensors b/out_tensor/model.layers.6.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4b8c4487cfa30e2813255254137047adf633a07c
--- /dev/null
+++ b/out_tensor/model.layers.6.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bf09e11cac70f869161c11e38200c33d3ad2c6b84c2217131bb44d882e785d4
+size 4227800
diff --git a/out_tensor/model.layers.7.mlp.down_proj.safetensors b/out_tensor/model.layers.7.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..405de0a4d89a93eb3b49ae3e3e45cc209572373c
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d146f138c5f86ee9ab4f1ad775d764ee36272806bfec7462cfbcb96c47da7843
+size 59008184
diff --git a/out_tensor/model.layers.7.mlp.gate_proj.safetensors b/out_tensor/model.layers.7.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..edcbe43285fad00ea457f0158b9dd6e0850100ff
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be5f3705e7204e8526776553ddcf7a8ae3ca8f5e8bfbe5b83078fa06ff2d0d7a
+size 58966744
diff --git a/out_tensor/model.layers.7.mlp.up_proj.safetensors b/out_tensor/model.layers.7.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49383202fba4704d8948cf5361b679781da191f5
--- /dev/null
+++ b/out_tensor/model.layers.7.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e5124e4511379ee8fe2398c47cfd8a71cb79b67dfd98739dc0b4093ed1accba
+size 58966728
diff --git a/out_tensor/model.layers.7.self_attn.k_proj.safetensors b/out_tensor/model.layers.7.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..06fa80c241dd07374956186f7b0d477de0408e3c
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20e40607ceda2f742e26da9c9403a429e3d820fd2a009c64fb5790696b077acd
+size 4227800
diff --git a/out_tensor/model.layers.7.self_attn.o_proj.safetensors b/out_tensor/model.layers.7.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9aed9b4fe2a3406064fc9fee37593f231234bdca
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ee9031ba6e4bce412d4b3ae6ca879673de405d22ef25c18044b96ef0ff63171
+size 16859872
diff --git a/out_tensor/model.layers.7.self_attn.q_proj.safetensors b/out_tensor/model.layers.7.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7d8bc35df44256ce5b18af860661f0e271a7b39
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:853219bba7fc1d0982c02c95fd598fbd1c24c3428850e0997c26d79adf89731b
+size 16859872
diff --git a/out_tensor/model.layers.7.self_attn.v_proj.safetensors b/out_tensor/model.layers.7.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..103807639a27e0b8f9b2594cc45ebfdbf8a20f81
--- /dev/null
+++ b/out_tensor/model.layers.7.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5629f44d57b4e496f88b65f0470669bbe9493e4107841ba06880b5544d57039
+size 4227800
diff --git a/out_tensor/model.layers.8.mlp.down_proj.safetensors b/out_tensor/model.layers.8.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9e068147147534287f5b137f150287f5799d484c
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72b76488a22c3da2e9376267ad6cc1601578b974fb4e45a7c9394f309ad9835c
+size 59008184
diff --git a/out_tensor/model.layers.8.mlp.gate_proj.safetensors b/out_tensor/model.layers.8.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..31dea09d6447c26ba55ef167b4382f7c5839719a
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d638619dd87f3eba142719f946f193d3532d2737950af4330a45ba1f0c0aebfd
+size 58966744
diff --git a/out_tensor/model.layers.8.mlp.up_proj.safetensors b/out_tensor/model.layers.8.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0bfd0a1aab030a2e5098d104d2a94c2d4fbc41e4
--- /dev/null
+++ b/out_tensor/model.layers.8.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cfba7dcbf0d4521b8532bb70e0481c6d15b6a0d8bf7b055b60b7333a4cbcb6f
+size 58966728
diff --git a/out_tensor/model.layers.8.self_attn.k_proj.safetensors b/out_tensor/model.layers.8.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f557bcac7a2a972058a33466e625c4546e7fe60
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f082bb7e4a265fede944069d219e4b4938e673064da5c834a81b971ff00127c
+size 4227800
diff --git a/out_tensor/model.layers.8.self_attn.o_proj.safetensors b/out_tensor/model.layers.8.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ec3346cac5b65151a3de46cc526dfca1533a9f77
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f91bda0d85cdf1b0bcdf928c2831b3a1e033555d6d74988b3a50b3755a9b386
+size 16859872
diff --git a/out_tensor/model.layers.8.self_attn.q_proj.safetensors b/out_tensor/model.layers.8.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dbba1487763c6ea167f382b16c203722fb26eaaa
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22348fcfbd17f966eaca69eafce8f82e6610de66cb945e7f7f95d19a1160dcce
+size 16859872
diff --git a/out_tensor/model.layers.8.self_attn.v_proj.safetensors b/out_tensor/model.layers.8.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..57a21c509cd168101c067495a81d7e1b56dabe53
--- /dev/null
+++ b/out_tensor/model.layers.8.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10b431ef84c496f02074941b9007e195b7a46e2e2d1422c503034ad2576ba9ff
+size 4227800
diff --git a/out_tensor/model.layers.9.mlp.down_proj.safetensors b/out_tensor/model.layers.9.mlp.down_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d319699455be65304afd87c228499c81ff5cd2b
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.down_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6befcf29c5e397f5f9f0b7528457e8e007f23c735b5b63df36cc8effdddd5429
+size 59008184
diff --git a/out_tensor/model.layers.9.mlp.gate_proj.safetensors b/out_tensor/model.layers.9.mlp.gate_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..12f033a920d2feb73b58790f4ac29bc9b2af08ec
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.gate_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b81963a91c39b1e324163f965cf16027c59171b85b6d5b8373b1121e990bfc84
+size 58966744
diff --git a/out_tensor/model.layers.9.mlp.up_proj.safetensors b/out_tensor/model.layers.9.mlp.up_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..15b484e22e580628125f909d5e400cfe15c69f05
--- /dev/null
+++ b/out_tensor/model.layers.9.mlp.up_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7045ae12f004c75d94307daec293414b2c5986284264e5e4af9ef8c40aded8c
+size 58966728
diff --git a/out_tensor/model.layers.9.self_attn.k_proj.safetensors b/out_tensor/model.layers.9.self_attn.k_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff4102bbbce26c0b52f850ac85747b067ab6ad09
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.k_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e50d6c775be53b94e59950a6785ea530242ee2b24498cb97c9a36a499aefebe
+size 4227800
diff --git a/out_tensor/model.layers.9.self_attn.o_proj.safetensors b/out_tensor/model.layers.9.self_attn.o_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa9b6c7d80c04bfa550c5bfe133df052cefedfb0
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.o_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14846994208af1b2205394ba1b3be42aa7555a79d5bff8c61eb673fd3bf05449
+size 16859872
diff --git a/out_tensor/model.layers.9.self_attn.q_proj.safetensors b/out_tensor/model.layers.9.self_attn.q_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c5338d19973bf52e228376ca22d4e0a20456fd84
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.q_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eafdec88b58cbeaf77ffd94bc79fedc5bc9609527123030491b22aa6d369263c
+size 16859872
diff --git a/out_tensor/model.layers.9.self_attn.v_proj.safetensors b/out_tensor/model.layers.9.self_attn.v_proj.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f88c5a5f4b6615d23785f5be143e7941283a4292
--- /dev/null
+++ b/out_tensor/model.layers.9.self_attn.v_proj.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56a49bd71645f5d1070d606bc1be7ebe28f25196fdf52ef83826a87d161915c2
+size 4227800
diff --git a/output-00001-of-00002.safetensors b/output-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a522d7e0a28b767e84e6588ab3b4aa53238ab97
--- /dev/null
+++ b/output-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4dd25a9712adbf4f54366416a8452b15651fb07036a4a9e907d837065f05d4b
+size 8583602648
diff --git a/output-00002-of-00002.safetensors b/output-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d212b32288c86a3a700c8514d38005e01d58dd13
--- /dev/null
+++ b/output-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86e680422b8eda7e1fc4025fbc1278f3edc1e61db6afb64cc7b83b5b4be4e62f
+size 2175719824
diff --git a/rubra-11b-h.png b/rubra-11b-h.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3d1fae01bcb58bd6515628004b9ee6079f5533a
--- /dev/null
+++ b/rubra-11b-h.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e273bdc068c7f7065c1c5eba64e19ac2b4da620a59a966265026713902ca905
+size 1735652
diff --git a/rubra-11b-h/README.md b/rubra-11b-h/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7981f5287e8b27ca8e5ddaec72943c2bf75143d1
--- /dev/null
+++ b/rubra-11b-h/README.md
@@ -0,0 +1,162 @@
+---
+license: apache-2.0
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+tags:
+- freeze
+- generated_from_trainer
+model-index:
+- name: rubra-11b-h
+ results: []
+---
+
+
+
+# rubra-11b-h
+
+![image/png](./rubra-11b-h.png)
+
+## Model description
+This is a Mistral-based model trained on a large amount of code and conversational data. The goal of this model was to retain the abilities of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) while making it better at reasoning.
+
+* 32k context window
+* Rope-theta = 1e6
+
+Fine-tuning instruct models has proven to be a challenge when the goal is to create a model that is not meant for a particular task. Specifically, fine-tuning an instruct model often leads to destructive forgetting in the resulting model. This is why most fine-tunes select a base (non-instruct) language model. The rubra-v0.1 series models aim to add desired traits like multi-turn chat capabilities while retaining knowledge found in instruct models.
+
+## Chat format
+
+Your prompt should be surrounded by `[INST]` and `[/INST]` tokens. The very first instruction should begin with a beginning-of-sentence token id; subsequent instructions should not. The assistant's generation is terminated by the end-of-sentence token id.
+
+E.g.
+```
+text = "[INST] What is your favourite condiment? [/INST]"
+"Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen! "
+"[INST] Do you have mayonnaise recipes? [/INST]"
+```
+
+This format is available as a [chat template](https://huggingface.co/docs/transformers/main/chat_templating) via the `apply_chat_template()` method:
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+device = "cuda" # the device to load the model onto
+
+model = AutoModelForCausalLM.from_pretrained("sanjay920/rubra-11b-h")
+tokenizer = AutoTokenizer.from_pretrained("sanjay920/rubra-11b-h")
+
+messages = [
+ {"role": "user", "content": "What is your favourite condiment?"},
+ {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+ {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
+
+encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
+
+model_inputs = encodeds.to(device)
+model.to(device)
+
+generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
+decoded = tokenizer.batch_decode(generated_ids)
+print(decoded[0])
+```
+
+### GGUF
+
+GGUF quants are available at [sanjay920/rubra-11b-h-GGUF](https://huggingface.co/sanjay920/rubra-11b-h-GGUF)
+
+Create a new discussion for other quant variant requests!
+
+#### LM Studio
+ Set "Chat Preset" to `Mistral Instruct`
+
+## Training procedure
+* Block expansion and further training
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 12
+- total_train_batch_size: 12
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- num_epochs: 3.0
+- mixed_precision_training: Native AMP
+
+### Training results
+```
+{
+ "epoch": 3.0,
+ "train_loss": 0.5844329195557438,
+ "train_runtime": 307371.8718,
+ "train_samples_per_second": 0.246,
+ "train_steps_per_second": 0.021
+}
+```
+
+
+### Framework versions
+
+- Transformers 4.38.2
+- Pytorch 2.2.1+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2
+
+## Evaluation Results
+
+```
+########## Average ##########
+ score
+model
+gpt-4-0613 9.020312
+gpt-3.5-turbo-0125 7.970219
+rubra-11b-h 7.800000
+Mistral-7B-Instruct-v0.2 7.793750
+Mistral-7B-Instruct-v0.1 6.721875
+Llama-2-13b-chat 6.650000
+```
+
+![image/png](./mtbench-comparison.png)
+
+## Intended uses & limitations
+
+This model is still under development. It hasn't been aligned for safety, so it may produce biased generations. It appears to produce thoughtful answers compared to other small LLMs; see the examples below.
+
+## Examples
+
+![image/png](./example1.png)
+![image/png](./example2.png)
+![image/png](./example3.png)
+
+### Needle in a haystack test:
+
+The input text can be found in [needle-in-a-haystack.txt](./needle-in-a-haystack.txt)
+
+* 14,334 input tokens
+* The sentence "Random number is 23." was inserted into the middle of the long document text, and each LLM was then asked what the random number is.
+
+**rubra-11b-h**:
+```
+The random number is 23.
+```
+* Correct
+
+**mistral-7b-instruct-v0.2**:
+```
+The random number generated from this text is 23. However, it's important to note that the method used to generate this number from the text is not a secure or reliable one and should not be used for any purpose where security is a concern. This was just a fun exercise to extract a number from the text.
+
+As for the content of the text, it discusses the idea of creating a technology hub or "silicon valley" by attracting the right people - specifically, rich people and nerds. The author argues that these two groups are the limiting reagents in the reaction that produces startups, as they are the only ones present when startups get started. The text also touches on the idea that universities can act as magnets for these people and that personality and youth are important factors in creating a successful technology hub.
+```
+* Correct
+
+**hermes-2-pro**:
+```
+ the'1 Question1 question2 ..... in3d}l kennis4ily}2-^{ a}3. a} toion} people a a}5- a Question1}y}6. a}1}l,l},7. l Fuß in the
+ a a'2 a}4 ál a Question3o kennis in if to more a or a'5 1}1syy'6 Question9 Question1'2 Questionsy biologie1yy-3'1 Question. a* the}y Question a4,ll Question1 Question64}2}1}l
+ if a Questionia kennisa ah}2a égal
+```
+* Incorrect. Unusable output
\ No newline at end of file
diff --git a/rubra-11b-h/config.json b/rubra-11b-h/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..22c8db31fd74541581d5b0dc2cbc7c402724e0ba
--- /dev/null
+++ b/rubra-11b-h/config.json
@@ -0,0 +1,26 @@
+{
+ "_name_or_path": "sanjay920/rubra-11b-h",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.38.2",
+ "use_cache": false,
+ "vocab_size": 32000
+}
diff --git a/rubra-11b-h/example1.png b/rubra-11b-h/example1.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9f6f26a3ab7d0de6534adc2192454b2b1f92ecd
Binary files /dev/null and b/rubra-11b-h/example1.png differ
diff --git a/rubra-11b-h/example2.png b/rubra-11b-h/example2.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e0041d83e7c53413c66988c9bd3b5b5fa280b2f
Binary files /dev/null and b/rubra-11b-h/example2.png differ
diff --git a/rubra-11b-h/example3.png b/rubra-11b-h/example3.png
new file mode 100644
index 0000000000000000000000000000000000000000..1ab190c3029d443abb020d670a5a7a44abe03f23
Binary files /dev/null and b/rubra-11b-h/example3.png differ
diff --git a/rubra-11b-h/generation_config.json b/rubra-11b-h/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..81e4802e5819d7759c46acbe055c75e4b6d092c5
--- /dev/null
+++ b/rubra-11b-h/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.38.2",
+ "use_cache": false
+}
diff --git a/rubra-11b-h/model.safetensors.index.json b/rubra-11b-h/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd50792103e113781267be8add7e594d9a147499
--- /dev/null
+++ b/rubra-11b-h/model.safetensors.index.json
@@ -0,0 +1,442 @@
+{
+ "metadata": {
+ "total_size": 24952840192
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00006-of-00006.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.norm.weight": "model-00005-of-00006.safetensors"
+ }
+}
diff --git a/rubra-11b-h/mtbench-comparison.png b/rubra-11b-h/mtbench-comparison.png
new file mode 100644
index 0000000000000000000000000000000000000000..87fb2c26715940a222e99a74c5562402dca6d40f
Binary files /dev/null and b/rubra-11b-h/mtbench-comparison.png differ
diff --git a/rubra-11b-h/needle-in-a-haystack.txt b/rubra-11b-h/needle-in-a-haystack.txt
new file mode 100644
index 0000000000000000000000000000000000000000..43b6cd4182b1d1ffafd0cae6ea2a51af9579becf
--- /dev/null
+++ b/rubra-11b-h/needle-in-a-haystack.txt
@@ -0,0 +1,898 @@
+what is the random number?
+```
+May 2006 (This essay is derived from a keynote at Xtech.) Could you reproduce Silicon Valley elsewhere, or is there something
+unique about it?It wouldn't be surprising if it were hard to reproduce in other
+countries, because you couldn't reproduce it in most of the US
+either. What does it take to make a silicon valley even here?What it takes is the right people. If you could get the right ten
+thousand people to move from Silicon Valley to Buffalo, Buffalo
+would become Silicon Valley.
+[1]That's a striking departure from the past. Up till a couple decades
+ago, geography was destiny for cities. All great cities were located
+on waterways, because cities made money by trade, and water was the
+only economical way to ship.Now you could make a great city anywhere, if you could get the right
+people to move there. So the question of how to make a silicon
+valley becomes: who are the right people, and how do you get them
+to move? Two Types: I think you only need two kinds of people to create a technology
+hub: rich people and nerds. They're the limiting reagents in the
+reaction that produces startups, because they're the only ones
+present when startups get started. Everyone else will move.Observation bears this out: within the US, towns have become startup
+hubs if and only if they have both rich people and nerds. Few
+startups happen in Miami, for example, because although it's full
+of rich people, it has few nerds. It's not the kind of place nerds
+like.Whereas Pittsburgh has the opposite problem: plenty of nerds, but
+no rich people. The top US Computer Science departments are said
+to be MIT, Stanford, Berkeley, and Carnegie-Mellon. MIT yielded
+Route 128. Stanford and Berkeley yielded Silicon Valley. But
+Carnegie-Mellon? The record skips at that point. Lower down the
+list, the University of Washington yielded a high-tech community
+in Seattle, and the University of Texas at Austin yielded one in
+Austin. But what happened in Pittsburgh? And in Ithaca, home of
+Cornell, which is also high on the list?I grew up in Pittsburgh and went to college at Cornell, so I can
+answer for both. The weather is terrible, particularly in winter,
+and there's no interesting old city to make up for it, as there is
+in Boston. Rich people don't want to live in Pittsburgh or Ithaca.
+So while there are plenty of hackers who could start startups,
+there's no one to invest in them. Not Bureaucrats: Do you really need the rich people? Wouldn't it work to have the
+government invest in the nerds? No, it would not. Startup investors
+are a distinct type of rich people. They tend to have a lot of
+experience themselves in the technology business. This (a) helps
+them pick the right startups, and (b) means they can supply advice
+and connections as well as money. And the fact that they have a
+personal stake in the outcome makes them really pay attention.Bureaucrats by their nature are the exact opposite sort of people
+from startup investors. The idea of them making startup investments
+is comic. It would be like mathematicians running Vogue-- or
+perhaps more accurately, Vogue editors running a math journal.
+[2]Though indeed, most things bureaucrats do, they do badly. We just
+don't notice usually, because they only have to compete against
+other bureaucrats. But as startup investors they'd have to compete
+against pros with a great deal more experience and motivation.Even corporations that have in-house VC groups generally forbid
+them to make their own investment decisions. Most are only allowed
+to invest in deals where some reputable private VC firm is willing
+to act as lead investor. Not Buildings: If you go to see Silicon Valley, what you'll see are buildings.
+But it's the people that make it Silicon Valley, not the buildings.
+I read occasionally about attempts to set up "technology
+parks" in other places, as if the active ingredient of Silicon
+Valley were the office space. An article about Sophia Antipolis
+bragged that companies there included Cisco, Compaq, IBM, NCR, and
+Nortel. Don't the French realize these aren't startups?Building office buildings for technology companies won't get you a
+silicon valley, because the key stage in the life of a startup
+happens before they want that kind of space. The key stage is when
+they're three guys operating out of an apartment. Wherever the
+startup is when it gets funded, it will stay. The defining quality
+of Silicon Valley is not that Intel or Apple or Google have offices
+there, but that they were started there.So if you want to reproduce Silicon Valley, what you need to reproduce
+is those two or three founders sitting around a kitchen table
+deciding to start a company. And to reproduce that you need those
+people. Universities: The exciting thing is, all you need are the people. If you could
+attract a critical mass of nerds and investors to live somewhere,
+you could reproduce Silicon Valley. And both groups are highly
+mobile. They'll go where life is good. So what makes a place good
+to them?What nerds like is other nerds. Smart people will go wherever other
+smart people are. And in particular, to great universities. In
+theory there could be other ways to attract them, but so far
+universities seem to be indispensable. Within the US, there are
+no technology hubs without first-rate universities-- or at least,
+first-rate computer science departments.So if you want to make a silicon valley, you not only need a
+university, but one of the top handful in the world. It has to be
+good enough to act as a magnet, drawing the best people from thousands
+of miles away. And that means it has to stand up to existing magnets
+like MIT and Stanford.This sounds hard. Actually it might be easy. My professor friends,
+when they're deciding where they'd like to work, consider one thing
+above all: the quality of the other faculty. What attracts professors
+is good colleagues. So if you managed to recruit, en masse, a
+significant number of the best young researchers, you could create
+a first-rate university from nothing overnight. And you could do
+that for surprisingly little. If you paid 200 people hiring bonuses
+of $3 million apiece, you could put together a faculty that would
+bear comparison with any in the world. And from that point the
+chain reaction would be self-sustaining. So whatever it costs to
+establish a mediocre university, for an additional half billion or
+so you could have a great one.
+Random number is 23.
+[3] Personality: However, merely creating a new university would not be enough to
+start a silicon valley. The university is just the seed. It has
+to be planted in the right soil, or it won't germinate. Plant it
+in the wrong place, and you just create Carnegie-Mellon.To spawn startups, your university has to be in a town that has
+attractions other than the university. It has to be a place where
+investors want to live, and students want to stay after they graduate.The two like much the same things, because most startup investors
+are nerds themselves. So what do nerds look for in a town? Their
+tastes aren't completely different from other people's, because a
+lot of the towns they like most in the US are also big tourist
+destinations: San Francisco, Boston, Seattle. But their tastes
+can't be quite mainstream either, because they dislike other big
+tourist destinations, like New York, Los Angeles, and Las Vegas.There has been a lot written lately about the "creative class." The
+thesis seems to be that as wealth derives increasingly from ideas,
+cities will prosper only if they attract those who have them. That
+is certainly true; in fact it was the basis of Amsterdam's prosperity
+400 years ago.A lot of nerd tastes they share with the creative class in general.
+For example, they like well-preserved old neighborhoods instead of
+cookie-cutter suburbs, and locally-owned shops and restaurants
+instead of national chains. Like the rest of the creative class,
+they want to live somewhere with personality.What exactly is personality? I think it's the feeling that each
+building is the work of a distinct group of people. A town with
+personality is one that doesn't feel mass-produced. So if you want
+to make a startup hub-- or any town to attract the "creative class"--
+you probably have to ban large development projects.
+When a large tract has been developed by a single organization, you
+can always tell.
+[4]Most towns with personality are old, but they don't have to be.
+Old towns have two advantages: they're denser, because they were
+laid out before cars, and they're more varied, because they were
+built one building at a time. You could have both now. Just have
+building codes that ensure density, and ban large scale developments.A corollary is that you have to keep out the biggest developer of
+all: the government. A government that asks "How can we build a
+silicon valley?" has probably ensured failure by the way they framed
+the question. You don't build a silicon valley; you let one grow. Nerds: If you want to attract nerds, you need more than a town with
+personality. You need a town with the right personality. Nerds
+are a distinct subset of the creative class, with different tastes
+from the rest. You can see this most clearly in New York, which
+attracts a lot of creative people, but few nerds.
+[5]What nerds like is the kind of town where people walk around smiling.
+This excludes LA, where no one walks at all, and also New York,
+where people walk, but not smiling. When I was in grad school in
+Boston, a friend came to visit from New York. On the subway back
+from the airport she asked "Why is everyone smiling?" I looked and
+they weren't smiling. They just looked like they were compared to
+the facial expressions she was used to.If you've lived in New York, you know where these facial expressions
+come from. It's the kind of place where your mind may be excited,
+but your body knows it's having a bad time. People don't so much
+enjoy living there as endure it for the sake of the excitement.
+And if you like certain kinds of excitement, New York is incomparable.
+It's a hub of glamour, a magnet for all the shorter half-life
+isotopes of style and fame.Nerds don't care about glamour, so to them the appeal of New York
+is a mystery. People who like New York will pay a fortune for a
+small, dark, noisy apartment in order to live in a town where the
+cool people are really cool. A nerd looks at that deal and sees
+only: pay a fortune for a small, dark, noisy apartment.Nerds will pay a premium to live in a town where the smart people
+are really smart, but you don't have to pay as much for that. It's
+supply and demand: glamour is popular, so you have to pay a lot for
+it.Most nerds like quieter pleasures. They like cafes instead of
+clubs; used bookshops instead of fashionable clothing shops; hiking
+instead of dancing; sunlight instead of tall buildings. A nerd's
+idea of paradise is Berkeley or Boulder. Youth: It's the young nerds who start startups, so it's those specifically
+the city has to appeal to. The startup hubs in the US are all
+young-feeling towns. This doesn't mean they have to be new.
+Cambridge has the oldest town plan in America, but it feels young
+because it's full of students.What you can't have, if you want to create a silicon valley, is a
+large, existing population of stodgy people. It would be a waste
+of time to try to reverse the fortunes of a declining industrial town
+like Detroit or Philadelphia by trying to encourage startups. Those
+places have too much momentum in the wrong direction. You're better
+off starting with a blank slate in the form of a small town. Or
+better still, if there's a town young people already flock to, that
+one.The Bay Area was a magnet for the young and optimistic for decades
+before it was associated with technology. It was a place people
+went in search of something new. And so it became synonymous with
+California nuttiness. There's still a lot of that there. If you
+wanted to start a new fad-- a new way to focus one's "energy," for
+example, or a new category of things not to eat-- the Bay Area would
+be the place to do it. But a place that tolerates oddness in the
+search for the new is exactly what you want in a startup hub, because
+economically that's what startups are. Most good startup ideas
+seem a little crazy; if they were obviously good ideas, someone
+would have done them already.(How many people are going to want computers in their houses?
+What, another search engine?)That's the connection between technology and liberalism. Without
+exception the high-tech cities in the US are also the most liberal.
+But it's not because liberals are smarter that this is so. It's
+because liberal cities tolerate odd ideas, and smart people by
+definition have odd ideas.Conversely, a town that gets praised for being "solid" or representing
+"traditional values" may be a fine place to live, but it's never
+going to succeed as a startup hub. The 2004 presidential election,
+though a disaster in other respects, conveniently supplied us with
+a county-by-county
+map of such places.
+[6]To attract the young, a town must have an intact center. In most
+American cities the center has been abandoned, and the growth, if
+any, is in the suburbs. Most American cities have been turned
+inside out. But none of the startup hubs has: not San Francisco,
+or Boston, or Seattle. They all have intact centers.
+[7]
+My guess is that no city with a dead center could be turned into a
+startup hub. Young people don't want to live in the suburbs.Within the US, the two cities I think could most easily be turned
+into new silicon valleys are Boulder and Portland. Both have the
+kind of effervescent feel that attracts the young. They're each
+only a great university short of becoming a silicon valley, if they
+wanted to. Time: A great university near an attractive town. Is that all it takes?
+That was all it took to make the original Silicon Valley. Silicon
+Valley traces its origins to William Shockley, one of the inventors
+of the transistor. He did the research that won him the Nobel Prize
+at Bell Labs, but when he started his own company in 1956 he moved
+to Palo Alto to do it. At the time that was an odd thing to do.
+Why did he? Because he had grown up there and remembered how nice
+it was. Now Palo Alto is suburbia, but then it was a charming
+college town-- a charming college town with perfect weather and San
+Francisco only an hour away.The companies that rule Silicon Valley now are all descended in
+various ways from Shockley Semiconductor. Shockley was a difficult
+man, and in 1957 his top people-- "the traitorous eight"-- left to
+start a new company, Fairchild Semiconductor. Among them were
+Gordon Moore and Robert Noyce, who went on to found Intel, and
+Eugene Kleiner, who founded the VC firm Kleiner Perkins. Forty-two
+years later, Kleiner Perkins funded Google, and the partner responsible
+for the deal was John Doerr, who came to Silicon Valley in 1974 to
+work for Intel.So although a lot of the newest companies in Silicon Valley don't
+make anything out of silicon, there always seem to be multiple links
+back to Shockley. There's a lesson here: startups beget startups.
+People who work for startups start their own. People who get rich
+from startups fund new ones. I suspect this kind of organic growth
+is the only way to produce a startup hub, because it's the only way
+to grow the expertise you need.That has two important implications. The first is that you need
+time to grow a silicon valley. The university you could create in
+a couple years, but the startup community around it has to grow
+organically. The cycle time is limited by the time it takes a
+company to succeed, which probably averages about five years.The other implication of the organic growth hypothesis is that you
+can't be somewhat of a startup hub. You either have a self-sustaining
+chain reaction, or not. Observation confirms this too: cities
+either have a startup scene, or they don't. There is no middle
+ground. Chicago has the third largest metropolitan area in America.
+As a source of startups it's negligible compared to Seattle, number 15. The good news is that the initial seed can be quite small. Shockley
+Semiconductor, though itself not very successful, was big enough.
+It brought a critical mass of experts in an important new technology
+together in a place they liked enough to stay. Competing: Of course, a would-be silicon valley faces an obstacle the original
+one didn't: it has to compete with Silicon Valley. Can that be
+done? Probably.One of Silicon Valley's biggest advantages is its venture capital
+firms. This was not a factor in Shockley's day, because VC funds
+didn't exist. In fact, Shockley Semiconductor and Fairchild
+Semiconductor were not startups at all in our sense. They were
+subsidiaries-- of Beckman Instruments and Fairchild Camera and
+Instrument respectively. Those companies were apparently willing
+to establish subsidiaries wherever the experts wanted to live.Venture investors, however, prefer to fund startups within an hour's
+drive. For one, they're more likely to notice startups nearby.
+But when they do notice startups in other towns they prefer them
+to move. They don't want to have to travel to attend board meetings,
+and in any case the odds of succeeding are higher in a startup hub.The centralizing effect of venture firms is a double one: they cause
+startups to form around them, and those draw in more startups through
+acquisitions. And although the first may be weakening because it's
+now so cheap to start some startups, the second seems as strong as ever.
+Three of the most admired
+"Web 2.0" companies were started outside the usual startup hubs,
+but two of them have already been reeled in through acquisitions.Such centralizing forces make it harder for new silicon valleys to
+get started. But by no means impossible. Ultimately power rests
+with the founders. A startup with the best people will beat one
+with funding from famous VCs, and a startup that was sufficiently
+successful would never have to move. So a town that
+could exert enough pull over the right people could resist and
+perhaps even surpass Silicon Valley.For all its power, Silicon Valley has a great weakness: the paradise
+Shockley found in 1956 is now one giant parking lot. San Francisco
+and Berkeley are great, but they're forty miles away. Silicon
+Valley proper is soul-crushing suburban sprawl. It
+has fabulous weather, which makes it significantly better than the
+soul-crushing sprawl of most other American cities. But a competitor
+that managed to avoid sprawl would have real leverage. All a city
+needs is to be the kind of place the next traitorous eight look at
+and say "I want to stay here," and that would be enough to get the
+chain reaction started. Notes: [1]
+It's interesting to consider how low this number could be
+made. I suspect five hundred would be enough, even if they could
+bring no assets with them. Probably just thirty, if I could pick them,
+would be enough to turn Buffalo into a significant startup hub.[2]
+Bureaucrats manage to allocate research funding moderately
+well, but only because (like an in-house VC fund) they outsource
+most of the work of selection. A professor at a famous university
+who is highly regarded by his peers will get funding, pretty much
+regardless of the proposal. That wouldn't work for startups, whose
+founders aren't sponsored by organizations, and are often unknowns.[3]
+You'd have to do it all at once, or at least a whole department
+at a time, because people would be more likely to come if they
+knew their friends were. And you should probably start from scratch,
+rather than trying to upgrade an existing university, or much energy
+would be lost in friction.[4]
+Hypothesis: Any plan in which multiple independent buildings
+are gutted or demolished to be "redeveloped" as a single project
+is a net loss of personality for the city, with the exception of
+the conversion of buildings not previously public, like warehouses.[5]
+A few startups get started in New York, but less
+than a tenth as many per capita as in Boston, and mostly
+in less nerdy fields like finance and media.[6]
+Some blue counties are false positives (reflecting the
+remaining power of Democratic party machines), but there are no
+false negatives. You can safely write off all the red counties.[7]
+Some "urban renewal" experts took a shot at destroying Boston's
+in the 1960s, leaving the area around city hall a bleak wasteland,
+but most neighborhoods successfully resisted them.Thanks to Chris Anderson, Trevor Blackwell, Marc Hedlund,
+Jessica Livingston, Robert Morris, Greg McAdoo, Fred Wilson,
+and Stephen Wolfram for
+reading drafts of this, and to Edd Dumbill for inviting me to speak. (The second part of this talk became Why Startups
+Condense in America.)
+May 2001 (This article was written as a kind of business plan for a
+new language.
+So it is missing (because it takes for granted) the most important
+feature of a good programming language: very powerful abstractions.)A friend of mine once told an eminent operating systems
+expert that he wanted to design a really good
+programming language. The expert told him that it would be a
+waste of time, that programming languages don't become popular
+or unpopular based on their merits, and so no matter how
+good his language was, no one would use it. At least, that
+was what had happened to the language he had designed.What does make a language popular? Do popular
+languages deserve their popularity? Is it worth trying to
+define a good programming language? How would you do it?I think the answers to these questions can be found by looking
+at hackers, and learning what they want. Programming
+languages are for hackers, and a programming language
+is good as a programming language (rather than, say, an
+exercise in denotational semantics or compiler design)
+if and only if hackers like it. 1 The Mechanics of Popularity: It's true, certainly, that most people don't choose programming
+languages simply based on their merits. Most programmers are told
+what language to use by someone else. And yet I think the effect
+of such external factors on the popularity of programming languages
+is not as great as it's sometimes thought to be. I think a bigger
+problem is that a hacker's idea of a good programming language is
+not the same as most language designers'.Between the two, the hacker's opinion is the one that matters.
+Programming languages are not theorems. They're tools, designed
+for people, and they have to be designed to suit human strengths
+and weaknesses as much as shoes have to be designed for human feet.
+If a shoe pinches when you put it on, it's a bad shoe, however
+elegant it may be as a piece of sculpture.It may be that the majority of programmers can't tell a good language
+from a bad one. But that's no different with any other tool. It
+doesn't mean that it's a waste of time to try designing a good
+language. Expert hackers
+can tell a good language when they see
+one, and they'll use it. Expert hackers are a tiny minority,
+admittedly, but that tiny minority write all the good software,
+and their influence is such that the rest of the programmers will
+tend to use whatever language they use. Often, indeed, it is not
+merely influence but command: often the expert hackers are the very
+people who, as their bosses or faculty advisors, tell the other
+programmers what language to use.The opinion of expert hackers is not the only force that determines
+the relative popularity of programming languages — legacy software
+(Cobol) and hype (Ada, Java) also play a role — but I think it is
+the most powerful force over the long term. Given an initial critical
+mass and enough time, a programming language probably becomes about
+as popular as it deserves to be. And popularity further separates
+good languages from bad ones, because feedback from real live users
+always leads to improvements. Look at how much any popular language
+has changed during its life. Perl and Fortran are extreme cases,
+but even Lisp has changed a lot. Lisp 1.5 didn't have macros, for
+example; these evolved later, after hackers at MIT had spent a
+couple years using Lisp to write real programs. [1]So whether or not a language has to be good to be popular, I think
+a language has to be popular to be good. And it has to stay popular
+to stay good. The state of the art in programming languages doesn't
+stand still. And yet the Lisps we have today are still pretty much
+what they had at MIT in the mid-1980s, because that's the last time
+Lisp had a sufficiently large and demanding user base.Of course, hackers have to know about a language before they can
+use it. How are they to hear? From other hackers. But there has to
+be some initial group of hackers using the language for others even
+to hear about it. I wonder how large this group has to be; how many
+users make a critical mass? Off the top of my head, I'd say twenty.
+If a language had twenty separate users, meaning twenty users who
+decided on their own to use it, I'd consider it to be real.Getting there can't be easy. I would not be surprised if it is
+harder to get from zero to twenty than from twenty to a thousand.
+The best way to get those initial twenty users is probably to use
+a trojan horse: to give people an application they want, which
+happens to be written in the new language. 2 External Factors: Let's start by acknowledging one external factor that does affect
+the popularity of a programming language. To become popular, a
+programming language has to be the scripting language of a popular
+system. Fortran and Cobol were the scripting languages of early
+IBM mainframes. C was the scripting language of Unix, and so, later,
+was Perl. Tcl is the scripting language of Tk. Java and Javascript
+are intended to be the scripting languages of web browsers.Lisp is not a massively popular language because it is not the
+scripting language of a massively popular system. What popularity
+it retains dates back to the 1960s and 1970s, when it was the
+scripting language of MIT. A lot of the great programmers of the
+day were associated with MIT at some point. And in the early 1970s,
+before C, MIT's dialect of Lisp, called MacLisp, was one of the
+only programming languages a serious hacker would want to use.Today Lisp is the scripting language of two moderately popular
+systems, Emacs and Autocad, and for that reason I suspect that most
+of the Lisp programming done today is done in Emacs Lisp or AutoLisp.Programming languages don't exist in isolation. To hack is a
+transitive verb — hackers are usually hacking something — and in
+practice languages are judged relative to whatever they're used to
+hack. So if you want to design a popular language, you either have
+to supply more than a language, or you have to design your language
+to replace the scripting language of some existing system.Common Lisp is unpopular partly because it's an orphan. It did
+originally come with a system to hack: the Lisp Machine. But Lisp
+Machines (along with parallel computers) were steamrollered by the
+increasing power of general purpose processors in the 1980s. Common
+Lisp might have remained popular if it had been a good scripting
+language for Unix. It is, alas, an atrociously bad one.One way to describe this situation is to say that a language isn't
+judged on its own merits. Another view is that a programming language
+really isn't a programming language unless it's also the scripting
+language of something. This only seems unfair if it comes as a
+surprise. I think it's no more unfair than expecting a programming
+language to have, say, an implementation. It's just part of what
+a programming language is.A programming language does need a good implementation, of course,
+and this must be free. Companies will pay for software, but individual
+hackers won't, and it's the hackers you need to attract.A language also needs to have a book about it. The book should be
+thin, well-written, and full of good examples. K&R is the ideal
+here. At the moment I'd almost say that a language has to have a
+book published by O'Reilly. That's becoming the test of mattering
+to hackers.There should be online documentation as well. In fact, the book
+can start as online documentation. But I don't think that physical
+books are outmoded yet. Their format is convenient, and the de
+facto censorship imposed by publishers is a useful if imperfect
+filter. Bookstores are one of the most important places for learning
+about new languages. 3 Brevity: Given that you can supply the three things any language needs — a
+free implementation, a book, and something to hack — how do you
+make a language that hackers will like?One thing hackers like is brevity. Hackers are lazy, in the same
+way that mathematicians and modernist architects are lazy: they
+hate anything extraneous. It would not be far from the truth to
+say that a hacker about to write a program decides what language
+to use, at least subconsciously, based on the total number of
+characters he'll have to type. If this isn't precisely how hackers
+think, a language designer would do well to act as if it were.It is a mistake to try to baby the user with long-winded expressions
+that are meant to resemble English. Cobol is notorious for this
+flaw. A hacker would consider being asked to write "add x to y giving z" instead of "z = x+y" as something between an insult to his intelligence and a sin against
+God.It has sometimes been said that Lisp should use first and rest
+instead of car and cdr, because it would make programs easier to
+read. Maybe for the first couple hours. But a hacker can learn
+quickly enough that car means the first element of a list and cdr
+means the rest. Using first and rest means 50% more typing. And
+they are also different lengths, meaning that the arguments won't
+line up when they're called, as car and cdr often are, in successive
+lines. I've found that it matters a lot how code lines up on the
+page. I can barely read Lisp code when it is set in a variable-width
+font, and friends say this is true for other languages too.Brevity is one place where strongly typed languages lose. All other
+things being equal, no one wants to begin a program with a bunch
+of declarations. Anything that can be implicit, should be.The individual tokens should be short as well. Perl and Common Lisp
+occupy opposite poles on this question. Perl programs can be almost
+cryptically dense, while the names of built-in Common Lisp operators
+are comically long. The designers of Common Lisp probably expected
+users to have text editors that would type these long names for
+them. But the cost of a long name is not just the cost of typing
+it. There is also the cost of reading it, and the cost of the space
+it takes up on your screen. 4 Hackability: There is one thing more important than brevity to a hacker: being
+able to do what you want. In the history of programming languages
+a surprising amount of effort has gone into preventing programmers
+from doing things considered to be improper. This is a dangerously
+presumptuous plan. How can the language designer know what the
+programmer is going to need to do? I think language designers would
+do better to consider their target user to be a genius who will
+need to do things they never anticipated, rather than a bumbler
+who needs to be protected from himself. The bumbler will shoot
+himself in the foot anyway. You may save him from referring to
+variables in another package, but you can't save him from writing
+a badly designed program to solve the wrong problem, and taking
+forever to do it.Good programmers often want to do dangerous and unsavory things.
+By unsavory I mean things that go behind whatever semantic facade
+the language is trying to present: getting hold of the internal
+representation of some high-level abstraction, for example. Hackers
+like to hack, and hacking means getting inside things and second
+guessing the original designer.Let yourself be second guessed. When you make any tool, people use
+it in ways you didn't intend, and this is especially true of a
+highly articulated tool like a programming language. Many a hacker
+will want to tweak your semantic model in a way that you never
+imagined. I say, let them; give the programmer access to as much
+internal stuff as you can without endangering runtime systems like
+the garbage collector.In Common Lisp I have often wanted to iterate through the fields
+of a struct — to comb out references to a deleted object, for example,
+or find fields that are uninitialized. I know the structs are just
+vectors underneath. And yet I can't write a general purpose function
+that I can call on any struct. I can only access the fields by
+name, because that's what a struct is supposed to mean.A hacker may only want to subvert the intended model of things once
+or twice in a big program. But what a difference it makes to be
+able to. And it may be more than a question of just solving a
+problem. There is a kind of pleasure here too. Hackers share the
+surgeon's secret pleasure in poking about in gross innards, the
+teenager's secret pleasure in popping zits. [2] For boys, at least,
+certain kinds of horrors are fascinating. Maxim magazine publishes
+an annual volume of photographs, containing a mix of pin-ups and
+grisly accidents. They know their audience.Historically, Lisp has been good at letting hackers have their way.
+The political correctness of Common Lisp is an aberration. Early
+Lisps let you get your hands on everything. A good deal of that
+spirit is, fortunately, preserved in macros. What a wonderful thing,
+to be able to make arbitrary transformations on the source code.Classic macros are a real hacker's tool — simple, powerful, and
+dangerous. It's so easy to understand what they do: you call a
+function on the macro's arguments, and whatever it returns gets
+inserted in place of the macro call. Hygienic macros embody the
+opposite principle. They try to protect you from understanding what
+they're doing. I have never heard hygienic macros explained in one
+sentence. And they are a classic example of the dangers of deciding
+what programmers are allowed to want. Hygienic macros are intended
+to protect me from variable capture, among other things, but variable
+capture is exactly what I want in some macros.A really good language should be both clean and dirty: cleanly
+designed, with a small core of well understood and highly orthogonal
+operators, but dirty in the sense that it lets hackers have their
+way with it. C is like this. So were the early Lisps. A real hacker's
+language will always have a slightly raffish character.A good programming language should have features that make the kind
+of people who use the phrase "software engineering" shake their
+heads disapprovingly. At the other end of the continuum are languages
+like Ada and Pascal, models of propriety that are good for teaching
+and not much else.
+
+5 Throwaway Programs
+
+To be attractive to hackers, a language must be good for writing
+the kinds of programs they want to write. And that means, perhaps
+surprisingly, that it has to be good for writing throwaway programs.A throwaway program is a program you write quickly for some limited
+task: a program to automate some system administration task, or
+generate test data for a simulation, or convert data from one format
+to another. The surprising thing about throwaway programs is that,
+like the "temporary" buildings built at so many American universities
+during World War II, they often don't get thrown away. Many evolve
+into real programs, with real features and real users.I have a hunch that the best big programs begin life this way,
+rather than being designed big from the start, like the Hoover Dam.
+It's terrifying to build something big from scratch. When people
+take on a project that's too big, they become overwhelmed. The
+project either gets bogged down, or the result is sterile and
+wooden: a shopping mall rather than a real downtown, Brasilia rather
+than Rome, Ada rather than C.Another way to get a big program is to start with a throwaway
+program and keep improving it. This approach is less daunting, and
+the design of the program benefits from evolution. I think, if one
+looked, that this would turn out to be the way most big programs
+were developed. And those that did evolve this way are probably
+still written in whatever language they were first written in,
+because it's rare for a program to be ported, except for political
+reasons. And so, paradoxically, if you want to make a language that
+is used for big systems, you have to make it good for writing
+throwaway programs, because that's where big systems come from.Perl is a striking example of this idea. It was not only designed
+for writing throwaway programs, but was pretty much a throwaway
+program itself. Perl began life as a collection of utilities for
+generating reports, and only evolved into a programming language
+as the throwaway programs people wrote in it grew larger. It was
+not until Perl 5 (if then) that the language was suitable for
+writing serious programs, and yet it was already massively popular.What makes a language good for throwaway programs? To start with,
+it must be readily available. A throwaway program is something that
+you expect to write in an hour. So the language probably must
+already be installed on the computer you're using. It can't be
+something you have to install before you use it. It has to be there.
+C was there because it came with the operating system. Perl was
+there because it was originally a tool for system administrators,
+and yours had already installed it.Being available means more than being installed, though. An
+interactive language, with a command-line interface, is more
+available than one that you have to compile and run separately. A
+popular programming language should be interactive, and start up
+fast.Another thing you want in a throwaway program is brevity. Brevity
+is always attractive to hackers, and never more so than in a program
+they expect to turn out in an hour.
+
+6 Libraries
+
+Of course the ultimate in brevity is to have the program already
+written for you, and merely to call it. And this brings us to what
+I think will be an increasingly important feature of programming
+languages: library functions. Perl wins because it has large
+libraries for manipulating strings. This class of library functions
+are especially important for throwaway programs, which are often
+originally written for converting or extracting data. Many Perl
+programs probably begin as just a couple library calls stuck
+together.I think a lot of the advances that happen in programming languages
+in the next fifty years will have to do with library functions. I
+think future programming languages will have libraries that are as
+carefully designed as the core language. Programming language design
+will not be about whether to make your language strongly or weakly
+typed, or object oriented, or functional, or whatever, but about
+how to design great libraries. The kind of language designers who
+like to think about how to design type systems may shudder at this.
+It's almost like writing applications! Too bad. Languages are for
+programmers, and libraries are what programmers need.It's hard to design good libraries. It's not simply a matter of
+writing a lot of code. Once the libraries get too big, it can
+sometimes take longer to find the function you need than to write
+the code yourself. Libraries need to be designed using a small set
+of orthogonal operators, just like the core language. It ought to
+be possible for the programmer to guess what library call will do
+what he needs.Libraries are one place Common Lisp falls short. There are only
+rudimentary libraries for manipulating strings, and almost none
+for talking to the operating system. For historical reasons, Common
+Lisp tries to pretend that the OS doesn't exist. And because you
+can't talk to the OS, you're unlikely to be able to write a serious
+program using only the built-in operators in Common Lisp. You have
+to use some implementation-specific hacks as well, and in practice
+these tend not to give you everything you want. Hackers would think
+a lot more highly of Lisp if Common Lisp had powerful string
+libraries and good OS support.
+
+7 Syntax
+
+Could a language with Lisp's syntax, or more precisely, lack of
+syntax, ever become popular? I don't know the answer to this
+question. I do think that syntax is not the main reason Lisp isn't
+currently popular. Common Lisp has worse problems than unfamiliar
+syntax. I know several programmers who are comfortable with prefix
+syntax and yet use Perl by default, because it has powerful string
+libraries and can talk to the os.There are two possible problems with prefix notation: that it is
+unfamiliar to programmers, and that it is not dense enough. The
+conventional wisdom in the Lisp world is that the first problem is
+the real one. I'm not so sure. Yes, prefix notation makes ordinary
+programmers panic. But I don't think ordinary programmers' opinions
+matter. Languages become popular or unpopular based on what expert
+hackers think of them, and I think expert hackers might be able to
+deal with prefix notation. Perl syntax can be pretty incomprehensible,
+but that has not stood in the way of Perl's popularity. If anything
+it may have helped foster a Perl cult.A more serious problem is the diffuseness of prefix notation. For
+expert hackers, that really is a problem. No one wants to write
+(aref a x y) when they could write a[x,y].In this particular case there is a way to finesse our way out of
+the problem. If we treat data structures as if they were functions
+on indexes, we could write (a x y) instead, which is even shorter
+than the Perl form. Similar tricks may shorten other types of
+expressions.We can get rid of (or make optional) a lot of parentheses by making
+indentation significant. That's how programmers read code anyway:
+when indentation says one thing and delimiters say another, we go
+by the indentation. Treating indentation as significant would
+eliminate this common source of bugs as well as making programs
+shorter.Sometimes infix syntax is easier to read. This is especially true
+for math expressions. I've used Lisp my whole programming life and
+I still don't find prefix math expressions natural. And yet it is
+convenient, especially when you're generating code, to have operators
+that take any number of arguments. So if we do have infix syntax,
+it should probably be implemented as some kind of read-macro.I don't think we should be religiously opposed to introducing syntax
+into Lisp, as long as it translates in a well-understood way into
+underlying s-expressions. There is already a good deal of syntax
+in Lisp. It's not necessarily bad to introduce more, as long as no
+one is forced to use it. In Common Lisp, some delimiters are reserved
+for the language, suggesting that at least some of the designers
+intended to have more syntax in the future.One of the most egregiously unlispy pieces of syntax in Common Lisp
+occurs in format strings; format is a language in its own right,
+and that language is not Lisp. If there were a plan for introducing
+more syntax into Lisp, format specifiers might be able to be included
+in it. It would be a good thing if macros could generate format
+specifiers the way they generate any other kind of code.An eminent Lisp hacker told me that his copy of CLTL falls open to
+the section format. Mine too. This probably indicates room for
+improvement. It may also mean that programs do a lot of I/O.
+
+8 Efficiency
+
+A good language, as everyone knows, should generate fast code. But
+in practice I don't think fast code comes primarily from things
+you do in the design of the language. As Knuth pointed out long
+ago, speed only matters in certain critical bottlenecks. And as
+many programmers have observed since, one is very often mistaken
+about where these bottlenecks are.So, in practice, the way to get fast code is to have a very good
+profiler, rather than by, say, making the language strongly typed.
+You don't need to know the type of every argument in every call in
+the program. You do need to be able to declare the types of arguments
+in the bottlenecks. And even more, you need to be able to find out
+where the bottlenecks are.One complaint people have had with Lisp is that it's hard to tell
+what's expensive. This might be true. It might also be inevitable,
+if you want to have a very abstract language. And in any case I
+think good profiling would go a long way toward fixing the problem:
+you'd soon learn what was expensive.Part of the problem here is social. Language designers like to
+write fast compilers. That's how they measure their skill. They
+think of the profiler as an add-on, at best. But in practice a good
+profiler may do more to improve the speed of actual programs written
+in the language than a compiler that generates fast code. Here,
+again, language designers are somewhat out of touch with their
+users. They do a really good job of solving slightly the wrong
+problem.It might be a good idea to have an active profiler — to push
+performance data to the programmer instead of waiting for him to
+come asking for it. For example, the editor could display bottlenecks
+in red when the programmer edits the source code. Another approach
+would be to somehow represent what's happening in running programs.
+This would be an especially big win in server-based applications,
+where you have lots of running programs to look at. An active
+profiler could show graphically what's happening in memory as a
+program's running, or even make sounds that tell what's happening.Sound is a good cue to problems. In one place I worked, we had a
+big board of dials showing what was happening to our web servers.
+The hands were moved by little servomotors that made a slight noise
+when they turned. I couldn't see the board from my desk, but I
+found that I could tell immediately, by the sound, when there was
+a problem with a server.It might even be possible to write a profiler that would automatically
+detect inefficient algorithms. I would not be surprised if certain
+patterns of memory access turned out to be sure signs of bad
+algorithms. If there were a little guy running around inside the
+computer executing our programs, he would probably have as long
+and plaintive a tale to tell about his job as a federal government
+employee. I often have a feeling that I'm sending the processor on
+a lot of wild goose chases, but I've never had a good way to look
+at what it's doing.A number of Lisps now compile into byte code, which is then executed
+by an interpreter. This is usually done to make the implementation
+easier to port, but it could be a useful language feature. It might
+be a good idea to make the byte code an official part of the
+language, and to allow programmers to use inline byte code in
+bottlenecks. Then such optimizations would be portable too.The nature of speed, as perceived by the end-user, may be changing.
+With the rise of server-based applications, more and more programs
+may turn out to be i/o-bound. It will be worth making i/o fast.
+The language can help with straightforward measures like simple,
+fast, formatted output functions, and also with deep structural
+changes like caching and persistent objects.Users are interested in response time. But another kind of efficiency
+will be increasingly important: the number of simultaneous users
+you can support per processor. Many of the interesting applications
+written in the near future will be server-based, and the number of
+users per server is the critical question for anyone hosting such
+applications. In the capital cost of a business offering a server-based
+application, this is the divisor.For years, efficiency hasn't mattered much in most end-user
+applications. Developers have been able to assume that each user
+would have an increasingly powerful processor sitting on their
+desk. And by Parkinson's Law, software has expanded to use the
+resources available. That will change with server-based applications.
+In that world, the hardware and software will be supplied together.
+For companies that offer server-based applications, it will make
+a very big difference to the bottom line how many users they can
+support per server.In some applications, the processor will be the limiting factor,
+and execution speed will be the most important thing to optimize.
+But often memory will be the limit; the number of simultaneous
+users will be determined by the amount of memory you need for each
+user's data. The language can help here too. Good support for
+threads will enable all the users to share a single heap. It may
+also help to have persistent objects and/or language level support
+for lazy loading.
+
+9 Time
+
+The last ingredient a popular language needs is time. No one wants
+to write programs in a language that might go away, as so many
+programming languages do. So most hackers will tend to wait until
+a language has been around for a couple years before even considering
+using it.Inventors of wonderful new things are often surprised to discover
+this, but you need time to get any message through to people. A
+friend of mine rarely does anything the first time someone asks
+him. He knows that people sometimes ask for things that they turn
+out not to want. To avoid wasting his time, he waits till the third
+or fourth time he's asked to do something; by then, whoever's asking
+him may be fairly annoyed, but at least they probably really do
+want whatever they're asking for.Most people have learned to do a similar sort of filtering on new
+things they hear about. They don't even start paying attention
+until they've heard about something ten times. They're perfectly
+justified: the majority of hot new whatevers do turn out to be a
+waste of time, and eventually go away. By delaying learning VRML,
+I avoided having to learn it at all.So anyone who invents something new has to expect to keep repeating
+their message for years before people will start to get it. We
+wrote what was, as far as I know, the first web-server based
+application, and it took us years to get it through to people that
+it didn't have to be downloaded. It wasn't that they were stupid.
+They just had us tuned out.The good news is, simple repetition solves the problem. All you
+have to do is keep telling your story, and eventually people will
+start to hear. It's not when people notice you're there that they
+pay attention; it's when they notice you're still there.It's just as well that it usually takes a while to gain momentum.
+Most technologies evolve a good deal even after they're first
+launched — programming languages especially. Nothing could be better,
+for a new technology, than a few years of being used only by a small
+number of early adopters. Early adopters are sophisticated and
+demanding, and quickly flush out whatever flaws remain in your
+technology. When you only have a few users you can be in close
+contact with all of them. And early adopters are forgiving when
+you improve your system, even if this causes some breakage.There are two ways new technology gets introduced: the organic
+growth method, and the big bang method. The organic growth method
+is exemplified by the classic seat-of-the-pants underfunded garage
+startup. A couple guys, working in obscurity, develop some new
+technology. They launch it with no marketing and initially have
+only a few (fanatically devoted) users. They continue to improve
+the technology, and meanwhile their user base grows by word of
+mouth. Before they know it, they're big.The other approach, the big bang method, is exemplified by the
+VC-backed, heavily marketed startup. They rush to develop a product,
+launch it with great publicity, and immediately (they hope) have
+a large user base.Generally, the garage guys envy the big bang guys. The big bang
+guys are smooth and confident and respected by the VCs. They can
+afford the best of everything, and the PR campaign surrounding the
+launch has the side effect of making them celebrities. The organic
+growth guys, sitting in their garage, feel poor and unloved. And
+yet I think they are often mistaken to feel sorry for themselves.
+Organic growth seems to yield better technology and richer founders
+than the big bang method. If you look at the dominant technologies
+today, you'll find that most of them grew organically.This pattern doesn't only apply to companies. You see it in sponsored
+research too. Multics and Common Lisp were big-bang projects, and
+Unix and MacLisp were organic growth projects.
+
+10 Redesign
+
+"The best writing is rewriting," wrote E. B. White. Every good
+writer knows this, and it's true for software too. The most important
+part of design is redesign. Programming languages, especially,
+don't get redesigned enough.To write good software you must simultaneously keep two opposing
+ideas in your head. You need the young hacker's naive faith in
+his abilities, and at the same time the veteran's skepticism. You
+have to be able to think
+"how hard can it be?" with one half of
+your brain while thinking
+"it will never work" with the other.
+
+The trick is to realize that there's no real contradiction here.
+You want to be optimistic and skeptical about two different things.
+You have to be optimistic about the possibility of solving the
+problem, but skeptical about the value of whatever solution you've
+got so far.People who do good work often think that whatever they're working
+on is no good. Others see what they've done and are full of wonder,
+but the creator is full of worry. This pattern is no coincidence:
+it is the worry that made the work good.If you can keep hope and worry balanced, they will drive a project
+forward the same way your two legs drive a bicycle forward. In the
+first phase of the two-cycle innovation engine, you work furiously
+on some problem, inspired by your confidence that you'll be able
+to solve it. In the second phase, you look at what you've done in
+the cold light of morning, and see all its flaws very clearly. But
+as long as your critical spirit doesn't outweigh your hope, you'll
+be able to look at your admittedly incomplete system, and think,
+"how hard can it be to get the rest of the way?", thereby continuing
+the cycle.It's tricky to keep the two forces balanced. In young hackers,
+optimism predominates. They produce something, are convinced it's
+great, and never improve it. In old hackers, skepticism predominates,
+and they won't even dare to take on ambitious projects.Anything you can do to keep the redesign cycle going is good. Prose
+can be rewritten over and over until you're happy with it. But
+software, as a rule, doesn't get redesigned enough. Prose has
+readers, but software has users. If a writer rewrites an essay,
+people who read the old version are unlikely to complain that their
+thoughts have been broken by some newly introduced incompatibility.Users are a double-edged sword. They can help you improve your
+language, but they can also deter you from improving it. So choose
+your users carefully, and be slow to grow their number. Having
+users is like optimization: the wise course is to delay it. Also,
+as a general rule, you can at any given time get away with changing
+more than you think. Introducing change is like pulling off a
+bandage: the pain is a memory almost as soon as you feel it.Everyone knows that it's not a good idea to have a language designed
+by a committee. Committees yield bad design. But I think the worst
+danger of committees is that they interfere with redesign. It is
+so much work to introduce changes that no one wants to bother.
+Whatever a committee decides tends to stay that way, even if most
+of the members don't like it.Even a committee of two gets in the way of redesign. This happens
+particularly in the interfaces between pieces of software written
+by two different people. To change the interface both have to agree
+to change it at once. And so interfaces tend not to change at all,
+which is a problem because they tend to be one of the most ad hoc
+parts of any system.One solution here might be to design systems so that interfaces
+are horizontal instead of vertical — so that modules are always
+vertically stacked strata of abstraction. Then the interface will
+tend to be owned by one of them. The lower of two levels will either
+be a language in which the upper is written, in which case the
+lower level will own the interface, or it will be a slave, in which
+case the interface can be dictated by the upper level.
+
+11 Lisp
+
+What all this implies is that there is hope for a new Lisp. There
+is hope for any language that gives hackers what they want, including
+Lisp. I think we may have made a mistake in thinking that hackers
+are turned off by Lisp's strangeness. This comforting illusion may
+have prevented us from seeing the real problem with Lisp, or at
+least Common Lisp, which is that it sucks for doing what hackers
+want to do. A hacker's language needs powerful libraries and
+something to hack. Common Lisp has neither. A hacker's language is
+terse and hackable. Common Lisp is not.The good news is, it's not Lisp that sucks, but Common Lisp. If we
+can develop a new Lisp that is a real hacker's language, I think
+hackers will use it. They will use whatever language does the job.
+All we have to do is make sure this new Lisp does some important
+job better than other languages.History offers some encouragement. Over time, successive new
+programming languages have taken more and more features from Lisp.
+There is no longer much left to copy before the language you've
+made is Lisp. The latest hot language, Python, is a watered-down
+Lisp with infix syntax and no macros. A new Lisp would be a natural
+step in this progression.I sometimes think that it would be a good marketing trick to call
+it an improved version of Python. That sounds hipper than Lisp. To
+many people, Lisp is a slow AI language with a lot of parentheses.
+Fritz Kunze's official biography carefully avoids mentioning the
+L-word. But my guess is that we shouldn't be afraid to call the
+new Lisp Lisp. Lisp still has a lot of latent respect among the
+very best hackers — the ones who took 6.001 and understood it, for
+example. And those are the users you need to win.In "How to Become a Hacker," Eric Raymond describes Lisp as something
+like Latin or Greek — a language you should learn as an intellectual
+exercise, even though you won't actually use it:
+
+ Lisp is worth learning for the profound enlightenment experience
+ you will have when you finally get it; that experience will make
+ you a better programmer for the rest of your days, even if you
+ never actually use Lisp itself a lot.
+
+If I didn't know Lisp, reading this would set me asking questions.
+A language that would make me a better programmer, if it means
+anything at all, means a language that would be better for programming.
+And that is in fact the implication of what Eric is saying.As long as that idea is still floating around, I think hackers will
+be receptive enough to a new Lisp, even if it is called Lisp. But
+this Lisp must be a hacker's language, like the classic Lisps of
+the 1970s. It must be terse, simple, and hackable. And it must have
+powerful libraries for doing what hackers want to do now.In the matter of libraries I think there is room to beat languages
+like Perl and Python at their own game. A lot of the new applications
+that will need to be written in the coming years will be
+server-based
+applications. There's no reason a new Lisp shouldn't have string
+libraries as good as Perl, and if this new Lisp also had powerful
+libraries for server-based applications, it could be very popular.
+Real hackers won't turn up their noses at a new tool that will let
+them solve hard problems with a few library calls. Remember, hackers
+are lazy.It could be an even bigger win to have core language support for
+server-based applications. For example, explicit support for programs
+with multiple users, or data ownership at the level of type tags.Server-based applications also give us the answer to the question
+of what this new Lisp will be used to hack. It would not hurt to
+make Lisp better as a scripting language for Unix. (It would be
+hard to make it worse.) But I think there are areas where existing
+languages would be easier to beat. I think it might be better to
+follow the model of Tcl, and supply the Lisp together with a complete
+system for supporting server-based applications. Lisp is a natural
+fit for server-based applications. Lexical closures provide a way
+to get the effect of subroutines when the ui is just a series of
+web pages. S-expressions map nicely onto html, and macros are good
+at generating it. There need to be better tools for writing
+server-based applications, and there needs to be a new Lisp, and
+the two would work very well together.
+
+12 The Dream Language
+
+By way of summary, let's try describing the hacker's dream language.
+The dream language is
+beautiful, clean, and terse. It has an
+interactive toplevel that starts up fast. You can write programs
+to solve common problems with very little code. Nearly all the
+code in any program you write is code that's specific to your
+application. Everything else has been done for you.The syntax of the language is brief to a fault. You never have to
+type an unnecessary character, or even to use the shift key much.Using big abstractions you can write the first version of a program
+very quickly. Later, when you want to optimize, there's a really
+good profiler that tells you where to focus your attention. You
+can make inner loops blindingly fast, even writing inline byte code
+if you need to.There are lots of good examples to learn from, and the language is
+intuitive enough that you can learn how to use it from examples in
+a couple minutes. You don't need to look in the manual much. The
+manual is thin, and has few warnings and qualifications.The language has a small core, and powerful, highly orthogonal
+libraries that are as carefully designed as the core language. The
+libraries all work well together; everything in the language fits
+together like the parts in a fine camera. Nothing is deprecated,
+or retained for compatibility. The source code of all the libraries
+is readily available. It's easy to talk to the operating system
+and to applications written in other languages.The language is built in layers. The higher-level abstractions are
+built in a very transparent way out of lower-level abstractions,
+which you can get hold of if you want.Nothing is hidden from you that doesn't absolutely have to be. The
+language offers abstractions only as a way of saving you work,
+rather than as a way of telling you what to do. In fact, the language
+encourages you to be an equal participant in its design. You can
+change everything about it, including even its syntax, and anything
+you write has, as much as possible, the same status as what comes
+predefined.
+
+Notes
+
+[1] Macros very close to the modern idea were proposed by Timothy
+Hart in 1964, two years after Lisp 1.5 was released. What was
+missing, initially, were ways to avoid variable capture and multiple
+evaluation; Hart's examples are subject to both.
+
+[2] In When the Air Hits Your Brain, neurosurgeon Frank Vertosick
+recounts a conversation in which his chief resident, Gary, talks
+about the difference between surgeons and internists ("fleas"):
+
+ Gary and I ordered a large pizza and found an open booth. The
+ chief lit a cigarette. "Look at those goddamn fleas, jabbering
+ about some disease they'll see once in their lifetimes. That's
+ the trouble with fleas, they only like the bizarre stuff. They
+ hate their bread and butter cases. That's the difference between
+ us and the fucking fleas. See, we love big juicy lumbar disc
+ herniations, but they hate hypertension...."
+
+It's hard to think of a lumbar disc herniation as juicy (except
+literally). And yet I think I know what they mean. I've often had
+a juicy bug to track down. Someone who's not a programmer would
+find it hard to imagine that there could be pleasure in a bug.
+Surely it's better if everything just works. In one way, it is.
+And yet there is undeniably a grim satisfaction in hunting down
+certain sorts of bugs.
+```
\ No newline at end of file
diff --git a/rubra-11b-h/rubra-11b-h.png b/rubra-11b-h/rubra-11b-h.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3d1fae01bcb58bd6515628004b9ee6079f5533a
--- /dev/null
+++ b/rubra-11b-h/rubra-11b-h.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e273bdc068c7f7065c1c5eba64e19ac2b4da620a59a966265026713902ca905
+size 1735652
diff --git a/rubra-11b-h/special_tokens_map.json b/rubra-11b-h/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..492d4b2966a1763442d426d880dbc29f94906e4c
--- /dev/null
+++ b/rubra-11b-h/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/rubra-11b-h/tokenizer.model b/rubra-11b-h/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/rubra-11b-h/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/rubra-11b-h/tokenizer_config.json b/rubra-11b-h/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed358013d41334780151bb4ab85d8770bb7316da
--- /dev/null
+++ b/rubra-11b-h/tokenizer_config.json
@@ -0,0 +1,46 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{{ '' + system_message }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '' }}{% elif message['role'] == 'function' %}{{ '<>' + content + '' }}{% elif message['role'] == 'observation' %}{{ '[INST] <>' + content + ' [/INST]' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..492d4b2966a1763442d426d880dbc29f94906e4c
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed358013d41334780151bb4ab85d8770bb7316da
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,46 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{{ '' + system_message }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '' }}{% elif message['role'] == 'function' %}{{ '<>' + content + '' }}{% elif message['role'] == 'observation' %}{{ '[INST] <>' + content + ' [/INST]' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}