diff --git "a/quant_strategy.json" "b/quant_strategy.json" new file mode 100644--- /dev/null +++ "b/quant_strategy.json" @@ -0,0 +1,5636 @@ +{ + "measurement": { + "model.layers.0": { + "accuracy": 0.890477180480957, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.1": { + "accuracy": 0.9157891273498535, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.2": { + "accuracy": 0.8203115463256836, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.3": { + "accuracy": 0.9056191444396973, + "total_bits": 1018223424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.4": { + "accuracy": 0.8945178985595703, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.5": { + "accuracy": 0.894472599029541, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.6": { + "accuracy": 0.8993639945983887, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.7": { + "accuracy": 0.9051589965820312, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.8": { + "accuracy": 0.911219596862793, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.9": { + "accuracy": 0.9410309791564941, + "total_bits": 1284004512, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.10": { + "accuracy": 0.9792273044586182, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.11": { + "accuracy": 0.9785254597663879, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.12": { + "accuracy": 0.9760403633117676, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.13": { + "accuracy": 0.9748064875602722, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.14": { + "accuracy": 0.9750298857688904, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.15": { + "accuracy": 0.9738214015960693, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.16": { + "accuracy": 0.9730130434036255, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.17": { + "accuracy": 0.9719024896621704, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.18": { + "accuracy": 0.9715174436569214, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.19": { + "accuracy": 0.9709410667419434, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.20": { + "accuracy": 0.9703754186630249, + "total_bits": 1018223424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.21": { + "accuracy": 0.9691040515899658, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.22": { + "accuracy": 0.9683681726455688, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.23": { + "accuracy": 0.968040943145752, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.24": { + "accuracy": 0.9702473878860474, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.25": { + "accuracy": 0.9718453884124756, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.26": { + "accuracy": 0.9731786251068115, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.27": { + "accuracy": 0.9746567010879517, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.28": { + "accuracy": 0.9770183563232422, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.29": { + "accuracy": 0.977944552898407, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.30": { + "accuracy": 0.9781940579414368, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.31": { + "accuracy": 0.9774672985076904, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.32": { + "accuracy": 0.9732013940811157, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.33": { + "accuracy": 0.9707193374633789, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.34": { + "accuracy": 0.9686504602432251, + "total_bits": 1018223424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.35": { + "accuracy": 0.9660801887512207, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.36": { + "accuracy": 0.962059497833252, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.37": { + "accuracy": 0.9613919258117676, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.38": { + "accuracy": 0.9594907760620117, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.39": { + "accuracy": 0.9589303731918335, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.40": { + "accuracy": 0.9561096429824829, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.41": { + "accuracy": 0.9535393714904785, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.42": { + "accuracy": 0.9512386322021484, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.43": { + "accuracy": 0.9472916126251221, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.44": { + "accuracy": 0.9438014030456543, + "total_bits": 1007903424, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.45": { + "accuracy": 0.9417228698730469, + "total_bits": 1059511104, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.46": { + "accuracy": 0.9483001232147217, + "total_bits": 1284004512, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.47": { + "accuracy": 0.9506616592407227, + "total_bits": 1335612192, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.48": { + "accuracy": 0.9525680541992188, + "total_bits": 1397539872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.49": { + "accuracy": 0.9508843421936035, + "total_bits": 1397539872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.50": { + "accuracy": 0.9601802825927734, + "total_bits": 1611721632, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.51": { + "accuracy": 0.9612672328948975, + "total_bits": 1611721632, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.52": { + "accuracy": 0.9624916315078735, + "total_bits": 1611721632, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.53": { + "accuracy": 0.9628530740737915, + "total_bits": 1611721632, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.54": { + "accuracy": 0.9641364812850952, + "total_bits": 1611721632, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.55": { + "accuracy": 0.9832327961921692, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.56": { + "accuracy": 0.983729898929596, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.57": { + "accuracy": 0.9834256768226624, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.58": { + "accuracy": 0.9826788902282715, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.59": { + "accuracy": 0.9762988090515137, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.60": { + "accuracy": 0.9840570092201233, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.61": { + "accuracy": 0.9840413331985474, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.62": { + "accuracy": 0.9892150163650513, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.63": { + "accuracy": 0.9909077882766724, + "total_bits": 1898151072, + "q_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + } +} \ No newline at end of file