diff --git "a/quant_strategy.json" "b/quant_strategy.json" new file mode 100644--- /dev/null +++ "b/quant_strategy.json" @@ -0,0 +1,6407 @@ +{ + "measurement": { + "model.layers.0": { + "accuracy": 0.938117504119873, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.1": { + "accuracy": 0.9385285377502441, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.2": { + "accuracy": 0.9178314208984375, + "total_bits": 1014035744, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.3": { + "accuracy": 0.9398744106292725, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.4": { + "accuracy": 0.932077169418335, + "total_bits": 1031206176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.5": { + "accuracy": 0.9244847297668457, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.6": { + "accuracy": 0.9192135334014893, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.7": { + "accuracy": 0.9200577735900879, + "total_bits": 1057813792, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.8": { + "accuracy": 0.9151592254638672, + "total_bits": 1057813792, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.9": { + "accuracy": 0.9430499076843262, + "total_bits": 1006171424, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.10": { + "accuracy": 0.9748049974441528, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.11": { + "accuracy": 0.9726496934890747, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.12": { + "accuracy": 0.9691994786262512, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.13": { + "accuracy": 0.9673866033554077, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.14": { + "accuracy": 0.9652252197265625, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.15": { + "accuracy": 0.9634624719619751, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.16": { + "accuracy": 0.9620521068572998, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.17": { + "accuracy": 0.9614913463592529, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.18": { + "accuracy": 0.9601767063140869, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.19": { + "accuracy": 0.9585492610931396, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.20": { + "accuracy": 0.9575080871582031, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.21": { + "accuracy": 0.9563714265823364, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.22": { + "accuracy": 0.9561275243759155, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.23": { + "accuracy": 0.95439612865448, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.24": { + "accuracy": 0.9584873914718628, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.25": { + "accuracy": 0.9604376554489136, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.26": { + "accuracy": 0.9616951942443848, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.27": { + "accuracy": 0.9634686708450317, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.28": { + "accuracy": 0.9646973609924316, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.29": { + "accuracy": 0.9643039107322693, + "total_bits": 1014822176, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.30": { + "accuracy": 0.9647260308265686, + "total_bits": 1057813792, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.31": { + "accuracy": 0.9716804623603821, + "total_bits": 1341060384, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.32": { + "accuracy": 0.9701521396636963, + "total_bits": 1341060384, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.33": { + "accuracy": 0.9710032939910889, + "total_bits": 1442510112, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.34": { + "accuracy": 0.9695323705673218, + "total_bits": 1442510112, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.35": { + "accuracy": 0.974990963935852, + "total_bits": 1611592992, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.36": { + "accuracy": 0.9803920388221741, + "total_bits": 1780675872, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.37": { + "accuracy": 0.9799299836158752, + "total_bits": 1780675872, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.38": { + "accuracy": 0.9789667725563049, + "total_bits": 1780675872, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.39": { + "accuracy": 0.9891021251678467, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.40": { + "accuracy": 0.9887650012969971, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.41": { + "accuracy": 0.9885014891624451, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.42": { + "accuracy": 0.988054633140564, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.43": { + "accuracy": 0.9874053597450256, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.44": { + "accuracy": 0.9866098761558533, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.45": { + "accuracy": 0.9856432676315308, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.46": { + "accuracy": 0.9848724603652954, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.47": { + "accuracy": 0.9841241836547852, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.48": { + "accuracy": 0.9846011400222778, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.49": { + "accuracy": 0.9843735694885254, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.50": { + "accuracy": 0.9848182797431946, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.51": { + "accuracy": 0.9850360155105591, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.52": { + "accuracy": 0.9854971170425415, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.53": { + "accuracy": 0.9858990907669067, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.54": { + "accuracy": 0.9864022135734558, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.55": { + "accuracy": 0.9867092370986938, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.56": { + "accuracy": 0.986831784248352, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.57": { + "accuracy": 0.9866400957107544, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.58": { + "accuracy": 0.9869475960731506, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.59": { + "accuracy": 0.9808470010757446, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.60": { + "accuracy": 0.9877333045005798, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.61": { + "accuracy": 0.9868653416633606, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.62": { + "accuracy": 0.9976152181625366, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.63": { + "accuracy": 0.9992709159851074, + "total_bits": 1949758752, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + } +} \ No newline at end of file