|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.7379617691040039, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.8626322746276855, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.8636598587036133, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.8549518585205078, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.8379402160644531, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.8145275115966797, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.9044299125671387, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.912956953048706, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.9059407711029053, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.9044547080993652, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.8962442874908447, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.894075870513916, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.8837118148803711, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.8791499137878418, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.8694100379943848, |
|
"total_bits": 108214800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.8679704666137695, |
|
"total_bits": 109263376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.8759989738464355, |
|
"total_bits": 111884816, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.8716182708740234, |
|
"total_bits": 112409104, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.8721990585327148, |
|
"total_bits": 112409104, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.8741440773010254, |
|
"total_bits": 112409104, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.8825793266296387, |
|
"total_bits": 111884816, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.8796310424804688, |
|
"total_bits": 112409104, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.8898491859436035, |
|
"total_bits": 120273424, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.9079098701477051, |
|
"total_bits": 144849424, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
} |
|
} |
|
} |
|
} |