|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.8929605484008789, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.9168710708618164, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.9242000579833984, |
|
"total_bits": 657821968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.9581476449966431, |
|
"total_bits": 656511248, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.9504798650741577, |
|
"total_bits": 657821968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.9427821636199951, |
|
"total_bits": 656511248, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.936309814453125, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.9291403293609619, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.9180412292480469, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.9271588325500488, |
|
"total_bits": 657821968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.9456520080566406, |
|
"total_bits": 657821968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.9409334659576416, |
|
"total_bits": 657821968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.9337412118911743, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.9283764362335205, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.9217686653137207, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.918848991394043, |
|
"total_bits": 661754128, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.9216296672821045, |
|
"total_bits": 678793488, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.9213476181030273, |
|
"total_bits": 690589968, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.9418046474456787, |
|
"total_bits": 876777744, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.9392967224121094, |
|
"total_bits": 876777744, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.9362101554870605, |
|
"total_bits": 876777744, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.9523159265518188, |
|
"total_bits": 1051889936, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.9505712985992432, |
|
"total_bits": 1051889936, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.9630558490753174, |
|
"total_bits": 1161335056, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.24": { |
|
"accuracy": 0.9636557102203369, |
|
"total_bits": 1161335056, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.25": { |
|
"accuracy": 0.9625805616378784, |
|
"total_bits": 1161335056, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.26": { |
|
"accuracy": 0.9636578559875488, |
|
"total_bits": 1161335056, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.75, |
|
0.25 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.27": { |
|
"accuracy": 0.9815378189086914, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.28": { |
|
"accuracy": 0.9821363687515259, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.29": { |
|
"accuracy": 0.9825566411018372, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.30": { |
|
"accuracy": 0.9833992123603821, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.31": { |
|
"accuracy": 0.983980119228363, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.32": { |
|
"accuracy": 0.984345555305481, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.33": { |
|
"accuracy": 0.9841893315315247, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.34": { |
|
"accuracy": 0.9848365783691406, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.35": { |
|
"accuracy": 0.9698885679244995, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.36": { |
|
"accuracy": 0.9867296814918518, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.37": { |
|
"accuracy": 0.9842843413352966, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.38": { |
|
"accuracy": 0.9846153259277344, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.39": { |
|
"accuracy": 0.9736208915710449, |
|
"total_bits": 1270780176, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
} |
|
} |
|
} |