NicoNico6
update
f1661ea
raw
history blame
68.9 kB
{
"measurement": {
"model.layers.0": {
"accuracy": 0.8315210342407227,
"total_bits": 606753024,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.1": {
"accuracy": 0.8099503517150879,
"total_bits": 573724416,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.2": {
"accuracy": 0.9151215553283691,
"total_bits": 458124288,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.3": {
"accuracy": 0.8967597484588623,
"total_bits": 458124288,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.4": {
"accuracy": 0.8822777271270752,
"total_bits": 458124288,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.5": {
"accuracy": 0.8791213035583496,
"total_bits": 491152896,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.6": {
"accuracy": 0.8766169548034668,
"total_bits": 491152896,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.7": {
"accuracy": 0.8512115478515625,
"total_bits": 458124288,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.8": {
"accuracy": 0.8560552597045898,
"total_bits": 466380288,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.9": {
"accuracy": 0.8650345802307129,
"total_bits": 491152896,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.10": {
"accuracy": 0.8646550178527832,
"total_bits": 491152896,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.11": {
"accuracy": 0.8666439056396484,
"total_bits": 491152896,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.12": {
"accuracy": 0.8785800933837891,
"total_bits": 573724416,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.13": {
"accuracy": 0.9117043018341064,
"total_bits": 648037632,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.14": {
"accuracy": 0.8958101272583008,
"total_bits": 615009024,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.15": {
"accuracy": 0.9006896018981934,
"total_bits": 615009024,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.16": {
"accuracy": 0.9052057266235352,
"total_bits": 615009024,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.17": {
"accuracy": 0.9068913459777832,
"total_bits": 615009024,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.18": {
"accuracy": 0.9185070991516113,
"total_bits": 648037632,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.19": {
"accuracy": 0.9402451515197754,
"total_bits": 722356992,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.20": {
"accuracy": 0.9397883415222168,
"total_bits": 722356992,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.21": {
"accuracy": 0.9382126331329346,
"total_bits": 722356992,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.22": {
"accuracy": 0.9447140693664551,
"total_bits": 722356992,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.23": {
"accuracy": 0.9728553295135498,
"total_bits": 846216960,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.24": {
"accuracy": 0.9704793691635132,
"total_bits": 846216960,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.25": {
"accuracy": 0.9679086208343506,
"total_bits": 846216960,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.26": {
"accuracy": 0.9668022394180298,
"total_bits": 846216960,
"q_proj": {
"group_size": {
"2": 64
},
"bits": [
2
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.27": {
"accuracy": 0.9765444993972778,
"total_bits": 879245568,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.28": {
"accuracy": 0.9767729043960571,
"total_bits": 879245568,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.29": {
"accuracy": 0.97376549243927,
"total_bits": 879245568,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.30": {
"accuracy": 0.9674742221832275,
"total_bits": 879245568,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
},
"model.layers.31": {
"accuracy": 0.9513812065124512,
"total_bits": 879245568,
"q_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"k_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"v_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"o_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"up_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"gate_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
},
"down_proj": {
"group_size": {
"4": 128
},
"bits": [
4
],
"bits_prop": [
1
],
"scale_bits": 4
}
}
}
}