NicoNico6
commited on
Commit
•
1e7e24b
1
Parent(s):
5c1d2a3
update
Browse files- quant_strategy.json +136 -136
quant_strategy.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"measurement": {
|
3 |
"model.layers.0": {
|
4 |
-
"accuracy": 0.
|
5 |
-
"total_bits":
|
6 |
"q_proj": {
|
7 |
"group_size": {
|
8 |
"2": 64
|
@@ -65,10 +65,10 @@
|
|
65 |
},
|
66 |
"gate_proj": {
|
67 |
"group_size": {
|
68 |
-
"
|
69 |
},
|
70 |
"bits": [
|
71 |
-
|
72 |
],
|
73 |
"bits_prop": [
|
74 |
1
|
@@ -89,14 +89,14 @@
|
|
89 |
}
|
90 |
},
|
91 |
"model.layers.1": {
|
92 |
-
"accuracy": 0.
|
93 |
-
"total_bits":
|
94 |
"q_proj": {
|
95 |
"group_size": {
|
96 |
-
"
|
97 |
},
|
98 |
"bits": [
|
99 |
-
|
100 |
],
|
101 |
"bits_prop": [
|
102 |
1
|
@@ -105,10 +105,10 @@
|
|
105 |
},
|
106 |
"k_proj": {
|
107 |
"group_size": {
|
108 |
-
"
|
109 |
},
|
110 |
"bits": [
|
111 |
-
|
112 |
],
|
113 |
"bits_prop": [
|
114 |
1
|
@@ -141,10 +141,10 @@
|
|
141 |
},
|
142 |
"up_proj": {
|
143 |
"group_size": {
|
144 |
-
"
|
145 |
},
|
146 |
"bits": [
|
147 |
-
|
148 |
],
|
149 |
"bits_prop": [
|
150 |
1
|
@@ -153,10 +153,10 @@
|
|
153 |
},
|
154 |
"gate_proj": {
|
155 |
"group_size": {
|
156 |
-
"
|
157 |
},
|
158 |
"bits": [
|
159 |
-
|
160 |
],
|
161 |
"bits_prop": [
|
162 |
1
|
@@ -165,10 +165,10 @@
|
|
165 |
},
|
166 |
"down_proj": {
|
167 |
"group_size": {
|
168 |
-
"
|
169 |
},
|
170 |
"bits": [
|
171 |
-
|
172 |
],
|
173 |
"bits_prop": [
|
174 |
1
|
@@ -177,14 +177,14 @@
|
|
177 |
}
|
178 |
},
|
179 |
"model.layers.2": {
|
180 |
-
"accuracy": 0.
|
181 |
-
"total_bits":
|
182 |
"q_proj": {
|
183 |
"group_size": {
|
184 |
-
"
|
185 |
},
|
186 |
"bits": [
|
187 |
-
|
188 |
],
|
189 |
"bits_prop": [
|
190 |
1
|
@@ -193,10 +193,10 @@
|
|
193 |
},
|
194 |
"k_proj": {
|
195 |
"group_size": {
|
196 |
-
"
|
197 |
},
|
198 |
"bits": [
|
199 |
-
|
200 |
],
|
201 |
"bits_prop": [
|
202 |
1
|
@@ -229,10 +229,10 @@
|
|
229 |
},
|
230 |
"up_proj": {
|
231 |
"group_size": {
|
232 |
-
"
|
233 |
},
|
234 |
"bits": [
|
235 |
-
|
236 |
],
|
237 |
"bits_prop": [
|
238 |
1
|
@@ -241,10 +241,10 @@
|
|
241 |
},
|
242 |
"gate_proj": {
|
243 |
"group_size": {
|
244 |
-
"
|
245 |
},
|
246 |
"bits": [
|
247 |
-
|
248 |
],
|
249 |
"bits_prop": [
|
250 |
1
|
@@ -253,10 +253,10 @@
|
|
253 |
},
|
254 |
"down_proj": {
|
255 |
"group_size": {
|
256 |
-
"
|
257 |
},
|
258 |
"bits": [
|
259 |
-
|
260 |
],
|
261 |
"bits_prop": [
|
262 |
1
|
@@ -265,8 +265,8 @@
|
|
265 |
}
|
266 |
},
|
267 |
"model.layers.3": {
|
268 |
-
"accuracy": 0.
|
269 |
-
"total_bits":
|
270 |
"q_proj": {
|
271 |
"group_size": {
|
272 |
"2": 64
|
@@ -305,10 +305,10 @@
|
|
305 |
},
|
306 |
"o_proj": {
|
307 |
"group_size": {
|
308 |
-
"
|
309 |
},
|
310 |
"bits": [
|
311 |
-
|
312 |
],
|
313 |
"bits_prop": [
|
314 |
1
|
@@ -317,10 +317,10 @@
|
|
317 |
},
|
318 |
"up_proj": {
|
319 |
"group_size": {
|
320 |
-
"
|
321 |
},
|
322 |
"bits": [
|
323 |
-
|
324 |
],
|
325 |
"bits_prop": [
|
326 |
1
|
@@ -353,8 +353,8 @@
|
|
353 |
}
|
354 |
},
|
355 |
"model.layers.4": {
|
356 |
-
"accuracy": 0.
|
357 |
-
"total_bits":
|
358 |
"q_proj": {
|
359 |
"group_size": {
|
360 |
"2": 64
|
@@ -369,10 +369,10 @@
|
|
369 |
},
|
370 |
"k_proj": {
|
371 |
"group_size": {
|
372 |
-
"
|
373 |
},
|
374 |
"bits": [
|
375 |
-
|
376 |
],
|
377 |
"bits_prop": [
|
378 |
1
|
@@ -405,10 +405,10 @@
|
|
405 |
},
|
406 |
"up_proj": {
|
407 |
"group_size": {
|
408 |
-
"
|
409 |
},
|
410 |
"bits": [
|
411 |
-
|
412 |
],
|
413 |
"bits_prop": [
|
414 |
1
|
@@ -417,10 +417,10 @@
|
|
417 |
},
|
418 |
"gate_proj": {
|
419 |
"group_size": {
|
420 |
-
"
|
421 |
},
|
422 |
"bits": [
|
423 |
-
|
424 |
],
|
425 |
"bits_prop": [
|
426 |
1
|
@@ -529,8 +529,8 @@
|
|
529 |
}
|
530 |
},
|
531 |
"model.layers.6": {
|
532 |
-
"accuracy": 0.
|
533 |
-
"total_bits":
|
534 |
"q_proj": {
|
535 |
"group_size": {
|
536 |
"2": 64
|
@@ -569,10 +569,10 @@
|
|
569 |
},
|
570 |
"o_proj": {
|
571 |
"group_size": {
|
572 |
-
"
|
573 |
},
|
574 |
"bits": [
|
575 |
-
|
576 |
],
|
577 |
"bits_prop": [
|
578 |
1
|
@@ -617,8 +617,8 @@
|
|
617 |
}
|
618 |
},
|
619 |
"model.layers.7": {
|
620 |
-
"accuracy": 0.
|
621 |
-
"total_bits":
|
622 |
"q_proj": {
|
623 |
"group_size": {
|
624 |
"2": 64
|
@@ -633,10 +633,10 @@
|
|
633 |
},
|
634 |
"k_proj": {
|
635 |
"group_size": {
|
636 |
-
"
|
637 |
},
|
638 |
"bits": [
|
639 |
-
|
640 |
],
|
641 |
"bits_prop": [
|
642 |
1
|
@@ -705,8 +705,8 @@
|
|
705 |
}
|
706 |
},
|
707 |
"model.layers.8": {
|
708 |
-
"accuracy": 0.
|
709 |
-
"total_bits":
|
710 |
"q_proj": {
|
711 |
"group_size": {
|
712 |
"2": 64
|
@@ -721,10 +721,10 @@
|
|
721 |
},
|
722 |
"k_proj": {
|
723 |
"group_size": {
|
724 |
-
"
|
725 |
},
|
726 |
"bits": [
|
727 |
-
|
728 |
],
|
729 |
"bits_prop": [
|
730 |
1
|
@@ -881,8 +881,8 @@
|
|
881 |
}
|
882 |
},
|
883 |
"model.layers.10": {
|
884 |
-
"accuracy": 0.
|
885 |
-
"total_bits":
|
886 |
"q_proj": {
|
887 |
"group_size": {
|
888 |
"2": 64
|
@@ -921,10 +921,10 @@
|
|
921 |
},
|
922 |
"o_proj": {
|
923 |
"group_size": {
|
924 |
-
"
|
925 |
},
|
926 |
"bits": [
|
927 |
-
|
928 |
],
|
929 |
"bits_prop": [
|
930 |
1
|
@@ -969,8 +969,8 @@
|
|
969 |
}
|
970 |
},
|
971 |
"model.layers.11": {
|
972 |
-
"accuracy": 0.
|
973 |
-
"total_bits":
|
974 |
"q_proj": {
|
975 |
"group_size": {
|
976 |
"2": 64
|
@@ -985,10 +985,10 @@
|
|
985 |
},
|
986 |
"k_proj": {
|
987 |
"group_size": {
|
988 |
-
"
|
989 |
},
|
990 |
"bits": [
|
991 |
-
|
992 |
],
|
993 |
"bits_prop": [
|
994 |
1
|
@@ -1009,10 +1009,10 @@
|
|
1009 |
},
|
1010 |
"o_proj": {
|
1011 |
"group_size": {
|
1012 |
-
"
|
1013 |
},
|
1014 |
"bits": [
|
1015 |
-
|
1016 |
],
|
1017 |
"bits_prop": [
|
1018 |
1
|
@@ -1045,10 +1045,10 @@
|
|
1045 |
},
|
1046 |
"down_proj": {
|
1047 |
"group_size": {
|
1048 |
-
"
|
1049 |
},
|
1050 |
"bits": [
|
1051 |
-
|
1052 |
],
|
1053 |
"bits_prop": [
|
1054 |
1
|
@@ -1057,8 +1057,8 @@
|
|
1057 |
}
|
1058 |
},
|
1059 |
"model.layers.12": {
|
1060 |
-
"accuracy": 0.
|
1061 |
-
"total_bits":
|
1062 |
"q_proj": {
|
1063 |
"group_size": {
|
1064 |
"2": 64
|
@@ -1133,10 +1133,10 @@
|
|
1133 |
},
|
1134 |
"down_proj": {
|
1135 |
"group_size": {
|
1136 |
-
"
|
1137 |
},
|
1138 |
"bits": [
|
1139 |
-
|
1140 |
],
|
1141 |
"bits_prop": [
|
1142 |
1
|
@@ -1321,8 +1321,8 @@
|
|
1321 |
}
|
1322 |
},
|
1323 |
"model.layers.15": {
|
1324 |
-
"accuracy": 0.
|
1325 |
-
"total_bits":
|
1326 |
"q_proj": {
|
1327 |
"group_size": {
|
1328 |
"2": 64
|
@@ -1373,10 +1373,10 @@
|
|
1373 |
},
|
1374 |
"up_proj": {
|
1375 |
"group_size": {
|
1376 |
-
"
|
1377 |
},
|
1378 |
"bits": [
|
1379 |
-
|
1380 |
],
|
1381 |
"bits_prop": [
|
1382 |
1
|
@@ -1397,10 +1397,10 @@
|
|
1397 |
},
|
1398 |
"down_proj": {
|
1399 |
"group_size": {
|
1400 |
-
"
|
1401 |
},
|
1402 |
"bits": [
|
1403 |
-
|
1404 |
],
|
1405 |
"bits_prop": [
|
1406 |
1
|
@@ -1409,8 +1409,8 @@
|
|
1409 |
}
|
1410 |
},
|
1411 |
"model.layers.16": {
|
1412 |
-
"accuracy": 0.
|
1413 |
-
"total_bits":
|
1414 |
"q_proj": {
|
1415 |
"group_size": {
|
1416 |
"2": 64
|
@@ -1461,10 +1461,10 @@
|
|
1461 |
},
|
1462 |
"up_proj": {
|
1463 |
"group_size": {
|
1464 |
-
"
|
1465 |
},
|
1466 |
"bits": [
|
1467 |
-
|
1468 |
],
|
1469 |
"bits_prop": [
|
1470 |
1
|
@@ -1497,14 +1497,14 @@
|
|
1497 |
}
|
1498 |
},
|
1499 |
"model.layers.17": {
|
1500 |
-
"accuracy": 0.
|
1501 |
-
"total_bits":
|
1502 |
"q_proj": {
|
1503 |
"group_size": {
|
1504 |
-
"
|
1505 |
},
|
1506 |
"bits": [
|
1507 |
-
|
1508 |
],
|
1509 |
"bits_prop": [
|
1510 |
1
|
@@ -1513,10 +1513,10 @@
|
|
1513 |
},
|
1514 |
"k_proj": {
|
1515 |
"group_size": {
|
1516 |
-
"
|
1517 |
},
|
1518 |
"bits": [
|
1519 |
-
|
1520 |
],
|
1521 |
"bits_prop": [
|
1522 |
1
|
@@ -1561,10 +1561,10 @@
|
|
1561 |
},
|
1562 |
"gate_proj": {
|
1563 |
"group_size": {
|
1564 |
-
"
|
1565 |
},
|
1566 |
"bits": [
|
1567 |
-
|
1568 |
],
|
1569 |
"bits_prop": [
|
1570 |
1
|
@@ -1585,14 +1585,14 @@
|
|
1585 |
}
|
1586 |
},
|
1587 |
"model.layers.18": {
|
1588 |
-
"accuracy": 0.
|
1589 |
-
"total_bits":
|
1590 |
"q_proj": {
|
1591 |
"group_size": {
|
1592 |
-
"
|
1593 |
},
|
1594 |
"bits": [
|
1595 |
-
|
1596 |
],
|
1597 |
"bits_prop": [
|
1598 |
1
|
@@ -1601,10 +1601,10 @@
|
|
1601 |
},
|
1602 |
"k_proj": {
|
1603 |
"group_size": {
|
1604 |
-
"
|
1605 |
},
|
1606 |
"bits": [
|
1607 |
-
|
1608 |
],
|
1609 |
"bits_prop": [
|
1610 |
1
|
@@ -1649,10 +1649,10 @@
|
|
1649 |
},
|
1650 |
"gate_proj": {
|
1651 |
"group_size": {
|
1652 |
-
"
|
1653 |
},
|
1654 |
"bits": [
|
1655 |
-
|
1656 |
],
|
1657 |
"bits_prop": [
|
1658 |
1
|
@@ -1673,14 +1673,14 @@
|
|
1673 |
}
|
1674 |
},
|
1675 |
"model.layers.19": {
|
1676 |
-
"accuracy": 0.
|
1677 |
-
"total_bits":
|
1678 |
"q_proj": {
|
1679 |
"group_size": {
|
1680 |
-
"
|
1681 |
},
|
1682 |
"bits": [
|
1683 |
-
|
1684 |
],
|
1685 |
"bits_prop": [
|
1686 |
1
|
@@ -1689,10 +1689,10 @@
|
|
1689 |
},
|
1690 |
"k_proj": {
|
1691 |
"group_size": {
|
1692 |
-
"
|
1693 |
},
|
1694 |
"bits": [
|
1695 |
-
|
1696 |
],
|
1697 |
"bits_prop": [
|
1698 |
1
|
@@ -1725,10 +1725,10 @@
|
|
1725 |
},
|
1726 |
"up_proj": {
|
1727 |
"group_size": {
|
1728 |
-
"
|
1729 |
},
|
1730 |
"bits": [
|
1731 |
-
|
1732 |
],
|
1733 |
"bits_prop": [
|
1734 |
1
|
@@ -1737,10 +1737,10 @@
|
|
1737 |
},
|
1738 |
"gate_proj": {
|
1739 |
"group_size": {
|
1740 |
-
"
|
1741 |
},
|
1742 |
"bits": [
|
1743 |
-
|
1744 |
],
|
1745 |
"bits_prop": [
|
1746 |
1
|
@@ -1761,14 +1761,14 @@
|
|
1761 |
}
|
1762 |
},
|
1763 |
"model.layers.20": {
|
1764 |
-
"accuracy": 0.
|
1765 |
-
"total_bits":
|
1766 |
"q_proj": {
|
1767 |
"group_size": {
|
1768 |
-
"
|
1769 |
},
|
1770 |
"bits": [
|
1771 |
-
|
1772 |
],
|
1773 |
"bits_prop": [
|
1774 |
1
|
@@ -1813,10 +1813,10 @@
|
|
1813 |
},
|
1814 |
"up_proj": {
|
1815 |
"group_size": {
|
1816 |
-
"
|
1817 |
},
|
1818 |
"bits": [
|
1819 |
-
|
1820 |
],
|
1821 |
"bits_prop": [
|
1822 |
1
|
@@ -1825,10 +1825,10 @@
|
|
1825 |
},
|
1826 |
"gate_proj": {
|
1827 |
"group_size": {
|
1828 |
-
"
|
1829 |
},
|
1830 |
"bits": [
|
1831 |
-
|
1832 |
],
|
1833 |
"bits_prop": [
|
1834 |
1
|
@@ -1849,14 +1849,14 @@
|
|
1849 |
}
|
1850 |
},
|
1851 |
"model.layers.21": {
|
1852 |
-
"accuracy": 0.
|
1853 |
-
"total_bits":
|
1854 |
"q_proj": {
|
1855 |
"group_size": {
|
1856 |
-
"
|
1857 |
},
|
1858 |
"bits": [
|
1859 |
-
|
1860 |
],
|
1861 |
"bits_prop": [
|
1862 |
1
|
@@ -1901,10 +1901,10 @@
|
|
1901 |
},
|
1902 |
"up_proj": {
|
1903 |
"group_size": {
|
1904 |
-
"
|
1905 |
},
|
1906 |
"bits": [
|
1907 |
-
|
1908 |
],
|
1909 |
"bits_prop": [
|
1910 |
1
|
@@ -1913,10 +1913,10 @@
|
|
1913 |
},
|
1914 |
"gate_proj": {
|
1915 |
"group_size": {
|
1916 |
-
"
|
1917 |
},
|
1918 |
"bits": [
|
1919 |
-
|
1920 |
],
|
1921 |
"bits_prop": [
|
1922 |
1
|
@@ -1937,14 +1937,14 @@
|
|
1937 |
}
|
1938 |
},
|
1939 |
"model.layers.22": {
|
1940 |
-
"accuracy": 0.
|
1941 |
-
"total_bits":
|
1942 |
"q_proj": {
|
1943 |
"group_size": {
|
1944 |
-
"
|
1945 |
},
|
1946 |
"bits": [
|
1947 |
-
|
1948 |
],
|
1949 |
"bits_prop": [
|
1950 |
1
|
@@ -1953,10 +1953,10 @@
|
|
1953 |
},
|
1954 |
"k_proj": {
|
1955 |
"group_size": {
|
1956 |
-
"
|
1957 |
},
|
1958 |
"bits": [
|
1959 |
-
|
1960 |
],
|
1961 |
"bits_prop": [
|
1962 |
1
|
@@ -2001,10 +2001,10 @@
|
|
2001 |
},
|
2002 |
"gate_proj": {
|
2003 |
"group_size": {
|
2004 |
-
"
|
2005 |
},
|
2006 |
"bits": [
|
2007 |
-
|
2008 |
],
|
2009 |
"bits_prop": [
|
2010 |
1
|
@@ -2025,14 +2025,14 @@
|
|
2025 |
}
|
2026 |
},
|
2027 |
"model.layers.23": {
|
2028 |
-
"accuracy": 0.
|
2029 |
-
"total_bits":
|
2030 |
"q_proj": {
|
2031 |
"group_size": {
|
2032 |
-
"
|
2033 |
},
|
2034 |
"bits": [
|
2035 |
-
|
2036 |
],
|
2037 |
"bits_prop": [
|
2038 |
1
|
@@ -2041,10 +2041,10 @@
|
|
2041 |
},
|
2042 |
"k_proj": {
|
2043 |
"group_size": {
|
2044 |
-
"
|
2045 |
},
|
2046 |
"bits": [
|
2047 |
-
|
2048 |
],
|
2049 |
"bits_prop": [
|
2050 |
1
|
|
|
1 |
{
|
2 |
"measurement": {
|
3 |
"model.layers.0": {
|
4 |
+
"accuracy": 0.8778786659240723,
|
5 |
+
"total_bits": 41980320,
|
6 |
"q_proj": {
|
7 |
"group_size": {
|
8 |
"2": 64
|
|
|
65 |
},
|
66 |
"gate_proj": {
|
67 |
"group_size": {
|
68 |
+
"2": 64
|
69 |
},
|
70 |
"bits": [
|
71 |
+
2
|
72 |
],
|
73 |
"bits_prop": [
|
74 |
1
|
|
|
89 |
}
|
90 |
},
|
91 |
"model.layers.1": {
|
92 |
+
"accuracy": 0.8126821517944336,
|
93 |
+
"total_bits": 30627648,
|
94 |
"q_proj": {
|
95 |
"group_size": {
|
96 |
+
"2": 64
|
97 |
},
|
98 |
"bits": [
|
99 |
+
2
|
100 |
],
|
101 |
"bits_prop": [
|
102 |
1
|
|
|
105 |
},
|
106 |
"k_proj": {
|
107 |
"group_size": {
|
108 |
+
"2": 64
|
109 |
},
|
110 |
"bits": [
|
111 |
+
2
|
112 |
],
|
113 |
"bits_prop": [
|
114 |
1
|
|
|
141 |
},
|
142 |
"up_proj": {
|
143 |
"group_size": {
|
144 |
+
"2": 64
|
145 |
},
|
146 |
"bits": [
|
147 |
+
2
|
148 |
],
|
149 |
"bits_prop": [
|
150 |
1
|
|
|
153 |
},
|
154 |
"gate_proj": {
|
155 |
"group_size": {
|
156 |
+
"2": 64
|
157 |
},
|
158 |
"bits": [
|
159 |
+
2
|
160 |
],
|
161 |
"bits_prop": [
|
162 |
1
|
|
|
165 |
},
|
166 |
"down_proj": {
|
167 |
"group_size": {
|
168 |
+
"2": 64
|
169 |
},
|
170 |
"bits": [
|
171 |
+
2
|
172 |
],
|
173 |
"bits_prop": [
|
174 |
1
|
|
|
177 |
}
|
178 |
},
|
179 |
"model.layers.2": {
|
180 |
+
"accuracy": 0.8082990646362305,
|
181 |
+
"total_bits": 30627648,
|
182 |
"q_proj": {
|
183 |
"group_size": {
|
184 |
+
"2": 64
|
185 |
},
|
186 |
"bits": [
|
187 |
+
2
|
188 |
],
|
189 |
"bits_prop": [
|
190 |
1
|
|
|
193 |
},
|
194 |
"k_proj": {
|
195 |
"group_size": {
|
196 |
+
"2": 64
|
197 |
},
|
198 |
"bits": [
|
199 |
+
2
|
200 |
],
|
201 |
"bits_prop": [
|
202 |
1
|
|
|
229 |
},
|
230 |
"up_proj": {
|
231 |
"group_size": {
|
232 |
+
"2": 64
|
233 |
},
|
234 |
"bits": [
|
235 |
+
2
|
236 |
],
|
237 |
"bits_prop": [
|
238 |
1
|
|
|
241 |
},
|
242 |
"gate_proj": {
|
243 |
"group_size": {
|
244 |
+
"2": 64
|
245 |
},
|
246 |
"bits": [
|
247 |
+
2
|
248 |
],
|
249 |
"bits_prop": [
|
250 |
1
|
|
|
253 |
},
|
254 |
"down_proj": {
|
255 |
"group_size": {
|
256 |
+
"2": 64
|
257 |
},
|
258 |
"bits": [
|
259 |
+
2
|
260 |
],
|
261 |
"bits_prop": [
|
262 |
1
|
|
|
265 |
}
|
266 |
},
|
267 |
"model.layers.3": {
|
268 |
+
"accuracy": 0.8619828224182129,
|
269 |
+
"total_bits": 34239648,
|
270 |
"q_proj": {
|
271 |
"group_size": {
|
272 |
"2": 64
|
|
|
305 |
},
|
306 |
"o_proj": {
|
307 |
"group_size": {
|
308 |
+
"2": 64
|
309 |
},
|
310 |
"bits": [
|
311 |
+
2
|
312 |
],
|
313 |
"bits_prop": [
|
314 |
1
|
|
|
317 |
},
|
318 |
"up_proj": {
|
319 |
"group_size": {
|
320 |
+
"2": 64
|
321 |
},
|
322 |
"bits": [
|
323 |
+
2
|
324 |
],
|
325 |
"bits_prop": [
|
326 |
1
|
|
|
353 |
}
|
354 |
},
|
355 |
"model.layers.4": {
|
356 |
+
"accuracy": 0.8780388832092285,
|
357 |
+
"total_bits": 36303648,
|
358 |
"q_proj": {
|
359 |
"group_size": {
|
360 |
"2": 64
|
|
|
369 |
},
|
370 |
"k_proj": {
|
371 |
"group_size": {
|
372 |
+
"2": 64
|
373 |
},
|
374 |
"bits": [
|
375 |
+
2
|
376 |
],
|
377 |
"bits_prop": [
|
378 |
1
|
|
|
405 |
},
|
406 |
"up_proj": {
|
407 |
"group_size": {
|
408 |
+
"2": 64
|
409 |
},
|
410 |
"bits": [
|
411 |
+
2
|
412 |
],
|
413 |
"bits_prop": [
|
414 |
1
|
|
|
417 |
},
|
418 |
"gate_proj": {
|
419 |
"group_size": {
|
420 |
+
"2": 64
|
421 |
},
|
422 |
"bits": [
|
423 |
+
2
|
424 |
],
|
425 |
"bits_prop": [
|
426 |
1
|
|
|
529 |
}
|
530 |
},
|
531 |
"model.layers.6": {
|
532 |
+
"accuracy": 0.9222159385681152,
|
533 |
+
"total_bits": 28563648,
|
534 |
"q_proj": {
|
535 |
"group_size": {
|
536 |
"2": 64
|
|
|
569 |
},
|
570 |
"o_proj": {
|
571 |
"group_size": {
|
572 |
+
"2": 64
|
573 |
},
|
574 |
"bits": [
|
575 |
+
2
|
576 |
],
|
577 |
"bits_prop": [
|
578 |
1
|
|
|
617 |
}
|
618 |
},
|
619 |
"model.layers.7": {
|
620 |
+
"accuracy": 0.9137954711914062,
|
621 |
+
"total_bits": 28563648,
|
622 |
"q_proj": {
|
623 |
"group_size": {
|
624 |
"2": 64
|
|
|
633 |
},
|
634 |
"k_proj": {
|
635 |
"group_size": {
|
636 |
+
"2": 64
|
637 |
},
|
638 |
"bits": [
|
639 |
+
2
|
640 |
],
|
641 |
"bits_prop": [
|
642 |
1
|
|
|
705 |
}
|
706 |
},
|
707 |
"model.layers.8": {
|
708 |
+
"accuracy": 0.9160647392272949,
|
709 |
+
"total_bits": 28563648,
|
710 |
"q_proj": {
|
711 |
"group_size": {
|
712 |
"2": 64
|
|
|
721 |
},
|
722 |
"k_proj": {
|
723 |
"group_size": {
|
724 |
+
"2": 64
|
725 |
},
|
726 |
"bits": [
|
727 |
+
2
|
728 |
],
|
729 |
"bits_prop": [
|
730 |
1
|
|
|
881 |
}
|
882 |
},
|
883 |
"model.layers.10": {
|
884 |
+
"accuracy": 0.9132890701293945,
|
885 |
+
"total_bits": 28563648,
|
886 |
"q_proj": {
|
887 |
"group_size": {
|
888 |
"2": 64
|
|
|
921 |
},
|
922 |
"o_proj": {
|
923 |
"group_size": {
|
924 |
+
"2": 64
|
925 |
},
|
926 |
"bits": [
|
927 |
+
2
|
928 |
],
|
929 |
"bits_prop": [
|
930 |
1
|
|
|
969 |
}
|
970 |
},
|
971 |
"model.layers.11": {
|
972 |
+
"accuracy": 0.908735990524292,
|
973 |
+
"total_bits": 28563648,
|
974 |
"q_proj": {
|
975 |
"group_size": {
|
976 |
"2": 64
|
|
|
985 |
},
|
986 |
"k_proj": {
|
987 |
"group_size": {
|
988 |
+
"2": 64
|
989 |
},
|
990 |
"bits": [
|
991 |
+
2
|
992 |
],
|
993 |
"bits_prop": [
|
994 |
1
|
|
|
1009 |
},
|
1010 |
"o_proj": {
|
1011 |
"group_size": {
|
1012 |
+
"2": 64
|
1013 |
},
|
1014 |
"bits": [
|
1015 |
+
2
|
1016 |
],
|
1017 |
"bits_prop": [
|
1018 |
1
|
|
|
1045 |
},
|
1046 |
"down_proj": {
|
1047 |
"group_size": {
|
1048 |
+
"2": 64
|
1049 |
},
|
1050 |
"bits": [
|
1051 |
+
2
|
1052 |
],
|
1053 |
"bits_prop": [
|
1054 |
1
|
|
|
1057 |
}
|
1058 |
},
|
1059 |
"model.layers.12": {
|
1060 |
+
"accuracy": 0.9236173629760742,
|
1061 |
+
"total_bits": 30627648,
|
1062 |
"q_proj": {
|
1063 |
"group_size": {
|
1064 |
"2": 64
|
|
|
1133 |
},
|
1134 |
"down_proj": {
|
1135 |
"group_size": {
|
1136 |
+
"2": 64
|
1137 |
},
|
1138 |
"bits": [
|
1139 |
+
2
|
1140 |
],
|
1141 |
"bits_prop": [
|
1142 |
1
|
|
|
1321 |
}
|
1322 |
},
|
1323 |
"model.layers.15": {
|
1324 |
+
"accuracy": 0.9489641189575195,
|
1325 |
+
"total_bits": 41980320,
|
1326 |
"q_proj": {
|
1327 |
"group_size": {
|
1328 |
"2": 64
|
|
|
1373 |
},
|
1374 |
"up_proj": {
|
1375 |
"group_size": {
|
1376 |
+
"4": 128
|
1377 |
},
|
1378 |
"bits": [
|
1379 |
+
4
|
1380 |
],
|
1381 |
"bits_prop": [
|
1382 |
1
|
|
|
1397 |
},
|
1398 |
"down_proj": {
|
1399 |
"group_size": {
|
1400 |
+
"4": 128
|
1401 |
},
|
1402 |
"bits": [
|
1403 |
+
4
|
1404 |
],
|
1405 |
"bits_prop": [
|
1406 |
1
|
|
|
1409 |
}
|
1410 |
},
|
1411 |
"model.layers.16": {
|
1412 |
+
"accuracy": 0.9460692405700684,
|
1413 |
+
"total_bits": 41980320,
|
1414 |
"q_proj": {
|
1415 |
"group_size": {
|
1416 |
"2": 64
|
|
|
1461 |
},
|
1462 |
"up_proj": {
|
1463 |
"group_size": {
|
1464 |
+
"4": 128
|
1465 |
},
|
1466 |
"bits": [
|
1467 |
+
4
|
1468 |
],
|
1469 |
"bits_prop": [
|
1470 |
1
|
|
|
1497 |
}
|
1498 |
},
|
1499 |
"model.layers.17": {
|
1500 |
+
"accuracy": 0.97892165184021,
|
1501 |
+
"total_bits": 51784992,
|
1502 |
"q_proj": {
|
1503 |
"group_size": {
|
1504 |
+
"4": 128
|
1505 |
},
|
1506 |
"bits": [
|
1507 |
+
4
|
1508 |
],
|
1509 |
"bits_prop": [
|
1510 |
1
|
|
|
1513 |
},
|
1514 |
"k_proj": {
|
1515 |
"group_size": {
|
1516 |
+
"4": 128
|
1517 |
},
|
1518 |
"bits": [
|
1519 |
+
4
|
1520 |
],
|
1521 |
"bits_prop": [
|
1522 |
1
|
|
|
1561 |
},
|
1562 |
"gate_proj": {
|
1563 |
"group_size": {
|
1564 |
+
"4": 128
|
1565 |
},
|
1566 |
"bits": [
|
1567 |
+
4
|
1568 |
],
|
1569 |
"bits_prop": [
|
1570 |
1
|
|
|
1585 |
}
|
1586 |
},
|
1587 |
"model.layers.18": {
|
1588 |
+
"accuracy": 0.9773706197738647,
|
1589 |
+
"total_bits": 51784992,
|
1590 |
"q_proj": {
|
1591 |
"group_size": {
|
1592 |
+
"4": 128
|
1593 |
},
|
1594 |
"bits": [
|
1595 |
+
4
|
1596 |
],
|
1597 |
"bits_prop": [
|
1598 |
1
|
|
|
1601 |
},
|
1602 |
"k_proj": {
|
1603 |
"group_size": {
|
1604 |
+
"4": 128
|
1605 |
},
|
1606 |
"bits": [
|
1607 |
+
4
|
1608 |
],
|
1609 |
"bits_prop": [
|
1610 |
1
|
|
|
1649 |
},
|
1650 |
"gate_proj": {
|
1651 |
"group_size": {
|
1652 |
+
"4": 128
|
1653 |
},
|
1654 |
"bits": [
|
1655 |
+
4
|
1656 |
],
|
1657 |
"bits_prop": [
|
1658 |
1
|
|
|
1673 |
}
|
1674 |
},
|
1675 |
"model.layers.19": {
|
1676 |
+
"accuracy": 0.9783408641815186,
|
1677 |
+
"total_bits": 51784992,
|
1678 |
"q_proj": {
|
1679 |
"group_size": {
|
1680 |
+
"4": 128
|
1681 |
},
|
1682 |
"bits": [
|
1683 |
+
4
|
1684 |
],
|
1685 |
"bits_prop": [
|
1686 |
1
|
|
|
1689 |
},
|
1690 |
"k_proj": {
|
1691 |
"group_size": {
|
1692 |
+
"4": 128
|
1693 |
},
|
1694 |
"bits": [
|
1695 |
+
4
|
1696 |
],
|
1697 |
"bits_prop": [
|
1698 |
1
|
|
|
1725 |
},
|
1726 |
"up_proj": {
|
1727 |
"group_size": {
|
1728 |
+
"4": 128
|
1729 |
},
|
1730 |
"bits": [
|
1731 |
+
4
|
1732 |
],
|
1733 |
"bits_prop": [
|
1734 |
1
|
|
|
1737 |
},
|
1738 |
"gate_proj": {
|
1739 |
"group_size": {
|
1740 |
+
"4": 128
|
1741 |
},
|
1742 |
"bits": [
|
1743 |
+
4
|
1744 |
],
|
1745 |
"bits_prop": [
|
1746 |
1
|
|
|
1761 |
}
|
1762 |
},
|
1763 |
"model.layers.20": {
|
1764 |
+
"accuracy": 0.9780358076095581,
|
1765 |
+
"total_bits": 51784992,
|
1766 |
"q_proj": {
|
1767 |
"group_size": {
|
1768 |
+
"4": 128
|
1769 |
},
|
1770 |
"bits": [
|
1771 |
+
4
|
1772 |
],
|
1773 |
"bits_prop": [
|
1774 |
1
|
|
|
1813 |
},
|
1814 |
"up_proj": {
|
1815 |
"group_size": {
|
1816 |
+
"4": 128
|
1817 |
},
|
1818 |
"bits": [
|
1819 |
+
4
|
1820 |
],
|
1821 |
"bits_prop": [
|
1822 |
1
|
|
|
1825 |
},
|
1826 |
"gate_proj": {
|
1827 |
"group_size": {
|
1828 |
+
"4": 128
|
1829 |
},
|
1830 |
"bits": [
|
1831 |
+
4
|
1832 |
],
|
1833 |
"bits_prop": [
|
1834 |
1
|
|
|
1849 |
}
|
1850 |
},
|
1851 |
"model.layers.21": {
|
1852 |
+
"accuracy": 0.9766302108764648,
|
1853 |
+
"total_bits": 51784992,
|
1854 |
"q_proj": {
|
1855 |
"group_size": {
|
1856 |
+
"4": 128
|
1857 |
},
|
1858 |
"bits": [
|
1859 |
+
4
|
1860 |
],
|
1861 |
"bits_prop": [
|
1862 |
1
|
|
|
1901 |
},
|
1902 |
"up_proj": {
|
1903 |
"group_size": {
|
1904 |
+
"4": 128
|
1905 |
},
|
1906 |
"bits": [
|
1907 |
+
4
|
1908 |
],
|
1909 |
"bits_prop": [
|
1910 |
1
|
|
|
1913 |
},
|
1914 |
"gate_proj": {
|
1915 |
"group_size": {
|
1916 |
+
"4": 128
|
1917 |
},
|
1918 |
"bits": [
|
1919 |
+
4
|
1920 |
],
|
1921 |
"bits_prop": [
|
1922 |
1
|
|
|
1937 |
}
|
1938 |
},
|
1939 |
"model.layers.22": {
|
1940 |
+
"accuracy": 0.9679172039031982,
|
1941 |
+
"total_bits": 51784992,
|
1942 |
"q_proj": {
|
1943 |
"group_size": {
|
1944 |
+
"4": 128
|
1945 |
},
|
1946 |
"bits": [
|
1947 |
+
4
|
1948 |
],
|
1949 |
"bits_prop": [
|
1950 |
1
|
|
|
1953 |
},
|
1954 |
"k_proj": {
|
1955 |
"group_size": {
|
1956 |
+
"4": 128
|
1957 |
},
|
1958 |
"bits": [
|
1959 |
+
4
|
1960 |
],
|
1961 |
"bits_prop": [
|
1962 |
1
|
|
|
2001 |
},
|
2002 |
"gate_proj": {
|
2003 |
"group_size": {
|
2004 |
+
"4": 128
|
2005 |
},
|
2006 |
"bits": [
|
2007 |
+
4
|
2008 |
],
|
2009 |
"bits_prop": [
|
2010 |
1
|
|
|
2025 |
}
|
2026 |
},
|
2027 |
"model.layers.23": {
|
2028 |
+
"accuracy": 0.9569640159606934,
|
2029 |
+
"total_bits": 51784992,
|
2030 |
"q_proj": {
|
2031 |
"group_size": {
|
2032 |
+
"4": 128
|
2033 |
},
|
2034 |
"bits": [
|
2035 |
+
4
|
2036 |
],
|
2037 |
"bits_prop": [
|
2038 |
1
|
|
|
2041 |
},
|
2042 |
"k_proj": {
|
2043 |
"group_size": {
|
2044 |
+
"4": 128
|
2045 |
},
|
2046 |
"bits": [
|
2047 |
+
4
|
2048 |
],
|
2049 |
"bits_prop": [
|
2050 |
1
|