Spaces:
Running
Running
jerryzh168
committed on
Commit
•
3795233
1
Parent(s):
51a4daf
Add torchao int4 weight only quantization as an option
Browse files
Summary:
This is a follow-up to https://github.com/huggingface/optimum-benchmark/pull/297/
to make torchao available as an option on the leaderboard.
We eventually want to add torchao.autoquant as an option as well, once it has
been integrated into TorchAoConfig.
Test Plan:
leaderboard?
Reviewers:
Subscribers:
Tasks:
Tags:
- hardware.yaml +2 -1
- src/panel.py +1 -1
- src/utils.py +5 -0
hardware.yaml
CHANGED
@@ -19,6 +19,7 @@
|
|
19 |
- awq
|
20 |
- bnb
|
21 |
- gptq
|
|
|
22 |
backends:
|
23 |
- pytorch
|
24 |
|
@@ -45,4 +46,4 @@
|
|
45 |
backends:
|
46 |
- pytorch
|
47 |
- openvino
|
48 |
-
- onnxruntime
|
|
|
19 |
- awq
|
20 |
- bnb
|
21 |
- gptq
|
22 |
+
- torchao
|
23 |
backends:
|
24 |
- pytorch
|
25 |
|
|
|
46 |
backends:
|
47 |
- pytorch
|
48 |
- openvino
|
49 |
+
- onnxruntime
|
src/panel.py
CHANGED
@@ -26,7 +26,7 @@ def create_control_panel(
|
|
26 |
if hardware_provider == "nvidia":
|
27 |
backends = ["pytorch"]
|
28 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
29 |
-
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
|
30 |
kernels = [
|
31 |
"No Kernel",
|
32 |
"GPTQ.ExllamaV1",
|
|
|
26 |
if hardware_provider == "nvidia":
|
27 |
backends = ["pytorch"]
|
28 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
29 |
+
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit", "torchao.4bit"]
|
30 |
kernels = [
|
31 |
"No Kernel",
|
32 |
"GPTQ.ExllamaV1",
|
src/utils.py
CHANGED
@@ -70,6 +70,11 @@ def process_quantizations(x):
|
|
70 |
and x["config.backend.quantization_config.bits"] == 4
|
71 |
):
|
72 |
return "AWQ.4bit"
|
|
|
|
|
|
|
|
|
|
|
73 |
else:
|
74 |
return "Unquantized"
|
75 |
|
|
|
70 |
and x["config.backend.quantization_config.bits"] == 4
|
71 |
):
|
72 |
return "AWQ.4bit"
|
73 |
+
elif (
|
74 |
+
x["config.backend.quantization_scheme"] == "torchao"
|
75 |
+
and x["config.backend.quantization_config.quant_type"] == "int4_weight_only"
|
76 |
+
):
|
77 |
+
return "torchao.4bit"
|
78 |
else:
|
79 |
return "Unquantized"
|
80 |
|