jerryzh168 committed on
Commit
3795233
•
1 Parent(s): 51a4daf

Add torchao int4 weight only quantization as an option

Browse files

Summary:
This is a follow-up to https://github.com/huggingface/optimum-benchmark/pull/297/
that makes torchao available as a quantization option on the leaderboard.

Eventually we also want to add torchao.autoquant as an option, once it has been
integrated into TorchAoConfig.

Test Plan:
leaderboard?

Reviewers:

Subscribers:

Tasks:

Tags:

Files changed (3) hide show
  1. hardware.yaml +2 -1
  2. src/panel.py +1 -1
  3. src/utils.py +5 -0
hardware.yaml CHANGED
@@ -19,6 +19,7 @@
19
  - awq
20
  - bnb
21
  - gptq
 
22
  backends:
23
  - pytorch
24
 
@@ -45,4 +46,4 @@
45
  backends:
46
  - pytorch
47
  - openvino
48
- - onnxruntime
 
19
  - awq
20
  - bnb
21
  - gptq
22
+ - torchao
23
  backends:
24
  - pytorch
25
 
 
46
  backends:
47
  - pytorch
48
  - openvino
49
+ - onnxruntime
src/panel.py CHANGED
@@ -26,7 +26,7 @@ def create_control_panel(
26
  if hardware_provider == "nvidia":
27
  backends = ["pytorch"]
28
  attention_implementations = ["Eager", "SDPA", "FAv2"]
29
- quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
30
  kernels = [
31
  "No Kernel",
32
  "GPTQ.ExllamaV1",
 
26
  if hardware_provider == "nvidia":
27
  backends = ["pytorch"]
28
  attention_implementations = ["Eager", "SDPA", "FAv2"]
29
+ quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit", "torchao.4bit"]
30
  kernels = [
31
  "No Kernel",
32
  "GPTQ.ExllamaV1",
src/utils.py CHANGED
@@ -70,6 +70,11 @@ def process_quantizations(x):
70
  and x["config.backend.quantization_config.bits"] == 4
71
  ):
72
  return "AWQ.4bit"
 
 
 
 
 
73
  else:
74
  return "Unquantized"
75
 
 
70
  and x["config.backend.quantization_config.bits"] == 4
71
  ):
72
  return "AWQ.4bit"
73
+ elif (
74
+ x["config.backend.quantization_scheme"] == "torchao"
75
+ and x["config.backend.quantization_config.quant_type"] == "int4_weight_only"
76
+ ):
77
+ return "torchao.4bit"
78
  else:
79
  return "Unquantized"
80