Align with GPTQ format

Signed-off-by: wenhuach <[email protected]>

Files changed (2)

config.json (changed)

@@ -40,7 +40,6 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.5",
-    "backend": "auto_round:gptq:exllamav2",
     "batch_size": 4,
     "bits": 4,
     "data_type": "int",
@@ -55,6 +54,9 @@
     "minmax_lr": 0.005,
     "nsamples": 512,
     "quant_method": "gptq",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,

   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.5",
     "batch_size": 4,
     "bits": 4,
     "data_type": "int",
     "minmax_lr": 0.005,
     "nsamples": 512,
     "quant_method": "gptq",
+    "desc_act": false,
+    "true_sequential": false,
+    "damp_percent": 0.01,
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,

quantization_config.json (changed)

@@ -19,6 +19,8 @@
   "enable_norm_bias_tuning": false,
   "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.5",
-  "quant_method": "intel/auto-round",
-  "backend": "auto_round:gptq:exllamav2"
-}

   "enable_norm_bias_tuning": false,
   "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.5",
+  "quant_method": "gptq",
+  "desc_act": false,
+  "true_sequential": false,
+  "damp_percent": 0.01
+}