TheBloke
/

Saily_220B-GPTQ

Text Generation

text-generation-inference

4-bit precision

Model card | Files and versions | Community

TheBloke committed on Dec 18, 2023

Commit

6801dba

·

1 Parent(s): 4225be4

GPTQ model commit

Files changed (2) hide show

config.json +10 -40
quantize_config.json +6 -6

config.json CHANGED Viewed

@@ -18,45 +18,6 @@
     "num_key_value_heads": 8,
     "pad_token_id": 0,
     "pretraining_tp": 1,
-    "quantization_config": {
-        "batch_size": 1,
-        "bits": 4,
-        "block_name_to_quantize": null,
-        "cache_block_outputs": true,
-        "damp_percent": 0.1,
-        "desc_act": true,
-        "exllama_config": {
-            "version": 1
-        },
-        "group_size": -1,
-        "max_input_length": null,
-        "model_seqlen": null,
-        "module_name_preceding_first_block": null,
-        "modules_in_block_to_quantize": [
-            [
-                "self_attn.k_proj",
-                "self_attn.v_proj",
-                "self_attn.q_proj"
-            ],
-            [
-                "self_attn.o_proj"
-            ],
-            [
-                "mlp.up_proj",
-                "mlp.gate_proj"
-            ],
-            [
-                "mlp.down_proj"
-            ]
-        ],
-        "pad_token_id": null,
-        "quant_method": "gptq",
-        "sym": true,
-        "tokenizer": null,
-        "true_sequential": true,
-        "use_cuda_fp16": false,
-        "use_exllama": true
-    },
     "rms_norm_eps": 1e-05,
     "rope_scaling": null,
     "rope_theta": 10000.0,
@@ -64,5 +25,14 @@
     "torch_dtype": "float16",
     "transformers_version": "4.37.0.dev0",
     "use_cache": true,
-    "vocab_size": 32000
 }

     "num_key_value_heads": 8,
     "pad_token_id": 0,
     "pretraining_tp": 1,
     "rms_norm_eps": 1e-05,
     "rope_scaling": null,
     "rope_theta": 10000.0,
     "torch_dtype": "float16",
     "transformers_version": "4.37.0.dev0",
     "use_cache": true,
+    "vocab_size": 32000,
+    "quantization_config": {
+        "bits": 4,
+        "group_size": -1,
+        "damp_percent": 0.1,
+        "desc_act": true,
+        "sym": true,
+        "true_sequential": true,
+        "quant_method": "gptq"
+    }
 }

quantize_config.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "bits": 4,
-    "group_size": -1,
-    "damp_percent": 0.1,
-    "desc_act": true,
-    "sym": true,
-    "true_sequential": true
 }

 {
+  "bits": 4,
+  "group_size": -1,
+  "damp_percent": 0.1,
+  "desc_act": true,
+  "sym": true,
+  "true_sequential": true
 }