Upload new GPTQs with varied parameters
README.md CHANGED
@@ -1,6 +1,13 @@
 ---
+datasets:
+- psmathur/orca_minis_uncensored_dataset
 inference: false
+language:
+- en
+library_name: transformers
 license: other
+model_type: llama
+pipeline_tag: text-generation
 ---
 
 <!-- header start -->
@@ -44,7 +51,6 @@ You are an AI assistant that follows instruction extremely well. Help as much as
 {input}
 
 ### Response:
-
 ```
 
 ## Provided files
@@ -57,8 +63,8 @@ Each separate quant is in a different branch. See below for instructions on fet
 | ------ | ---- | ---------- | -------------------- | --------- | ------------------- | --------- | ----------- |
 | main | 4 | 128 | False | 7.45 GB | True | GPTQ-for-LLaMa | Most compatible option. Good inference speed in AutoGPTQ and GPTQ-for-LLaMa. Lower inference quality than other options. |
 | gptq-4bit-32g-actorder_True | 4 | 32 | True | 8.00 GB | True | AutoGPTQ | 4-bit, with Act Order and group size. 32g gives highest possible inference quality, with maximum VRAM usage. Poor AutoGPTQ CUDA speed. |
-| gptq-4bit-64g-actorder_True | 4 | 64 | True | 7.51 GB | True | AutoGPTQ | 4-bit, with Act Order and group size. 64g uses less VRAM, but with slightly lower accuracy. Poor AutoGPTQ CUDA speed. |
-| gptq-4bit-128g-actorder_True | 4 | 128 | True | 7.26 GB | True | AutoGPTQ | 4-bit, with Act Order
+| gptq-4bit-64g-actorder_True | 4 | 64 | True | 7.51 GB | True | AutoGPTQ | 4-bit, with Act Order and group size. 64g uses less VRAM than 32g, but with slightly lower accuracy. Poor AutoGPTQ CUDA speed. |
+| gptq-4bit-128g-actorder_True | 4 | 128 | True | 7.26 GB | True | AutoGPTQ | 4-bit, with Act Order and group size. 128g uses even less VRAM, but with slightly lower accuracy. Poor AutoGPTQ CUDA speed. |
 | gptq-8bit--1g-actorder_True | 8 | None | True | 13.36 GB | False | AutoGPTQ | 8-bit, with Act Order. No group size, to lower VRAM requirements and to improve AutoGPTQ speed. |
 | gptq-8bit-128g-actorder_False | 8 | 128 | False | 13.65 GB | False | AutoGPTQ | 8-bit, with group size 128g for higher inference quality and without Act Order to improve AutoGPTQ speed. |
 
@@ -140,7 +146,6 @@ You are an AI assistant that follows instruction extremely well. Help as much as
 {input}
 
 ### Response:
-
 '''
 
 print("\n\n*** Generate:")
@@ -400,6 +405,16 @@ If you found wizardlm_alpaca_dolly_orca_open_llama_7b useful in your research or
 }
 ```
 ```
+@misc{mukherjee2023orca,
+      title={Orca: Progressive Learning from Complex Explanation Traces of GPT-4},
+      author={Subhabrata Mukherjee and Arindam Mitra and Ganesh Jawahar and Sahaj Agarwal and Hamid Palangi and Ahmed Awadallah},
+      year={2023},
+      eprint={2306.02707},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
+```
 @software{touvron2023llama,
 title={LLaMA: Open and Efficient Foundation Language Models},
 author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
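The `### Response:` hunks above edit the model card's prompt template (the same blank-line removal is applied both to the template section and to the Python example that embeds it). As a minimal sketch of assembling that template in Python: the diff only shows the `{input}` slot, the `### Response:` tail, and a truncated system line in the hunk headers, so the section headers used below are assumptions in the style of Orca-family templates.

```python
# Minimal sketch of building a prompt for this model (assumptions noted).
# The hunk headers show the system line starting "You are an AI assistant
# that follows instruction extremely well. Help as much as" but truncate
# the rest, so it is left elided here. The "### System:" / "### User:"
# headers are assumed Orca-style; the diff only confirms "{input}" and
# "### Response:".
SYSTEM = ("You are an AI assistant that follows instruction extremely well. "
          "Help as much as ...")

def build_prompt(user_input: str) -> str:
    # The hunks above remove a blank line after "### Response:", so
    # generation is expected to begin directly after the header.
    return (
        f"### System:\n{SYSTEM}\n\n"
        f"### User:\n{user_input}\n\n"
        "### Response:\n"
    )

print(build_prompt("Tell me about alpacas."))
```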
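The "Provided files" hunk pairs each quant variant with a repository branch. As a minimal sketch, assuming the usual Hugging Face Hub layout (the repo id below is a placeholder, not something this diff states), a single branch can be fetched by passing its name as `revision`:

```python
# Minimal sketch: fetch one quant branch listed in the table above.
# repo_id is a placeholder -- substitute the actual GPTQ repository.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="TheBloke/SOME-MODEL-GPTQ",       # placeholder, not from the diff
    revision="gptq-4bit-32g-actorder_True",   # branch name from the table
)
print(f"Quant downloaded to: {local_dir}")
```

Leaving `revision` unset fetches `main`, which the table describes as the most compatible option (4-bit, group size 128, no Act Order).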