Rúben Almeida
committed on
Commit
·
edebf90
1
Parent(s):
0735f93
Exception handling for non-supported AWQ quantization not in the correct place
Browse files- main.py +5 -3
- tests/.env.example +3 -1
- tests/test_awq.py +46 -0
- tests/test_convertion.py +0 -31
- tests/test_gguf.py +0 -0
- tests/test_gptq.py +0 -0
main.py
CHANGED
@@ -65,14 +65,16 @@ def redirect_to_docs():
|
|
65 |
### FastAPI Endpoints
|
66 |
@app.post("/convert_awq", response_model=None)
|
67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
68 |
-
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
69 |
-
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
70 |
|
71 |
try:
|
72 |
-
model.
|
73 |
except TypeError as e:
|
74 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
75 |
|
|
|
|
|
|
|
|
|
76 |
if request.hf_push_repo:
|
77 |
model.save_quantized(request.hf_push_repo)
|
78 |
tokenizer.save_pretrained(request.hf_push_repo)
|
|
|
65 |
### FastAPI Endpoints
|
66 |
@app.post("/convert_awq", response_model=None)
|
67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
|
|
|
|
68 |
|
69 |
try:
|
70 |
+
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
71 |
except TypeError as e:
|
72 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
73 |
|
74 |
+
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
75 |
+
|
76 |
+
model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
|
77 |
+
|
78 |
if request.hf_push_repo:
|
79 |
model.save_quantized(request.hf_push_repo)
|
80 |
tokenizer.save_pretrained(request.hf_push_repo)
|
tests/.env.example
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
ENDPOINT=
|
2 |
-
HF_TOKEN=
|
|
|
|
|
|
1 |
ENDPOINT=
|
2 |
+
HF_TOKEN=
|
3 |
+
HF_PUSH_REPO=
|
4 |
+
HF_ORGANIZATION=
|
tests/test_awq.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
import requests
|
3 |
+
from environs import Env
|
4 |
+
from huggingface_hub import login
|
5 |
+
|
6 |
+
env = Env()
|
7 |
+
env.read_env(override=True)
|
8 |
+
|
9 |
+
def test_incompatible_model():
|
10 |
+
with pytest.raises(requests.exceptions.HTTPError):
|
11 |
+
response = requests.post(
|
12 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
13 |
+
json={
|
14 |
+
"hf_model_name": "gpt2",
|
15 |
+
"hf_tokenizer_name": "gpt2",
|
16 |
+
"hf_push_repo": None,
|
17 |
+
}
|
18 |
+
)
|
19 |
+
assert response.status_code == 400
|
20 |
+
|
21 |
+
|
22 |
+
def test_convert_download():
|
23 |
+
response = requests.post(
|
24 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
25 |
+
json={
|
26 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
27 |
+
}
|
28 |
+
)
|
29 |
+
|
30 |
+
response.raise_for_status()
|
31 |
+
|
32 |
+
assert response.content_type == 'application/zip'
|
33 |
+
|
34 |
+
|
35 |
+
def test_convert_push():
|
36 |
+
model_name = "Qwen/Qwen2.5-14B-Instruct"
|
37 |
+
|
38 |
+
response = requests.post(
|
39 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
40 |
+
json={
|
41 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
42 |
+
"hf_push_repo": env.str("HF_PUSH_REPO") or f"{env.str('HF_ORGANIZATION')}/{model_name.split('/')[-1]}-AWQ",
|
43 |
+
}
|
44 |
+
)
|
45 |
+
|
46 |
+
response.raise_for_status()
|
tests/test_convertion.py
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
import pytest
|
2 |
-
import requests
|
3 |
-
from environs import Env
|
4 |
-
from huggingface_hub import login
|
5 |
-
|
6 |
-
env = Env()
|
7 |
-
env.read_env(override=True)
|
8 |
-
|
9 |
-
@pytest.mark.parametrize("model_name", [
|
10 |
-
"gpt2",
|
11 |
-
])
|
12 |
-
def test_convert_download(model_name):
|
13 |
-
if env.str("HF_TOKEN"):
|
14 |
-
login(token=env("HF_TOKEN"))
|
15 |
-
|
16 |
-
response = requests.post(
|
17 |
-
env.str("ENDPOINT"),
|
18 |
-
json={
|
19 |
-
"hf_model_name": model_name,
|
20 |
-
"hf_tokenizer_name": model_name,
|
21 |
-
"hf_push_repo": None,
|
22 |
-
}
|
23 |
-
)
|
24 |
-
|
25 |
-
response.raise_for_status()
|
26 |
-
|
27 |
-
assert response.content_type == 'application/zip'
|
28 |
-
|
29 |
-
|
30 |
-
def test_convert_push():
|
31 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_gguf.py
ADDED
File without changes
|
tests/test_gptq.py
ADDED
File without changes
|