Rúben Almeida committed on
Commit
edebf90
·
1 Parent(s): 0735f93

Move exception handling for unsupported AWQ quantization to the correct place (model loading)

Browse files
main.py CHANGED
@@ -65,14 +65,16 @@ def redirect_to_docs():
65
  ### FastAPI Endpoints
66
  @app.post("/convert_awq", response_model=None)
67
  def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
68
- model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
69
- tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
70
 
71
  try:
72
- model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
73
  except TypeError as e:
74
  raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
75
 
 
 
 
 
76
  if request.hf_push_repo:
77
  model.save_quantized(request.hf_push_repo)
78
  tokenizer.save_pretrained(request.hf_push_repo)
 
65
  ### FastAPI Endpoints
66
  @app.post("/convert_awq", response_model=None)
67
  def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
 
 
68
 
69
  try:
70
+ model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
71
  except TypeError as e:
72
  raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
73
 
74
+ tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
75
+
76
+ model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
77
+
78
  if request.hf_push_repo:
79
  model.save_quantized(request.hf_push_repo)
80
  tokenizer.save_pretrained(request.hf_push_repo)
tests/.env.example CHANGED
@@ -1,2 +1,4 @@
1
  ENDPOINT=
2
- HF_TOKEN=
 
 
 
1
  ENDPOINT=
2
+ HF_TOKEN=
3
+ HF_PUSH_REPO=
4
+ HF_ORGANIZATION=
tests/test_awq.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import requests
3
+ from environs import Env
4
+ from huggingface_hub import login
5
+
6
+ env = Env()
7
+ env.read_env(override=True)
8
+
9
+ def test_incompatible_model():
10
+ with pytest.raises(requests.exceptions.HTTPError):
11
+ response = requests.post(
12
+ f"{env.str('ENDPOINT')}/convert_awq",
13
+ json={
14
+ "hf_model_name": "gpt2",
15
+ "hf_tokenizer_name": "gpt2",
16
+ "hf_push_repo": None,
17
+ }
18
+ )
19
+ assert response.status_code == 400
20
+
21
+
22
+ def test_convert_download():
23
+ response = requests.post(
24
+ f"{env.str('ENDPOINT')}/convert_awq",
25
+ json={
26
+ "hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
27
+ }
28
+ )
29
+
30
+ response.raise_for_status()
31
+
32
+ assert response.content_type == 'application/zip'
33
+
34
+
35
+ def test_convert_push():
36
+ model_name = "Qwen/Qwen2.5-14B-Instruct"
37
+
38
+ response = requests.post(
39
+ f"{env.str('ENDPOINT')}/convert_awq",
40
+ json={
41
+ "hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
42
+ "hf_push_repo": env.str("HF_PUSH_REPO") or f"{env.str('HF_ORGANIZATION')}/{model_name.split('/')[-1]}-AWQ",
43
+ }
44
+ )
45
+
46
+ response.raise_for_status()
tests/test_convertion.py DELETED
@@ -1,31 +0,0 @@
1
- import pytest
2
- import requests
3
- from environs import Env
4
- from huggingface_hub import login
5
-
6
- env = Env()
7
- env.read_env(override=True)
8
-
9
- @pytest.mark.parametrize("model_name", [
10
- "gpt2",
11
- ])
12
- def test_convert_download(model_name):
13
- if env.str("HF_TOKEN"):
14
- login(token=env("HF_TOKEN"))
15
-
16
- response = requests.post(
17
- env.str("ENDPOINT"),
18
- json={
19
- "hf_model_name": model_name,
20
- "hf_tokenizer_name": model_name,
21
- "hf_push_repo": None,
22
- }
23
- )
24
-
25
- response.raise_for_status()
26
-
27
- assert response.content_type == 'application/zip'
28
-
29
-
30
- def test_convert_push():
31
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_gguf.py ADDED
File without changes
tests/test_gptq.py ADDED
File without changes