Rúben Almeida
committed on
Commit
·
edebf90
1
Parent(s):
0735f93
Exception handling for non-supported AWQ quantization not in the correct place
Browse files- main.py +5 -3
- tests/.env.example +3 -1
- tests/test_awq.py +46 -0
- tests/test_convertion.py +0 -31
- tests/test_gguf.py +0 -0
- tests/test_gptq.py +0 -0
main.py
CHANGED
@@ -65,14 +65,16 @@ def redirect_to_docs():
|
|
65 |
### FastAPI Endpoints
|
66 |
@app.post("/convert_awq", response_model=None)
|
67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
68 |
-
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
69 |
-
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
70 |
|
71 |
try:
|
72 |
-
model.
|
73 |
except TypeError as e:
|
74 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
75 |
|
|
|
|
|
|
|
|
|
76 |
if request.hf_push_repo:
|
77 |
model.save_quantized(request.hf_push_repo)
|
78 |
tokenizer.save_pretrained(request.hf_push_repo)
|
|
|
65 |
### FastAPI Endpoints
|
66 |
@app.post("/convert_awq", response_model=None)
|
67 |
def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
|
|
|
|
|
68 |
|
69 |
try:
|
70 |
+
model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
|
71 |
except TypeError as e:
|
72 |
raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
|
73 |
|
74 |
+
tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
|
75 |
+
|
76 |
+
model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
|
77 |
+
|
78 |
if request.hf_push_repo:
|
79 |
model.save_quantized(request.hf_push_repo)
|
80 |
tokenizer.save_pretrained(request.hf_push_repo)
|
tests/.env.example
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
ENDPOINT=
|
2 |
-
HF_TOKEN=
|
|
|
|
|
|
1 |
ENDPOINT=
|
2 |
+
HF_TOKEN=
|
3 |
+
HF_PUSH_REPO=
|
4 |
+
HF_ORGANIZATION=
|
tests/test_awq.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
import requests
|
3 |
+
from environs import Env
|
4 |
+
from huggingface_hub import login
|
5 |
+
|
6 |
+
env = Env()
|
7 |
+
env.read_env(override=True)
|
8 |
+
|
9 |
+
def test_incompatible_model():
|
10 |
+
with pytest.raises(requests.exceptions.HTTPError):
|
11 |
+
response = requests.post(
|
12 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
13 |
+
json={
|
14 |
+
"hf_model_name": "gpt2",
|
15 |
+
"hf_tokenizer_name": "gpt2",
|
16 |
+
"hf_push_repo": None,
|
17 |
+
}
|
18 |
+
)
|
19 |
+
assert response.status_code == 400
|
20 |
+
|
21 |
+
|
22 |
+
def test_convert_download():
|
23 |
+
response = requests.post(
|
24 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
25 |
+
json={
|
26 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
27 |
+
}
|
28 |
+
)
|
29 |
+
|
30 |
+
response.raise_for_status()
|
31 |
+
|
32 |
+
assert response.content_type == 'application/zip'
|
33 |
+
|
34 |
+
|
35 |
+
def test_convert_push():
|
36 |
+
model_name = "Qwen/Qwen2.5-14B-Instruct"
|
37 |
+
|
38 |
+
response = requests.post(
|
39 |
+
f"{env.str('ENDPOINT')}/convert_awq",
|
40 |
+
json={
|
41 |
+
"hf_model_name": "Qwen/Qwen2.5-14B-Instruct",
|
42 |
+
"hf_push_repo": env.str("HF_PUSH_REPO") or f"{env.str('HF_ORGANIZATION')}/{model_name.split('/')[-1]}-AWQ",
|
43 |
+
}
|
44 |
+
)
|
45 |
+
|
46 |
+
response.raise_for_status()
|
tests/test_convertion.py
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
import pytest
|
2 |
-
import requests
|
3 |
-
from environs import Env
|
4 |
-
from huggingface_hub import login
|
5 |
-
|
6 |
-
env = Env()
|
7 |
-
env.read_env(override=True)
|
8 |
-
|
9 |
-
@pytest.mark.parametrize("model_name", [
|
10 |
-
"gpt2",
|
11 |
-
])
|
12 |
-
def test_convert_download(model_name):
|
13 |
-
if env.str("HF_TOKEN"):
|
14 |
-
login(token=env("HF_TOKEN"))
|
15 |
-
|
16 |
-
response = requests.post(
|
17 |
-
env.str("ENDPOINT"),
|
18 |
-
json={
|
19 |
-
"hf_model_name": model_name,
|
20 |
-
"hf_tokenizer_name": model_name,
|
21 |
-
"hf_push_repo": None,
|
22 |
-
}
|
23 |
-
)
|
24 |
-
|
25 |
-
response.raise_for_status()
|
26 |
-
|
27 |
-
assert response.content_type == 'application/zip'
|
28 |
-
|
29 |
-
|
30 |
-
def test_convert_push():
|
31 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_gguf.py
ADDED
File without changes
|
tests/test_gptq.py
ADDED
File without changes
|