Rúben Almeida committed · Commit 6af49e3
Parent(s): af9aed3

Done? Issue with Return type of the convert route
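Note on the return type flagged in the commit message: FastAPI cannot derive a response model from an annotation such as Union[FileResponse, dict], because FileResponse is not a Pydantic-compatible type, and it fails at application startup with an "Invalid args for response field" error; passing response_model=None to the route decorator disables response-model generation, which is what this commit adds. A minimal, self-contained sketch of that pattern (illustration only, not code from this repository):

from typing import Union

from fastapi import FastAPI
from fastapi.responses import FileResponse

app = FastAPI()

# Without response_model=None, FastAPI tries to build a Pydantic response model
# from Union[FileResponse, dict] and raises an error at startup.
@app.post("/convert", response_model=None)
def convert(as_file: bool = False) -> Union[FileResponse, dict]:
    if as_file:
        # Hypothetical path, for illustration only.
        return FileResponse("model.zip", media_type="application/zip")
    return {"status": "ok"}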
main.py CHANGED
@@ -3,6 +3,7 @@ from typing import Optional, Union
from awq import AutoAWQForCausalLM
from pydantic import BaseModel, Field
from transformers import AutoTokenizer
+from tempfile import NamedTemporaryFile
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
from fastapi.responses import RedirectResponse, FileResponse
@@ -40,24 +41,35 @@ def redirect_to_docs():
def read_root():
    return {"status": "ok"}

-@app.post("/convert")
+@app.post("/convert", response_model=None)
def convert(request: ConvertRequest)->Union[FileResponse, dict]:
    model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
    tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)

-    model.quantize(tokenizer, quant_config=
+    model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())

    if request.hf_push_repo:
-        model.save_quantized(
-        tokenizer.save_pretrained(
+        model.save_quantized(request.hf_push_repo)
+        tokenizer.save_pretrained(request.hf_push_repo)

        return {
            "status": "ok",
-            "message": f"Model saved to {
+            "message": f"Model saved to {request.hf_push_repo}",
        }

-
+    # Return a zip file with the converted model
+    with NamedTemporaryFile(suffix=".zip", delete=False) as temp_zip:
+        zip_file_path = temp_zip.name
+        with zipfile.ZipFile(zip_file_path, 'w') as zipf:
+            # Save the model and tokenizer files to the zip
+            model.save_quantized(zipf)
+            tokenizer.save_pretrained(zipf)
+
+    return FileResponse(
+        zip_file_path,
+        media_type='application/zip',
+        filename=f"{request.hf_model_name}.zip"
+    )


-    raise HTTPException(status_code=
-    #return FileResponse(file_location, media_type='application/octet-stream',filename=file_name)
+    raise HTTPException(status_code=500, detail="Failed to convert model")
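The ConvertRequest model referenced by the route is defined elsewhere in main.py and is not part of this diff. A hypothetical reconstruction, inferred only from the attributes the route reads (hf_model_name, hf_tokenizer_name, hf_push_repo, quantization_config) and from AutoAWQ's commonly documented quant_config keys; the actual field names and defaults in the repository may differ:

from typing import Optional

from pydantic import BaseModel, Field


class QuantizationConfig(BaseModel):
    # Defaults follow AutoAWQ's usual example config; assumed, not taken from this repo.
    zero_point: bool = True
    q_group_size: int = 128
    w_bit: int = 4
    version: str = "GEMM"


class ConvertRequest(BaseModel):
    hf_model_name: str
    hf_tokenizer_name: Optional[str] = None
    # When set, the quantized model and tokenizer are saved under this repo id / path.
    hf_push_repo: Optional[str] = None
    quantization_config: QuantizationConfig = Field(default_factory=QuantizationConfig)

With Pydantic v2, request.quantization_config.model_dump() then yields a plain dict such as {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}, which is the shape AutoAWQ's quantize() accepts for quant_config.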
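AutoAWQ's model.save_quantized() and tokenizer.save_pretrained() write their files to a directory path, so handing them the ZipFile handle (zipf) as the added lines do will not populate the archive, and zipfile itself is not imported in this hunk (it may be imported elsewhere in main.py). A sketch of one way to build the zip under those assumptions, saving into a temporary directory and then archiving it with shutil.make_archive; this is an illustrative alternative, not the committed code:

import shutil
import tempfile
from pathlib import Path

from fastapi.responses import FileResponse


def package_quantized_model(model, tokenizer, model_name: str) -> FileResponse:
    """Hypothetical helper: save the quantized model and tokenizer, zip them, return the zip."""
    work_dir = Path(tempfile.mkdtemp())       # left on disk until the response has been sent
    save_dir = work_dir / "model"
    model.save_quantized(str(save_dir))       # writes the quantized weights and config
    tokenizer.save_pretrained(str(save_dir))  # writes the tokenizer files alongside them
    zip_path = shutil.make_archive(str(work_dir / "model_quantized"), "zip", root_dir=str(save_dir))
    return FileResponse(
        zip_path,
        media_type="application/zip",
        filename=f"{model_name.replace('/', '_')}.zip",  # HF model ids contain '/', which breaks filenames
    )

FileResponse streams the file after the handler returns, so the temporary directory has to outlive the handler; passing a starlette BackgroundTask as the response's background= argument is one way to clean it up once the download finishes.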