Rúben Almeida committed on
Commit 6af49e3 · 1 Parent(s): af9aed3

Done? Issue with Return type of the convert route

Files changed (1):
main.py (+20 −8)
main.py CHANGED
@@ -3,6 +3,7 @@ from typing import Optional, Union
 from awq import AutoAWQForCausalLM
 from pydantic import BaseModel, Field
 from transformers import AutoTokenizer
+from tempfile import NamedTemporaryFile
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import RedirectResponse, FileResponse
@@ -40,24 +41,35 @@ def redirect_to_docs():
 def read_root():
     return {"status": "ok"}
 
-@app.post("/convert")
+@app.post("/convert", response_model=None)
 def convert(request: ConvertRequest)->Union[FileResponse, dict]:
     model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
     tokenizer = AutoTokenizer.from_pretrained(request.hf_tokenizer_name or request.hf_model_name, trust_remote_code=True)
 
-    model.quantize(tokenizer, quant_config=quant_config)
+    model.quantize(tokenizer, quant_config=request.quantization_config.model_dump())
 
     if request.hf_push_repo:
-        model.save_quantized(quant_path)
-        tokenizer.save_pretrained(quant_path)
+        model.save_quantized(request.hf_push_repo)
+        tokenizer.save_pretrained(request.hf_push_repo)
 
         return {
             "status": "ok",
-            "message": f"Model saved to {quant_path}"
+            "message": f"Model saved to {request.hf_push_repo}",
        }
 
-    # Return a zip file with the converted model
+    # Return a zip file with the converted model
+    with NamedTemporaryFile(suffix=".zip", delete=False) as temp_zip:
+        zip_file_path = temp_zip.name
+        with zipfile.ZipFile(zip_file_path, 'w') as zipf:
+            # Save the model and tokenizer files to the zip
+            model.save_quantized(zipf)
+            tokenizer.save_pretrained(zipf)
+
+        return FileResponse(
+            zip_file_path,
+            media_type='application/zip',
+            filename=f"{request.hf_model_name}.zip"
+        )
 
 
-    raise HTTPException(status_code=501, detail="Not Implemented yet")
-    #return FileResponse(file_location, media_type='application/octet-stream',filename=file_name)
+    raise HTTPException(status_code=500, detail="Failed to convert model")
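
Note on the open issue flagged in the commit message: in the new zip branch, model.save_quantized() and tokenizer.save_pretrained() are handed the ZipFile object itself, although as far as I can tell both expect a directory path, and zipfile is never imported, so the non-push path would still fail before the FileResponse is returned. Below is a minimal sketch of how that step could be written instead, assuming both save methods take a directory; the helper name build_zip_response and its arguments are illustrative and not part of this commit.

    import os
    import shutil
    import zipfile
    from tempfile import NamedTemporaryFile, mkdtemp

    from fastapi.responses import FileResponse


    def build_zip_response(model, tokenizer, model_name: str) -> FileResponse:
        # Save the quantized model and tokenizer into a temporary directory.
        save_dir = mkdtemp(prefix="awq-")
        model.save_quantized(save_dir)
        tokenizer.save_pretrained(save_dir)

        # Pack the directory contents into a temporary zip archive.
        tmp_zip = NamedTemporaryFile(suffix=".zip", delete=False)
        tmp_zip.close()
        with zipfile.ZipFile(tmp_zip.name, "w", zipfile.ZIP_DEFLATED) as zf:
            for root, _dirs, files in os.walk(save_dir):
                for name in files:
                    path = os.path.join(root, name)
                    zf.write(path, arcname=os.path.relpath(path, save_dir))

        # The unpacked files are no longer needed once they are in the archive.
        # (The zip itself could later be cleaned up with a Starlette BackgroundTask.)
        shutil.rmtree(save_dir, ignore_errors=True)

        return FileResponse(
            tmp_zip.name,
            media_type="application/zip",
            # Model names may contain "/" (e.g. "org/model"); flatten for the filename.
            filename=f"{model_name.replace('/', '--')}.zip",
        )

The route would then end with return build_zip_response(model, tokenizer, request.hf_model_name), making the trailing raise HTTPException(500) unreachable. Keeping response_model=None on the decorator is still needed, since FastAPI cannot build a Pydantic response model from the Union[FileResponse, dict] annotation.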