Rúben Almeida committed
Commit 4d163d0 · 1 Parent(s): 1a1e448

Add GPU support

Files changed (4):
  1. .dockerignore +4 -0
  2. Dockerfile +1 -1
  3. main.py +10 -2
  4. requirements.txt +1 -1
.dockerignore ADDED
@@ -0,0 +1,4 @@
+**.venv
+**.pytest_cache
+**__pycache__
+**.env
Dockerfile CHANGED
@@ -18,7 +18,7 @@ RUN pip install --upgrade pip
 RUN pip install -U setuptools wheel
 
-# Install torch cpu version
-RUN pip install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+# Install torch (PyPI default wheels, CUDA-enabled on Linux)
+RUN pip install -U torch torchvision torchaudio
 
 # Copy the requirements file into the container
 COPY --chown=user requirements.txt .
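
Dropping the --extra-index-url pin means pip now resolves torch from PyPI, whose default Linux wheels ship with CUDA support. If the image has to match a specific driver or CUDA toolkit version, the wheel index can be pinned explicitly instead; a minimal sketch, assuming CUDA 12.1 wheels (the cu121 index below is illustrative, not part of this commit):

    # Hypothetical alternative: pin torch to a specific CUDA build instead of
    # the PyPI default (cu121 is an example; match it to the image's CUDA toolkit).
    RUN pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121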
main.py CHANGED
@@ -1,3 +1,4 @@
+import torch
 import zipfile
 from typing import Union
 from awq import AutoAWQForCausalLM
@@ -6,13 +7,20 @@ from tempfile import NamedTemporaryFile
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import RedirectResponse, FileResponse
-from dto import AWQConvertionRequest, GGUFConvertionRequest, GPTQConvertionRequest
+from .dto import AWQConvertionRequest, GGUFConvertionRequest, GPTQConvertionRequest
 
 ### FastAPI Initialization
 @asynccontextmanager
 async def lifespan(app:FastAPI):
+    torch.cuda.empty_cache()
+
+    print("Starting FastAPI server...")
+    print(f"Running on {'cuda' if torch.cuda.is_available() else 'cpu'}")
+
     yield
 
+    torch.cuda.empty_cache()
+
 app = FastAPI(title="Huggingface Safetensor Model Converter to AWQ", version="0.1.0", lifespan=lifespan)
 ### -------
 
@@ -26,7 +34,7 @@ def redirect_to_docs():
 def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
 
     try:
-        model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name, device_map="cpu", trust_remote_code=True)
+        model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name, trust_remote_code=True)
     except TypeError as e:
         raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check: https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
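Removing device_map="cpu" leaves weight placement to the library default, which in transformers typically means loading onto CPU until quantization moves layers to the GPU. If placement should stay explicit, the same kwarg the removed line used can be made conditional on the runtime; a minimal sketch (the model name below is a stand-in for request.hf_model_name):

    import torch
    from awq import AutoAWQForCausalLM

    # Sketch: reuse the device_map kwarg the removed line passed, but pick
    # the target from the runtime instead of hard-coding "cpu".
    device_map = "auto" if torch.cuda.is_available() else "cpu"

    model = AutoAWQForCausalLM.from_pretrained(
        "facebook/opt-125m",    # stand-in for request.hf_model_name
        device_map=device_map,
        trust_remote_code=True,
    )
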
requirements.txt CHANGED
@@ -8,7 +8,7 @@ pydantic
 fastapi[standard]
 transformers
 huggingface_hub[hf_xet]
-autoawq[cpu]>=0.2.8
+autoawq>=0.2.8
 pytest
 requests
 environs
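
Dropping the [cpu] extra lets pip install autoawq's GPU kernel dependencies alongside the CUDA-enabled torch wheels above. A quick sanity check that the built image actually sees a GPU (a sketch; torch.version.cuda is None on CPU-only builds):

    import torch

    # CUDA version the installed wheel was built against (None for CPU-only wheels)
    print("torch built with CUDA:", torch.version.cuda)
    # Whether a GPU is actually visible at runtime (driver + device present)
    print("cuda available:", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("device:", torch.cuda.get_device_name(0))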