Rúben Almeida committed on
Commit
1a1e448
·
1 Parent(s): 59098b0

Revert to using CPU resources in the quantization process

Browse files
Files changed (4) hide show
  1. .vscode/launch.json +24 -0
  2. Dockerfile +2 -2
  3. main.py +2 -2
  4. requirements.txt +1 -1
.vscode/launch.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "name": "Python Debugger: FastAPI",
9
+ "type": "debugpy",
10
+ "request": "launch",
11
+ "module": "uvicorn",
12
+ "args": [
13
+ "main:app",
14
+ "--reload",
15
+ "--port",
16
+ "5000",
17
+ "--host",
18
+ "0.0.0.0"
19
+ ],
20
+ "jinja": true,
21
+ "cwd": "${workspaceFolder}",
22
+ }
23
+ ]
24
+ }
Dockerfile CHANGED
@@ -17,8 +17,8 @@ RUN pip install --upgrade pip
17
 
18
  RUN pip install -U setuptools wheel
19
 
20
- # Install torch
21
- RUN pip install -U torch torchvision torchaudio
22
 
23
  # Copy the requirements file into the container
24
  COPY --chown=user requirements.txt .
 
17
 
18
  RUN pip install -U setuptools wheel
19
 
20
+ # Install torch cpu version
21
+ RUN pip install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
22
 
23
  # Copy the requirements file into the container
24
  COPY --chown=user requirements.txt .
main.py CHANGED
@@ -6,7 +6,7 @@ from tempfile import NamedTemporaryFile
6
  from contextlib import asynccontextmanager
7
  from fastapi import FastAPI, HTTPException
8
  from fastapi.responses import RedirectResponse, FileResponse
9
- from .dto import AWQConvertionRequest, GGUFConvertionRequest, GPTQConvertionRequest
10
 
11
  ### FastAPI Initialization
12
  @asynccontextmanager
@@ -26,7 +26,7 @@ def redirect_to_docs():
26
  def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
27
 
28
  try:
29
- model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name)
30
  except TypeError as e:
31
  raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
32
 
 
6
  from contextlib import asynccontextmanager
7
  from fastapi import FastAPI, HTTPException
8
  from fastapi.responses import RedirectResponse, FileResponse
9
+ from dto import AWQConvertionRequest, GGUFConvertionRequest, GPTQConvertionRequest
10
 
11
  ### FastAPI Initialization
12
  @asynccontextmanager
 
26
  def convert(request: AWQConvertionRequest)->Union[FileResponse, dict]:
27
 
28
  try:
29
+ model = AutoAWQForCausalLM.from_pretrained(request.hf_model_name, device_map="cpu", trust_remote_code=True)
30
  except TypeError as e:
31
  raise HTTPException(status_code=400, detail=f"Is this model supported by AWQ Quantization? Check:https://github.com/mit-han-lab/llm-awq?tab=readme-ov-file {e}")
32
 
requirements.txt CHANGED
@@ -8,7 +8,7 @@ pydantic
8
  fastapi[standard]
9
  transformers
10
  huggingface_hub[hf_xet]
11
- autoawq>=0.2.8
12
  pytest
13
  requests
14
  environs
 
8
  fastapi[standard]
9
  transformers
10
  huggingface_hub[hf_xet]
11
+ autoawq[cpu]>=0.2.8
12
  pytest
13
  requests
14
  environs