File size: 792 Bytes
ae7cfbb
 
 
 
 
 
 
 
 
4998ce7
ae7cfbb
 
6d19ece
 
147b3a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Base image: official CPython 3.12.
FROM python:3.12

# Create an unprivileged account (UID 1000, with a home directory) and switch
# to it so every following build step and the running container are non-root.
RUN useradd --create-home --uid 1000 user
USER user

# pip run as a non-root user places console scripts under ~/.local/bin;
# put that directory first on PATH so they are found.
ENV PATH="/home/user/.local/bin:${PATH}"

WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt ./requirements.txt

# Install Python dependencies without keeping pip's download cache in the layer.
# The extra index is the PyTorch wheel index for CUDA 11.3 builds.
# NOTE(review): cu113 wheels may not be published for Python 3.12 — confirm
# this index still contributes anything, or bump it to a current CUDA tag.
RUN pip install --no-cache-dir \
        --requirement requirements.txt \
        --extra-index-url https://download.pytorch.org/whl/cu113

# Copy the rest of the build context into /app, owned by the runtime user.
COPY --chown=user . ./

# Documentation only: the server is started on port 7860 by CMD.
EXPOSE 7860

#CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

# Default container command (exec form, so the Python process is PID 1 and
# receives signals directly). Runs openai/api_server.py, resolved relative to
# the working directory, unbuffered (-u), serving the pinned model revision on
# 0.0.0.0:7860.
#
# A Dockerfile instruction ends at the newline, so a JSON array that spans
# several lines MUST use backslash continuations; without them the build fails
# because each following line is parsed as a separate (unknown) instruction.
CMD ["python3", "-u", "openai/api_server.py", \
     "--model", "meta-llama/Llama-3.2-3B-Instruct", \
     "--revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-num-batched-tokens", "32768", \
     "--max-model-len", "32768", \
     "--dtype", "half", \
     "--enforce-eager", \
     "--gpu-memory-utilization", "0.85"]