# Image that installs the Python dependencies from requirements.txt and starts
# the API server script shipped in this repository on port 7860.
FROM python:3.12

# Create an unprivileged account (UID 1000, with a home directory) and switch
# to it so the remaining build steps and the container process do not run as
# root.
RUN useradd -m -u 1000 user
USER user

# Make executables under the user's ~/.local/bin resolvable by name.
# NOTE(review): pip run as a non-root user normally places console scripts
# there -- confirm this is still where they land for this base image.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layer.
# The extra index is the PyTorch wheel index for the "cu113" channel.
RUN pip install --no-cache-dir -r requirements.txt \
    --extra-index-url https://download.pytorch.org/whl/cu113

# Copy the rest of the build context, owned by the unprivileged user.
COPY --chown=user . /app

# Documents the port passed to the server via --port below.
EXPOSE 7860

# Alternative entry point, kept for reference:
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

# Exec (JSON-array) form: no shell wraps the process, so python3 itself
# receives signals. "-u" disables Python's stdout/stderr buffering so log
# output appears immediately. The model is pinned to an explicit revision
# hash.
# NOTE(review): the flags look like those of a vLLM-style OpenAI-compatible
# server -- confirm against openai/api_server.py.
CMD ["python3", "-u", "openai/api_server.py", \
     "--model", "meta-llama/Llama-3.2-3B-Instruct", \
     "--revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-num-batched-tokens", "32768", \
     "--max-model-len", "32768", \
     "--dtype", "half", \
     "--enforce-eager", \
     "--gpu-memory-utilization", "0.85"]