# Image for the Python application started by server.py, built on the
# CUDA 12.3.2 / cuDNN 9 "devel" base (Ubuntu 22.04).
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04

# Build-time only: keeps apt from prompting during `RUN` steps without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#   PYTHONUNBUFFERED            - flush Python stdout/stderr immediately.
#   CUDA_HOME / PATH / LD_*     - expose the CUDA toolkit under /usr/local/cuda.
#   NVIDIA_*                    - settings read by the NVIDIA container runtime.
#   HF_HOME                     - Hugging Face cache/config root.
#   NUMBA_CACHE_DIR             - Numba cache location (directory created below).
ENV PYTHONUNBUFFERED=1 \
    CUDA_HOME=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    HF_HOME=/app/models \
    NUMBA_CACHE_DIR=/tmp/numba_cache

# Install system dependencies. `update`, `install` and the apt list cleanup
# share one layer so the package index is never cached stale or shipped.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install build tools (uv is used for the installs below).
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel uv

WORKDIR /app

# Create the Numba cache directory referenced by NUMBA_CACHE_DIR.
# NOTE(review): the directory is handed to nobody:nogroup with mode 700, but no
# USER instruction is present in this file, so the container still runs as
# root. Confirm which user is intended and add a matching `USER` if needed
# (that user would also need write access to /app/models).
RUN mkdir -p /tmp/numba_cache && \
    chown nobody:nogroup /tmp/numba_cache && \
    chmod 700 /tmp/numba_cache

# Copy only the dependency manifest first so the install layers below stay
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Install Python requirements. `--system` is required because no virtualenv
# exists in this image and uv otherwise refuses to touch the system interpreter.
RUN python3 -m uv pip install --system --no-cache-dir --prerelease=allow -r requirements.txt

# flash-attn is built without build isolation, i.e. against the packages
# already installed above, so this step must come after requirements.txt.
# Presumably requirements.txt supplies its build-time dependencies - confirm.
RUN python3 -m uv pip install --system --no-cache-dir --no-build-isolation flash-attn

# Application source goes last: it changes most often.
COPY . .

# Documents the port the server is expected to listen on (does not publish it).
EXPOSE 8000

# Exec form so the Python process is PID 1 and receives stop signals directly.
CMD ["python3", "server.py"]