# Image for a Python 3.12 application (app.py) that serves a Gradio UI on
# port 7860 and uses Flash Attention 2.
#
# Layers are ordered from least to most frequently changing (OS packages ->
# Python dependencies -> application source) so that editing application code
# does not force the expensive dependency layers to be rebuilt.

# Use an official Python runtime as the parent image.
FROM python:3.12

# Set the working directory in the container.
WORKDIR /usr/src/app

# Install system dependencies. The apt package index is refreshed and removed
# in the same layer so it is never stale and never shipped in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first: the pip layers below stay cached
# until requirements.txt itself changes.
COPY requirements.txt ./

# Install Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Install the build helpers Flash Attention 2 needs at install time.
RUN pip install --no-cache-dir packaging ninja

# Install Flash Attention 2. --no-build-isolation makes its build use the
# packages already installed in this environment instead of a fresh one.
# NOTE(review): this presumably relies on requirements.txt having installed
# torch, and compiling from source needs a CUDA toolkit that this base image
# does not provide -- confirm a matching prebuilt wheel is picked up.
RUN pip install --no-cache-dir flash-attn --no-build-isolation

# Install Triton, an additional dependency for Flash Attention 2.
RUN pip install --no-cache-dir triton

# Copy the application source last; it changes most often.
# NOTE(review): add a .dockerignore (.git, caches, .env, ...) if one is not
# already present, so they are not copied into the image.
COPY . .

# Document the port Gradio listens on (EXPOSE does not publish the port).
EXPOSE 7860

# Disable Python output buffering so logs show up immediately.
ENV PYTHONUNBUFFERED=1

# Run the application. Arguments given to `docker run` are appended to this.
ENTRYPOINT ["python", "app.py"]