# syntax=docker/dockerfile:1

# Builds the CrawlGPT image with Streamlit as the front-end, targeting
# Hugging Face Spaces (Docker SDK). Includes SQLite database support.
#
# Build-time requirements: BuildKit secrets with ids GROQ_API_KEY and
# OLLAMA_API_TOKEN must be supplied (Spaces provides configured secrets
# automatically; locally use `docker build --secret id=...,env=...`).
# Runtime requirements: the same two names must be present as environment
# variables of the container. They are deliberately NOT stored in the image.
FROM python:3.12-slim

WORKDIR /app

# Runtime configuration.
# - DATABASE_PATH: default location of the SQLite database file.
# - PYTHONPATH:    makes /app importable.
# - PATH:          user-level pip scripts and /app/src ahead of the system PATH.
ENV DATABASE_PATH="/app/data/database.sqlite" \
    PYTHONPATH=/app \
    PATH="/home/appuser/.local/bin:/app/src:${PATH}"

# System packages: compiler toolchain, git and curl, plus the SQLite CLI and
# development headers. ca-certificates is listed explicitly because
# --no-install-recommends would otherwise not pull it in for curl/git.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        libsqlite3-dev \
        sqlite3 \
    && rm -rf /var/lib/apt/lists/*

# Non-root runtime user. The UID is pinned to 1000, which is the UID Hugging
# Face Spaces runs containers as. Passwordless sudo is intentionally not
# configured: it would hand the runtime user full root access.
RUN useradd --create-home --shell /bin/bash --uid 1000 appuser

# Writable locations for the runtime user: the application root, database
# storage, the crawl4ai working directory, exports, and the user-level pip
# directory. The database file is created up front with mode 644.
RUN mkdir -p /app/data /app/.crawl4ai /app/exports /home/appuser/.local \
    && touch "${DATABASE_PATH}" \
    && chmod 644 "${DATABASE_PATH}" \
    && chown -R appuser:appuser /app /home/appuser

# Project files, owned by the runtime user from the start (no extra chown layer).
COPY --chown=appuser:appuser pyproject.toml setup_env.py ./
COPY --chown=appuser:appuser src/ ./src/
COPY --chown=appuser:appuser tests/ ./tests/

# Fail the build early when the required secrets were not supplied.
# The secret values are only mounted for this step; they are never written to
# a layer, an ARG or an ENV.
# Docs: https://huggingface.co/docs/hub/en/spaces-sdks-docker#secrets-and-variables-management
RUN --mount=type=secret,id=GROQ_API_KEY,required=true \
    --mount=type=secret,id=OLLAMA_API_TOKEN,required=true \
    test -r /run/secrets/GROQ_API_KEY \
    && echo "GROQ_API_KEY is set." \
    && test -r /run/secrets/OLLAMA_API_TOKEN \
    && echo "OLLAMA_API_TOKEN is set."

# Python dependencies: the project itself (editable install), then the
# test/lint tooling.
RUN pip install --no-cache-dir -e .
RUN pip install --no-cache-dir pytest pytest-mockito black isort flake8

# Operating-system libraries required by the Playwright browsers. This step
# installs apt packages, so it runs while the build is still root; the apt
# lists it fetches are removed in the same layer.
RUN playwright install-deps \
    && rm -rf /var/lib/apt/lists/*

# Everything below runs as the unprivileged user.
USER appuser

# Download the Playwright browser binaries as appuser so they land in that
# user's own cache directory.
RUN playwright install

# Streamlit port (documentation only; Spaces routes traffic to 7860).
EXPOSE 7860

# Default command: run the Streamlit app.
CMD ["python", "-m", "streamlit", "run", "src/crawlgpt/ui/chat_app.py", "--server.port=7860", "--server.address=0.0.0.0"]