# Builds a MobileLLM pretraining image on top of the official slim Python 3.12
# base image. The build context is expected to provide data.zip and pretrain.sh.
FROM python:3.12-slim

# Prevent Python from writing .pyc files and from buffering stdout/stderr.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Install system dependencies in a single layer and drop the apt lists in that
# same layer so they never reach the image. ca-certificates is listed explicitly
# because --no-install-recommends would otherwise not pull it in with git/curl,
# and the HTTPS clone below needs it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create a working directory; all relative paths below resolve against /app.
WORKDIR /app

# Create the cache and output directories and make them world-writable.
# NOTE(review): 777 is kept from the original, presumably so an arbitrary
# runtime UID can write here; tighten to a dedicated user if that is not needed.
RUN mkdir -p /app/cache /app/output \
    && chmod -R 777 /app/cache /app/output

# Point the Hugging Face cache at the directory created above.
ENV HF_HOME=/app/cache

# Fetch the MobileLLM sources into /app/MobileLLM.
RUN git clone https://github.com/facebookresearch/MobileLLM

# Install Python dependencies. At this point /app contains only cache/, output/
# and the cloned MobileLLM/ checkout, so the requirements file must be read from
# the checkout (a bare "requirement.txt" does not exist here yet).
# Assumes the cloned repository ships requirement.txt at its root - confirm.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r MobileLLM/requirement.txt

# Copy the build context into the container at /app. Done after dependency
# installation so that source changes do not invalidate the layers above.
COPY . /app

# Unzip data.zip (from the build context) into /app/data.
RUN unzip data.zip -d /app/data

# Pre-process the data and set the data path in pretrain.sh before building.
# Run pretraining. Build steps already run as root and sudo is not installed in
# this image, so the script is invoked directly.
# NOTE(review): this runs pretraining at build time; switch to CMD if it should
# run when the container starts instead.
RUN bash pretrain.sh