# train_70b_4bit / Dockerfile
# Maintainer: daresearch
# Last update: commit 2987771 (verified)
# Use CUDA 11.8 with cuDNN 8. The -devel variant ships nvcc, which is needed
# to build CUDA extensions (e.g. bitsandbytes / xformers kernels) at pip time.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04

# Build-time only: suppress interactive apt prompts (tzdata asks for a zone).
# ARG instead of ENV so the setting does not leak into the runtime container.
ARG DEBIAN_FRONTEND=noninteractive

# Install basic utilities.
# --no-install-recommends keeps the layer small; the apt list cache is removed
# in the same layer so it never persists in the image (hadolint DL3009/DL3015).
# ca-certificates is required for the HTTPS curl used to bootstrap pip below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        software-properties-common \
        tzdata \
    && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
    && echo "Etc/UTC" > /etc/timezone \
    && rm -rf /var/lib/apt/lists/*
# Install Python 3.11 from the deadsnakes PPA (Ubuntu 20.04 ships 3.8).
# python3.11-distutils is required by get-pip.py on deadsnakes builds.
RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.11 \
        python3.11-dev \
        python3.11-distutils \
    && rm -rf /var/lib/apt/lists/*

# Use bash with pipefail for subsequent RUNs so a failed curl is not masked
# by the pipe (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Bootstrap pip for Python 3.11. -f makes curl fail (non-zero exit) on HTTP
# errors instead of piping an error page into the interpreter.
RUN curl -fsS https://bootstrap.pypa.io/get-pip.py | python3.11

# Make `python` resolve to python3.11 (leaves the system `python3` untouched,
# which Ubuntu's own tooling depends on).
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1
# Install Python packages (torch, xFormers, Transformers, etc.)
# Step 1: torch/torchvision/torchaudio from the stable CUDA 11.8 wheel index,
#         matching the cu118 base image.
# Step 2: xformers pinned to 0.0.27, with the cu118 *nightly* torch index as an
#         extra source for its dependencies.
# Step 3: the unpinned training/model stack.
# NOTE(review): mixing the stable cu118 index (step 1) with the nightly index
# (step 2) can let pip replace torch with a nightly build if the xformers pin
# requires it — verify the resolved torch version in the built image.
# NOTE(review): transformers/accelerate/trl/unsloth/etc. are unpinned, so
# rebuilds are not reproducible; consider pinning versions (hadolint DL3013).
RUN python -m pip install --no-cache-dir \
torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && \
python -m pip install --no-cache-dir \
xformers==0.0.27 \
--extra-index-url https://download.pytorch.org/whl/nightly/cu118/torch_nightly.html && \
python -m pip install --no-cache-dir \
transformers \
accelerate \
trl \
unsloth \
pandas \
datasets \
huggingface_hub \
safetensors \
bitsandbytes
# Helps reduce CUDA memory fragmentation on long training runs.
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Cache directories, grouped into a single ENV layer.
# - HF_HOME: root for Hugging Face caches and tokens.
# - TRANSFORMERS_CACHE: legacy override, deprecated in recent transformers
#   releases in favor of HF_HOME; kept for older library versions.
# - HF_DATASETS_CACHE: the variable the `datasets` library actually reads.
#   (DATASETS_CACHE is not consulted by `datasets`; kept in case any project
#   script reads it directly.)
ENV HF_HOME=/workspace/.huggingface \
    TRANSFORMERS_CACHE=/workspace/.huggingface \
    DATASETS_CACHE=/workspace/.cache \
    HF_DATASETS_CACHE=/workspace/.cache \
    TORCH_HOME=/workspace/.cache \
    XDG_CACHE_HOME=/workspace/.cache \
    TRITON_CACHE_DIR=/workspace/.cache/triton
# Create output and cache directories up front and open their permissions so
# the container can write to them when launched under an arbitrary UID
# (common on shared GPU platforms).
# NOTE(review): chmod -R 777 is broad — presumably intentional for arbitrary
# runtime UIDs; a dedicated user + --chown would be tighter. Confirm before
# narrowing.
RUN mkdir -p \
        /workspace/outputs \
        /workspace/.huggingface \
        /workspace/.cache/triton \
    && chmod -R 777 /workspace
WORKDIR /workspace

# Copy the training script and both CSV splits into the workspace.
# (Multi-source COPY: each file lands at /workspace/<basename>, identical to
# copying them one at a time.)
COPY finetune_script.py train.csv valid.csv /workspace/
# Preconfigure Accelerate for multi-GPU usage (4 GPUs, fp16).
# Written as real YAML to match the .yaml filename. (The original emitted a
# JSON blob, which accelerate also accepts only because JSON is a YAML
# subset.)
RUN mkdir -p /root/.cache/huggingface/accelerate && \
    printf '%s\n' \
        'compute_environment: LOCAL_MACHINE' \
        'distributed_type: MULTI_GPU' \
        'num_machines: 1' \
        'machine_rank: 0' \
        'num_processes: 4' \
        'mixed_precision: fp16' \
        'main_training_function: main' \
        'use_cpu: false' \
        > /root/.cache/huggingface/accelerate/default_config.yaml

# Launch the training script on 4 GPUs with fp16 (exec form: PID 1 receives
# SIGTERM from `docker stop`). The explicit flags restate the config defaults
# so the launch is correct even if the config file is absent or overridden.
CMD ["accelerate", "launch", "--num_processes=4", "--mixed_precision=fp16", "finetune_script.py"]