# Image for fine-tuning a bi-encoder embedding model with FlagEmbedding.
# Layout: CUDA devel base -> OS packages -> Miniconda -> conda env "docker"
# (Python 3.11 + PyTorch for CUDA 12.1) -> Python deps -> project source.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# OS-level build and image libraries. update + install + list cleanup share one
# layer so the apt index is never stale and never shipped in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libjpeg-dev \
        libpng-dev \
        wget && \
    rm -rf /var/lib/apt/lists/*

# CONDA_DIR must be defined in an earlier ENV instruction than PATH: variables
# set within a single ENV instruction are not visible to each other.
ENV CONDA_DIR=/opt/conda \
    CUDA_HOME=/usr/local/cuda
ENV PATH=${CONDA_DIR}/bin:${PATH}

# Installer file name under https://repo.anaconda.com/miniconda/. The default
# keeps the original "latest" behaviour; pass
#   --build-arg MINICONDA_INSTALLER=Miniconda3-<version>-Linux-x86_64.sh
# to pin a specific release for reproducible builds.
ARG MINICONDA_INSTALLER=Miniconda3-latest-Linux-x86_64.sh

# Download, install, and remove the installer in a single layer so the script
# does not persist in the image.
RUN wget --quiet "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}" -O /tmp/miniconda.sh && \
    /bin/bash /tmp/miniconda.sh -b -p "${CONDA_DIR}" && \
    rm /tmp/miniconda.sh && \
    "${CONDA_DIR}/bin/conda" clean -ya

# Dedicated environment that every later install and the entrypoint target.
RUN conda create -n docker python=3.11 -y && \
    conda clean -ya

WORKDIR /app

# Copy only the requirements manifest first so the heavy dependency layer below
# stays cached when just the application source changes.
COPY requirements_lang.txt /app/requirements_lang.txt

# PyTorch (CUDA 12.1 build) via conda, remaining Python deps via pip inside the
# same env. Package caches are dropped in this layer to keep it small.
RUN conda install -n docker -c pytorch -c nvidia -y \
        pytorch torchvision torchaudio pytorch-cuda=12.1 && \
    conda clean -ya && \
    conda run -n docker pip install --no-cache-dir -r /app/requirements_lang.txt && \
    conda run -n docker pip install --no-cache-dir pandas deepspeed sentencepiece tqdm

# Project source. NOTE(review): this copies the entire build context; make sure
# a .dockerignore excludes .git, datasets, checkpoints, and any secrets.
COPY . /app

# Editable install of the project into the "docker" env.
RUN conda run -n docker pip install --no-cache-dir -e .

# Every container command runs inside the "docker" conda env.
# --no-capture-output stops `conda run` from capturing the child's
# stdout/stderr, so training logs stream live to `docker logs`.
# NOTE(review): no USER directive is set, so the container runs as root.
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "docker"]

# Default training command; override by passing a different command to
# `docker run`. Relative paths resolve against WORKDIR (/app).
# --nproc_per_node is fixed at 4 here; adjust when overriding if the host
# exposes a different number of GPUs.
# NOTE(review): "--normlized" spelling is kept exactly as in the original;
# confirm it matches the argument name the training script defines.
CMD ["torchrun", "--nproc_per_node", "4", \
     "-m", "FlagEmbedding.baai_general_embedding.finetune.run", \
     "--output_dir", "output_dir/ckpt_bi_encoder", \
     "--model_name_or_path", "BAAI/bge-m3", \
     "--train_data", "data/bi/train_1024_chunks.jsonl", \
     "--learning_rate", "1e-5", \
     "--num_train_epochs", "20", \
     "--per_device_train_batch_size", "2", \
     "--dataloader_drop_last", "True", \
     "--deepspeed", "FlagEmbedding/BGE_M3/ds_config.json", \
     "--normlized", "True", \
     "--temperature", "0.02", \
     "--warmup_ratio", "0.1", \
     "--query_max_len", "128", \
     "--passage_max_len", "1024", \
     "--train_group_size", "4", \
     "--negatives_cross_device", \
     "--logging_steps", "10", \
     "--fp16", \
     "--save_steps", "1000"]