# Image for multi-GPU fine-tuning of a FlagEmbedding bi-encoder.
# Base: NVIDIA CUDA 12.1.1 "devel" image on Ubuntu 22.04. The CUDA version
# here is matched by the pytorch-cuda=12.1 package installed further down.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# System packages: git/wget and the C/C++ toolchain, CA certificates for
# HTTPS downloads, and the JPEG/PNG development headers.
# update + install + list cleanup stay in ONE layer so the apt index is never
# persisted in the image and a stale cached `apt-get update` cannot be reused.
# DEBIAN_FRONTEND is set inline (not via ENV) so it does not leak into the
# runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libjpeg-dev \
        libpng-dev \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Conda install prefix, used by the Miniconda installer step.
ENV CONDA_DIR=/opt/conda
# Kept as a separate instruction on purpose: variable substitution inside a
# single ENV instruction uses the values from BEFORE that instruction, so
# $CONDA_DIR would be empty if this were merged with the line above.
ENV PATH=$CONDA_DIR/bin:$PATH
# Location of the CUDA toolkit inside the base image.
ENV CUDA_HOME=/usr/local/cuda

# Miniconda installer version. The default "latest" keeps the original
# behaviour; pass e.g. --build-arg MINICONDA_VERSION=<release> to pin a
# specific installer for reproducible builds.
ARG MINICONDA_VERSION=latest

# Download the installer, run it in batch mode (-b) into $CONDA_DIR (-p), then
# delete the installer and conda's caches in the same layer so neither ends up
# in the image.
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" -O /tmp/miniconda.sh && \
    /bin/bash /tmp/miniconda.sh -b -p "$CONDA_DIR" && \
    rm /tmp/miniconda.sh && \
    "$CONDA_DIR/bin/conda" clean -ya

# Create the dedicated Python 3.11 environment named "docker" that every later
# `conda run -n docker ...` step targets. Package caches are removed in the
# same layer so they do not add to the image size.
# NOTE(review): recent Miniconda installers may require the Anaconda channel
# Terms of Service to be accepted before non-interactive use of the default
# channels - confirm this step still succeeds with the installer in use.
RUN conda create -n docker python=3.11 -y && \
    conda clean -afy

# Application root. Later relative paths (`pip install -e .`, and the relative
# paths in CMD) resolve against this directory.
WORKDIR /app

# PyTorch (CUDA 12.1 build) is installed in its own layer BEFORE the
# requirements file is copied, so editing requirements_lang.txt does not
# invalidate and re-download this large layer. Conda caches are cleaned in the
# same layer to keep them out of the image.
RUN conda run -n docker conda install -c pytorch -c nvidia \
        pytorch torchvision torchaudio pytorch-cuda=12.1 -y && \
    conda clean -afy

# Only the requirements manifest is copied here (not the whole source tree) so
# the pip layer below is rebuilt only when the manifest changes.
COPY requirements_lang.txt /app/requirements_lang.txt

# Project requirements first, then the extra training-time packages.
# NOTE(review): the extra packages are unpinned, so rebuilds may pick up newer
# releases; pin versions if reproducibility matters.
RUN conda run -n docker pip install --no-cache-dir -r /app/requirements_lang.txt && \
    conda run -n docker pip install --no-cache-dir pandas deepspeed sentencepiece tqdm

# Copy the full build context last: source edits then invalidate only the
# layers from here on, not the dependency layers.
# NOTE(review): everything in the build context is copied; make sure a
# .dockerignore excludes .git, checkpoints and other large or secret files.
COPY . /app

# Editable install of the project from the current WORKDIR into the "docker"
# environment. --no-cache-dir matches the other pip steps and keeps pip's
# download cache out of the image.
RUN conda run -n docker pip install --no-cache-dir -e .

# Every container command runs inside the "docker" conda environment; CMD (or
# the arguments given to `docker run`) is appended to this.
# --no-capture-output is required for a long-running job: by default
# `conda run` captures the child's stdout/stderr instead of streaming it, so
# training logs would not appear in `docker logs` while the job is running.
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "docker"]

# Default command (exec form), appended to the ENTRYPOINT: launch the
# FlagEmbedding fine-tuning module through torchrun with 4 processes per node.
# Override CMD at `docker run` time to change any argument (for example the
# process count on a host with a different number of GPUs).
# Relative paths (output_dir/..., data/..., FlagEmbedding/...) resolve against
# the image's WORKDIR.
# NOTE(review): "--normlized" is reproduced exactly as written in the original
# command; presumably it matches the argument name the training script
# declares - verify before "correcting" the spelling.
CMD ["torchrun", "--nproc_per_node", "4", \
     "-m", "FlagEmbedding.baai_general_embedding.finetune.run", \
     "--output_dir", "output_dir/ckpt_bi_encoder", \
     "--model_name_or_path", "BAAI/bge-m3", \
     "--train_data", "data/bi/train_1024_chunks.jsonl", \
     "--learning_rate", "1e-5", \
     "--num_train_epochs", "20", \
     "--per_device_train_batch_size", "2", \
     "--dataloader_drop_last", "True", \
     "--deepspeed", "FlagEmbedding/BGE_M3/ds_config.json", \
     "--normlized", "True", \
     "--temperature", "0.02", \
     "--warmup_ratio", "0.1", \
     "--query_max_len", "128", \
     "--passage_max_len", "1024", \
     "--train_group_size", "4", \
     "--negatives_cross_device", \
     "--logging_steps", "10", \
     "--fp16", \
     "--save_steps", "1000"]