eltorio
committed on
Commit
·
1392114
1
Parent(s):
2b6a5b3
refactor docker image
Browse files
- Dockerfile +27 -10
- learn.py +2 -1
- preload.py +1 -0
- start.sh +19 -3
Dockerfile
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
-
# build with: docker build . --tag sctg/roco-idefics3:0.0.
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
|
|
8 |
COPY --chmod=777 start.sh /start.sh
|
9 |
COPY learn.py /learn.py
|
10 |
COPY preload.py /preload.py
|
@@ -12,6 +14,21 @@ COPY preload.py /preload.py
|
|
12 |
USER root
|
13 |
RUN chown -R 42420:42420 /workspace
|
14 |
USER 42420
|
15 |
-
RUN
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# build with: docker build . --tag sctg/roco-idefics3:0.0.4 --tag sctg/roco-idefics3:latest --push
|
2 |
+
# run with
|
3 |
+
# docker run --gpus all --user=42420:42420 -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.4 bash -i /start.sh hf_TOKEN
|
4 |
+
FROM nvidia/cuda:11.6.1-devel-ubuntu20.04
|
5 |
+
# FROM nvidia/cuda:11.0.3-devel-ubuntu20.04
|
6 |
+
# RUN mkdir -p /workspace
|
7 |
+
RUN /usr/sbin/addgroup --gid 42420 ovh
|
8 |
+
RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
|
9 |
+
RUN apt update -y && apt-get install -y curl git git-lfs screen
|
10 |
COPY --chmod=777 start.sh /start.sh
|
11 |
COPY learn.py /learn.py
|
12 |
COPY preload.py /preload.py
|
|
|
14 |
USER root
|
15 |
RUN chown -R 42420:42420 /workspace
|
16 |
USER 42420
|
17 |
+
RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > /workspace/miniconda.sh
|
18 |
+
RUN /bin/bash /workspace/miniconda.sh -b -p /workspace/.miniconda3
|
19 |
+
RUN . /workspace/.miniconda3/bin/activate && conda init --all
|
20 |
+
RUN . /workspace/.miniconda3/bin/activate \
|
21 |
+
&& pip install -U "safetensors>=0.4.5" \
|
22 |
+
&& pip install -U "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-0.44.2.dev0-py3-none-manylinux_2_24_x86_64.whl" \
|
23 |
+
&& pip install -U git+https://github.com/huggingface/transformers.git\
|
24 |
+
&& pip install huggingface_hub[cli] accelerate datasets peft\
|
25 |
+
&& pip install -U Pillow
|
26 |
+
RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
|
27 |
+
# Mandatory to run the jobs in rootless mode
|
28 |
+
# USER root
|
29 |
+
# RUN chown -R 42420:42420 /workspace
|
30 |
+
USER 42420
|
31 |
+
WORKDIR /workspace
|
32 |
+
# RUN export HOME=/workspace && cd /workspace && . /workspace/.miniconda3/bin/activate \
|
33 |
+
# && mkdir -p /workspace/data \
|
34 |
+
# && python /preload.py
|
learn.py
CHANGED
@@ -148,4 +148,5 @@ trainer = Trainer(
|
|
148 |
train_dataset = train_dataset,
|
149 |
)
|
150 |
|
151 |
-
trainer.train()
|
|
|
|
148 |
train_dataset = train_dataset,
|
149 |
)
|
150 |
|
151 |
+
trainer.train()
|
152 |
+
|
preload.py
CHANGED
@@ -3,4 +3,5 @@
|
|
3 |
# License: Apache License 2.0
|
4 |
from datasets import load_dataset
|
5 |
dataset_id = "eltorio/ROCO-radiology"
|
|
|
6 |
train_dataset = load_dataset(dataset_id, split="train", cache_dir=cache_dir)
|
|
|
3 |
# License: Apache License 2.0
|
4 |
from datasets import load_dataset
|
5 |
dataset_id = "eltorio/ROCO-radiology"
|
6 |
+
cache_dir = "/workspace/data"
|
7 |
train_dataset = load_dataset(dataset_id, split="train", cache_dir=cache_dir)
|
start.sh
CHANGED
@@ -1,10 +1,26 @@
|
|
1 |
#!/bin/bash
|
|
|
|
|
|
|
|
|
2 |
cd /workspace
|
3 |
-
git config --global credential.helper store
|
4 |
git lfs install
|
5 |
export HF_TOKEN=$1
|
|
|
6 |
echo "HF_TOKEN: $HF_TOKEN"
|
7 |
-
|
8 |
-
git clone https://huggingface.co/eltorio/IDEFICS3_ROCO
|
9 |
. /workspace/.miniconda3/bin/activate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
python /learn.py
|
|
|
1 |
#!/bin/bash
|
2 |
+
JOB_URL_SCHEME=${JOB_URL_SCHEME:-"http://"}
|
3 |
+
JOB_ID=${JOB_ID:-'localhost'}
|
4 |
+
JOB_HOST=${JOB_HOST:-'local'}
|
5 |
+
export HOME=/workspace
|
6 |
cd /workspace
|
|
|
7 |
git lfs install
|
8 |
export HF_TOKEN=$1
|
9 |
+
unset $1
|
10 |
echo "HF_TOKEN: $HF_TOKEN"
|
11 |
+
. /workspace/.bashrc
|
|
|
12 |
. /workspace/.miniconda3/bin/activate
|
13 |
+
git clone https://huggingface.co/eltorio/IDEFICS3_ROCO
|
14 |
+
git config --global credential.helper store
|
15 |
+
|
16 |
+
huggingface-cli login --add-to-git-credential --token $HF_TOKEN
|
17 |
+
|
18 |
+
|
19 |
+
jupyter lab --ip=0.0.0.0 --port=8080 --no-browser --allow-root \
|
20 |
+
--notebook-dir=/workspace \
|
21 |
+
--LabApp.token='' \
|
22 |
+
--LabApp.custom_display_url=${JOB_URL_SCHEME}${JOB_ID}-8080.${JOB_HOST} \
|
23 |
+
--LabApp.allow_remote_access=True \
|
24 |
+
--LabApp.allow_origin='*' \
|
25 |
+
--LabApp.disable_check_xsrf=True &
|
26 |
python /learn.py
|