Update logs
merges.txt
ADDED
The diff for this file is too large to render. See the raw diff.
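Presumably this merges.txt is the byte-level BPE merge table produced when the Dutch tokenizer was trained; for a GPT-2/GPT-Neo-style tokenizer with VOCAB_SIZE="50257" (set in the script below) it normally sits next to vocab.json. A quick, hypothetical sanity check after cloning the repo — file locations and counts are assumptions, not something this commit states:

ls -lh merges.txt vocab.json tokenizer.json   # tokenizer artifacts, if all of them were pushed
wc -l merges.txt                              # roughly 50,000 merge rules plus a "#version" header
head -n 4 merges.txt                          # header line followed by the most frequent merges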
run_gpt_neo.sh
CHANGED
@@ -5,32 +5,36 @@ export HF_PROJECT="gpt-neo-1.3B-dutch"
 # Variables for training the tokenizer and creating the config
 export VOCAB_SIZE="50257"
 export DATASET="yhavinga/mc4_nl_cleaned" # Name of the dataset in the Huggingface Hub
-export DATASET_CONFIG="
+export DATASET_CONFIG="large" # Config of the dataset in the Huggingface Hub
 export DATASET_SPLIT="train" # Split to use for training tokenizer and model
 export TEXT_FIELD="text" # Field containing the text to be used for training
 export CONFIG_TYPE="EleutherAI/gpt-neo-1.3B" # Config that our model will use
 export MODEL_PATH="${HOME}/data/${HF_PROJECT}" # Path to the model, e.g. here inside the mount
 
+
 python run_clm_flax.py \
     --output_dir="${MODEL_PATH}" \
     --model_type="gpt_neo" \
     --config_name="${MODEL_PATH}" \
+    --model_name_or_path="${MODEL_PATH}" \
     --tokenizer_name="${MODEL_PATH}" \
     --preprocessing_num_workers="96" \
     --do_train --do_eval \
     --dataset_name="${DATASET}" \
     --dataset_config_name="${DATASET_CONFIG}" \
     --block_size="512" \
-    --per_device_train_batch_size="
-    --per_device_eval_batch_size="
-    --learning_rate="0.0005"
+    --per_device_train_batch_size="2" \
+    --per_device_eval_batch_size="2" \
+    --learning_rate="0.0005" \
+    --warmup_steps="5000" \
     --adafactor \
     --overwrite_output_dir \
     --num_train_epochs="1" \
     --logging_steps="500" \
-    --save_steps="
-    --eval_steps="
+    --save_steps="20000" \
+    --eval_steps="5000"
 
     # \
     # --push_to_hub
     # --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
+    # --learning_rate="0.0005" --warmup_steps="5000" \
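Net effect of the script change: training now restarts from the checkpoint already sitting in ${MODEL_PATH} (the new --model_name_or_path flag), and the run is pinned to per-device train/eval batch size 2, learning rate 0.0005 with 5000 warmup steps, a checkpoint every 20000 steps and an eval every 5000 steps. A minimal launch-and-sanity-check sketch; the 8-device count is an assumption based on the t1v-* TPU-VM hostnames in the runs/ files below, not something the commit states:

bash run_gpt_neo.sh 2>&1 | tee train.log   # run with the updated settings, keep a local log
echo $(( 2 * 8 ))                          # 16 sequences per optimizer step across 8 devices
echo $(( 2 * 8 * 512 ))                    # 8192 tokens per step at --block_size="512"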
runs/events.out.tfevents.1641116702.t1v-n-2f64d7c8-w-0.151740.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb5f45ffc186250657cd42a837e4697912bd59a51de9216c7012dbb08dcd7c85
+size 956608
runs/events.out.tfevents.1641125986.t1v-n-2f64d7c8-w-0.164072.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47fc30d1dfda8812fd6ea2b7e10f02db18885492aaa1eb87211edef70e496f8b
+size 1839862
runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f4ea05e3c57e06d5f3dd56cb928fa56afd6f58867454ec0d9aeecf8a0ee83f8
+size 28682045
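The three runs/*.v2 entries above are Git LFS pointer files (version, oid, size), not the TensorBoard event data itself. A sketch of fetching and viewing the actual payloads after cloning; it assumes git-lfs and TensorBoard are installed, which the commit itself does not state:

git lfs install
git lfs pull --include="runs/*"   # replace the pointer files with the real .tfevents payloads
tensorboard --logdir runs         # browse the logged training curves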