fixed the save_steps, make test
- run_clm_flax.py +2 -2
- run_pretraining.sh +6 -3
run_clm_flax.py

@@ -413,7 +413,8 @@ def main():
         total_length = len(concatenated_examples[list(examples.keys())[0]])
         # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
         # customize this part to your needs.
-        total_length = (total_length // block_size) * block_size
+        if total_length >= block_size:
+            total_length = (total_length // block_size) * block_size
         # Split by chunks of max_len.
         result = {
             k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
@@ -636,7 +637,6 @@ def main():

        # Save metrics
        if has_tensorboard and jax.process_index() == 0:
-            cur_step = epoch * (len(train_dataset) // train_batch_size)
            write_eval_metric(summary_writer, eval_metrics, cur_step)

        if cur_step % training_args.save_steps == 0 and cur_step > 0:
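The guard added to group_texts only changes behaviour when a batch of concatenated texts is shorter than block_size; a minimal standalone sketch (toy token lists, not the real tokenized dataset) shows the effect:

# Sketch of the chunking logic after this change; toy values only.
block_size = 4

def group_texts(examples):
    # Concatenate all token lists per column.
    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # Only drop the remainder when there is at least one full block;
    # without the guard a short batch collapses to an empty result.
    if total_length >= block_size:
        total_length = (total_length // block_size) * block_size
    # Split by chunks of block_size.
    return {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }

print(group_texts({"input_ids": [[1, 2], [3, 4, 5]]}))  # {'input_ids': [[1, 2, 3, 4]]}
print(group_texts({"input_ids": [[1, 2, 3]]}))          # {'input_ids': [[1, 2, 3]]}; [] before the guard

In the second hunk, dropping the cur_step reassignment presumably leaves cur_step at its per-step value, so the save_steps check below it fires on the intended steps, matching the commit title.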
run_pretraining.sh

@@ -1,3 +1,4 @@
+export MODEL_DIR=`pwd`
 export WANDB_ENTITY="wandb"
 export WANDB_PROJECT="hf-flax-gpt2-indonesian"
 export WANDB_LOG_MODEL="true"
@@ -13,12 +14,14 @@ export WANDB_LOG_MODEL="true"
     --block_size="512" \
     --per_device_train_batch_size="24" \
     --per_device_eval_batch_size="24" \
-    --learning_rate="
+    --learning_rate="0.0024" --warmup_steps="1000" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \
     --num_train_epochs="20" \
     --dataloader_num_workers="64" \
     --preprocessing_num_workers="64" \
-    --save_steps="
-    --eval_steps="
+    --save_steps="10" \
+    --eval_steps="10" \
+    --max_train_samples="10000" \
+    --max_eval_samples="1000" \
     --push_to_hub
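With max_train_samples="10000" and save_steps/eval_steps of 10, this is a smoke-test configuration rather than a real run; a rough estimate of the resulting step counts (the device count is an assumption, it is not set in the script):

# Back-of-the-envelope step count for the test settings above.
# num_devices is an assumption (e.g. a v3-8 TPU); everything else comes from the script.
max_train_samples = 10_000
per_device_train_batch_size = 24
num_devices = 8
num_train_epochs = 20

train_batch_size = per_device_train_batch_size * num_devices  # 192
steps_per_epoch = max_train_samples // train_batch_size       # 52
total_steps = steps_per_epoch * num_train_epochs              # 1040
print(train_batch_size, steps_per_epoch, total_steps)
# With save_steps=10 and eval_steps=10, evaluation and checkpointing fire several times per epoch,
# which is enough to exercise the fixed save_steps logic quickly.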