Spaces:
Configuration error
Configuration error
Merge pull request #12 from borisdayma/feat-sweeps
Browse files
- seq2seq/run_seq2seq_flax.py +1 -1
- seq2seq/sweep.yaml +37 -0
seq2seq/run_seq2seq_flax.py
CHANGED
|
@@ -152,7 +152,7 @@ class DataTrainingArguments:
|
|
| 152 |
metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
|
| 153 |
)
|
| 154 |
max_source_length: Optional[int] = field(
|
| 155 |
-
default=
|
| 156 |
metadata={
|
| 157 |
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
| 158 |
"than this will be truncated, sequences shorter will be padded."
|
|
|
|
| 152 |
metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
|
| 153 |
)
|
| 154 |
max_source_length: Optional[int] = field(
|
| 155 |
+
default=128,
|
| 156 |
metadata={
|
| 157 |
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
| 158 |
"than this will be truncated, sequences shorter will be padded."
|
seq2seq/sweep.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program: run_seq2seq_flax.py
|
| 2 |
+
entity: wandb
|
| 3 |
+
project: hf-flax-dalle-mini
|
| 4 |
+
method: random
|
| 5 |
+
metric:
|
| 6 |
+
name: eval/loss
|
| 7 |
+
goal: minimize
|
| 8 |
+
parameters:
|
| 9 |
+
learning_rate:
|
| 10 |
+
distribution: log_uniform
|
| 11 |
+
# log_uniform samples between exp(min) and exp(max), i.e. roughly 1e-5 to 1e-3 on a log scale
|
| 12 |
+
min: -11.5
|
| 13 |
+
max: -6.9
|
| 14 |
+
gradient_accumulation_steps:
|
| 15 |
+
value: 8
|
| 16 |
+
warmup_steps:
|
| 17 |
+
value: 1000
|
| 18 |
+
command:
|
| 19 |
+
- python3
|
| 20 |
+
- ${program}
|
| 21 |
+
- "--output_dir"
|
| 22 |
+
- "./output_sweep"
|
| 23 |
+
- "--overwrite_output_dir"
|
| 24 |
+
- "--adafactor"
|
| 25 |
+
- "--num_train_epochs"
|
| 26 |
+
- 1
|
| 27 |
+
- "--max_train_samples"
|
| 28 |
+
- 1000
|
| 29 |
+
- "--per_device_train_batch_size"
|
| 30 |
+
- 32
|
| 31 |
+
- "--per_device_eval_batch_size"
|
| 32 |
+
- 32
|
| 33 |
+
- "--preprocessing_num_workers"
|
| 34 |
+
- 80
|
| 35 |
+
- "--do_train"
|
| 36 |
+
- "--do_eval"
|
| 37 |
+
- ${args}
|