# @package _group_
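
# Joint speech-text (HuBERT-style) pretraining config. Every field set to ???
# is mandatory and must be provided as a command-line override; a launch
# sketch is given at the end of this file.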
common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog
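
# Checkpointing: save every epoch and every 5000 updates; keep_last_epochs: 1
# retains only the newest epoch checkpoint, while keep_interval_updates: -1
# keeps every 5000-update checkpoint.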
checkpoint:
  save_dir: ???
  save_interval: 1
  keep_last_epochs: 1
  save_interval_updates: 5000
  keep_interval_updates: -1
  # no_epoch_checkpoints: true
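
# 32 processes at 8 per node gives a 4-node job (assuming one GPU per process).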
distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 32
  distributed_port: 29671
  nprocs_per_node: 8
  find_unused_parameters: true
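
# At the 16 kHz sample_rate below, max_sample_size 250000 is ~15.6 s of audio
# and min_sample_size 32000 is 2 s; longer utterances are randomly cropped.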
task:
  _name: iwslt_joint_pretraining
  data: ???
  label_dir: ???
  labels: ???
  label_rate: ${model.label_rate}
  sample_rate: 16000
  max_sample_size: 250000
  min_sample_size: 32000
  pad_audio: false
  random_crop: true
  normalize: false  # must be consistent with extractor
  add_decoder: false
  text_cfg:
    seed: ${common.seed}
    text_data: ???
    data_config: config.yaml
    sample_break_mode: eos
    tokens_per_sample: 512
    shorten_method: "random_crop"
    text_maxtokens_ratio: 1.0
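
# max_tokens here counts waveform samples, so 1,400,000 is ~87.5 s of 16 kHz
# audio per batch per GPU (an assumption carried over from HuBERT's batching).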
dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: ${checkpoint.save_interval}
  validate_interval_updates: ${checkpoint.save_interval_updates}
  required_batch_size_multiple: 1
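
# HuBERT-style criterion: only masked frames are scored (pred_nomask_weight
# is 0.0), and loss_weights scales the auxiliary feature-penalty loss, as in
# wav2vec 2.0. label_smoothing applies to the decoder's cross-entropy when a
# decoder is enabled (an assumption based on the add_decoder task option).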
criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10,]
  label_smoothing: 0.1
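
# Trains for 800k updates at a peak lr of 1e-4; the polynomial_decay scheduler
# below warms up over the first 32k updates and decays afterwards.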
optimization:
  max_update: 800000
  lr: [0.0001]
  clip_norm: 10.0

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

model:
  _name: hubert
  label_rate: ???
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
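  # The extractor's strides are 5,2,2,2,2,2,2, multiplying to a 320x
  # downsampling: 16000 Hz / 320 = 50 frames per second. label_rate should
  # match the rate of the frame-level labels (50 for labels extracted at
  # this frame rate; an assumption, it depends on how the labels were made).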
  final_dim: 256
  encoder_layerdrop: 0.05
  decoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0
  use_rel_pos_enc: true
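  # Text-encoder settings. The ${model.*} interpolations below reference
  # fields (activation_fn, adaptive_input, checkpoint_activations,
  # quant_noise_pq) that are not set in this file and resolve against the
  # model's config-dataclass defaults.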
  text_transformer:
    activation_fn: ${model.activation_fn}
    dropout: ${model.dropout}
    attention_dropout: ${model.attention_dropout}
    activation_dropout: ${model.activation_dropout}
    adaptive_input: ${model.adaptive_input}
    max_source_positions: ${task.text_cfg.tokens_per_sample}
    checkpoint_activations: ${model.checkpoint_activations}
    no_scale_embedding: false
    layernorm_embedding: false
    quant_noise:
      pq: ${model.quant_noise_pq}
    encoder:
      embed_dim: 768
      ffn_embed_dim: 3072
      layers: 6
      attention_heads: 12
      normalize_before: false
      learned_pos: false
      layerdrop: ${model.encoder_layerdrop}
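
# Hydra bookkeeping: override_dirname joins the CLI overrides into a single
# string ('-' between key and value, '__' between items), excluding the keys
# below, and names each sweep subdirectory.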
hydra:
  job:
    config:
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    dir: ???
  sweep:
    dir: ???
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}
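
# Launch sketch (illustrative, not the authors' exact command; all paths and
# the label name 'km' are placeholders):
#   fairseq-hydra-train \
#     --config-dir /path/to/config_dir --config-name <this_config> \
#     task.data=/path/to/audio_manifests task.label_dir=/path/to/labels \
#     task.labels='["km"]' task.text_cfg.text_data=/path/to/text \
#     model.label_rate=50 checkpoint.save_dir=/path/to/checkpoints \
#     hydra.run.dir=/path/to/run_dir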