# @package _group_
common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog
checkpoint:
  save_dir: ???
  save_interval: 1
  keep_last_epochs: 1
  save_interval_updates: 5000
  keep_interval_updates: -1
  # no_epoch_checkpoints: true
distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 32
  distributed_port: 29671
  nprocs_per_node: 8
  find_unused_parameters: true
task:
  _name: iwslt_joint_pretraining
  data: ???
  label_dir: ???
  labels: ???
  label_rate: ${model.label_rate}
  sample_rate: 16000
  max_sample_size: 250000
  min_sample_size: 32000
  pad_audio: false
  random_crop: true
  normalize: false  # must be consistent with extractor
  add_decoder: false
  text_cfg:
    seed: ${common.seed}
    text_data: ???
    data_config: config.yaml
    sample_break_mode: eos
    tokens_per_sample: 512
    shorten_method: "random_crop"
    text_maxtokens_ratio: 1.0
dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: ${checkpoint.save_interval}
  validate_interval_updates: ${checkpoint.save_interval_updates}
  required_batch_size_multiple: 1
criterion:
  _name: hubert
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10,]
  label_smoothing: 0.1
optimization:
  max_update: 800000
  lr: [0.0001]
  clip_norm: 10.0
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000
model:
  _name: hubert
  label_rate: ???
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  final_dim: 256
  encoder_layerdrop: 0.05
  decoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0
  use_rel_pos_enc: true
  text_transformer:
    activation_fn: ${model.activation_fn}
    dropout: ${model.dropout}
    attention_dropout: ${model.attention_dropout}
    activation_dropout: ${model.activation_dropout}
    adaptive_input: ${model.adaptive_input}
    max_source_positions: ${task.text_cfg.tokens_per_sample}
    checkpoint_activations: ${model.checkpoint_activations}
    no_scale_embedding: false
    layernorm_embedding: false
    quant_noise:
      pq: ${model.quant_noise_pq}
    encoder:
      embed_dim: 768
      ffn_embed_dim: 3072
      layers: 6
      attention_heads: 12
      normalize_before: false
      learned_pos: false
      layerdrop: ${model.encoder_layerdrop}
hydra:
  job:
    config:
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    dir: ???
  sweep:
    dir: ???
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}