|
[ |
|
9216, |
|
41, |
|
{ |
|
"a_upsample_ratio": 1, |
|
"accum_grad": 2, |
|
"adim": 768, |
|
"aheads": 12, |
|
"apply_uttmvn": true, |
|
"aux_lsm_weight": 0.0, |
|
"backend": "pytorch", |
|
"badim": 320, |
|
"batch_bins": 0, |
|
"batch_count": "auto", |
|
"batch_frames_in": 0, |
|
"batch_frames_inout": 0, |
|
"batch_frames_out": 0, |
|
"bdropout_rate": 0.0, |
|
"beam_size": 4, |
|
"blayers": 2, |
|
"bnmask": 2, |
|
"bprojs": 300, |
|
"btype": "blstmp", |
|
"bunits": 300, |
|
"cnn_module_kernel": 31, |
|
"config2": null, |
|
"config3": null, |
|
"context_residual": false, |
|
"criterion": "acc", |
|
"ctc_type": "warpctc", |
|
"ctc_weight": 0.3, |
|
"debugmode": 1, |
|
"dec_init": null, |
|
"dec_init_mods": [ |
|
"att.", |
|
" dec." |
|
], |
|
"dict": "data/lang_1char/units.txt", |
|
"dlayers": 6, |
|
"dropout_rate": 0.1, |
|
"dunits": 3072, |
|
"early_stop_criterion": "validation/main/acc", |
|
"elayers": 12, |
|
"enc_init": null, |
|
"enc_init_mods": [ |
|
"enc.enc." |
|
], |
|
"eps": 1e-08, |
|
"eps_decay": 0.01, |
|
"eunits": 3072, |
|
"fbank_fmax": null, |
|
"fbank_fmin": 0.0, |
|
"fbank_fs": 16000, |
|
"grad_clip": 5.0, |
|
"grad_noise": false, |
|
"labels_type": "unigram5000", |
|
"lm_weight": 0.1, |
|
"lsm_weight": 0.1, |
|
"macaron_style": 1, |
|
"maxlen_in": 220, |
|
"maxlen_out": 220, |
|
"maxlenratio": 0.0, |
|
"minibatches": 0, |
|
"minlenratio": 0.0, |
|
"model_module": "espnet.nets.pytorch_backend.e2e_asr_transformer_multitask_dual:E2E", |
|
"mtl_custom_worker_l1_weight": 0.0, |
|
"mtl_custom_worker_length_normalized_loss": 0, |
|
"mtl_custom_worker_mlp_hdim": 256, |
|
"mtl_custom_worker_mlp_nlayers": 2, |
|
"mtl_custom_worker_mlp_nonlin_end": 0, |
|
"mtl_custom_worker_mlp_nonlin_type": "relu", |
|
"mtl_custom_worker_name": "patrickvonplaten/wav2vec2-base", |
|
"mtl_custom_worker_task_type": "", |
|
"mtl_custom_worker_tgt_type": "projected_quantized_states", |
|
"mtl_kl_weight": 0.0, |
|
"mtl_kl_weight_2": 0.0, |
|
"mtl_l1_weight": 0.4, |
|
"mtl_l1_weight_2": 0.4, |
|
"mtl_length_normalized_loss": 1, |
|
"mtl_length_normalized_loss_2": 1, |
|
"mtl_mlp_hdim": 256, |
|
"mtl_mlp_hdim_2": 256, |
|
"mtl_mlp_nlayers": 1, |
|
"mtl_mlp_nlayers_2": 1, |
|
"mtl_mlp_nonlin_end": 0, |
|
"mtl_mlp_nonlin_end_2": 0, |
|
"mtl_mlp_nonlin_type": "relu", |
|
"mtl_mlp_nonlin_type_2": "relu", |
|
"mtl_task_layer": "conformer6", |
|
"mtl_task_type": "l1", |
|
"mtl_task_type_2": "l1", |
|
"mtl_worker_source": "conv1d_lrs3_v04_lrs2", |
|
"mtl_worker_source_2": "conv3d_lrs3_v04_lrs2_dual", |
|
"mtlalpha": 0.1, |
|
"n_iter_processes": 12, |
|
"n_mels": 80, |
|
"nbest": 1, |
|
"ngpu": 1, |
|
"num_encs": 1, |
|
"num_input": 2, |
|
"num_save_attention": 3, |
|
"num_spkrs": 1, |
|
"opt": "noam", |
|
"patience": 0, |
|
"penalty": 0.0, |
|
"preprocess_conf": null, |
|
"pretrain_dataset": "lrs2_full_dual_ignore", |
|
"raw_max_freq_width": 150, |
|
"raw_max_speed_rate": 1.1, |
|
"raw_max_time_width": 0.4, |
|
"raw_min_speed_rate": 0.9, |
|
"raw_n_freq_mask": 2, |
|
"raw_n_time_mask": 2, |
|
"raw_speech_do_normalize": false, |
|
"ref_channel": -1, |
|
"rel_pos_type": "latest", |
|
"relu_type": "swish", |
|
"report_cer": false, |
|
"report_interval_iters": 100, |
|
"report_wer": false, |
|
"rnnlm": null, |
|
"rnnlm_conf": null, |
|
"save_interval_iters": 0, |
|
"seed": 1, |
|
"sortagrad": 0, |
|
"specaug_max_freq_width": 30, |
|
"specaug_max_time_warp": 5, |
|
"specaug_max_time_width": 40, |
|
"specaug_n_freq_mask": 2, |
|
"specaug_n_time_mask": 2, |
|
"sr_interp_mode": "nearest", |
|
"sr_interp_scale_factor": 1.0, |
|
"stats_file": null, |
|
"sym_blank": "<blank>", |
|
"sym_space": "<space>", |
|
"threshold": 0.0001, |
|
"train_dtype": "float32", |
|
"transformer_attn_dropout_rate": 0.1, |
|
"transformer_encoder_attn_layer_type": "rel_mha", |
|
"transformer_init": "pytorch", |
|
"transformer_input_layer": "conv3d", |
|
"transformer_length_normalized_loss": 0, |
|
"transformer_warmup_steps": 25000, |
|
"use_beamformer": true, |
|
"use_cnn_module": 1, |
|
"use_dnn_mask_for_wpe": false, |
|
"use_freqmask": false, |
|
"use_frontend": false, |
|
"use_noiseaug": false, |
|
"use_specaug": false, |
|
"use_speedaug": false, |
|
"use_timemask": false, |
|
"use_v_adaptive_timemask": true, |
|
"use_v_cutout": false, |
|
"use_v_timemask": false, |
|
"use_wpe": false, |
|
"uttmvn_norm_means": true, |
|
"uttmvn_norm_vars": false, |
|
"v_cutout_max_hole_length": 22, |
|
"v_cutout_n_holes": 1, |
|
"v_raw_max_time_width": 0.4, |
|
"v_raw_n_time_mask": 1, |
|
"v_timemask_replace_with_zero": false, |
|
"v_timemask_stride": 1.0, |
|
"verbose": 0, |
|
"wavaugments": null, |
|
"wdropout_rate": 0.0, |
|
"weight_decay": 0.0, |
|
"wlayers": 2, |
|
"wpe_delay": 3, |
|
"wpe_taps": 5, |
|
"wprojs": 300, |
|
"wtype": "blstmp", |
|
"wunits": 300, |
|
"zero_triu": false |
|
} |
|
] |