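# vec2wav2.0 / config.yml
# Hosting-page residue (not part of the configuration itself):
#   uploader avatar text "cantabile-kwok's picture",
#   commit message "Upload config.yml", commit fab5f60 (verified).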
# Training configuration with generator, discriminator, frontend and loss
# sections.
#
# NOTE(review): the nesting below was reconstructed from a copy whose leading
# indentation had been stripped (which left many duplicate top-level keys).
# Grouping assumes the keys of every mapping are in sorted order, as the flat
# listing shows -- TODO confirm against the upstream file.
allow_cache: false
batch_frames: 3600
config: conf/ctxv2w.v1.yaml
crop_max_frames: 100

# ---------------------------------------------------------------------------
# Discriminator
# ---------------------------------------------------------------------------
discriminator_adv_loss_params:
  average_by_discriminators: false
# NOTE(review): -1 presumably disables gradient clipping -- confirm in trainer.
discriminator_grad_norm: -1
discriminator_optimizer_params:
  betas:
  - 0.5
  - 0.9
  lr: 0.0002
  weight_decay: 0.0
discriminator_optimizer_type: Adam
discriminator_params:
  follow_official_norm: true
  period_discriminator_params:
    bias: true
    channels: 32
    downsample_scales:
    - 3
    - 3
    - 3
    - 3
    - 1
    in_channels: 1
    kernel_sizes:
    - 5
    - 3
    max_downsample_channels: 1024
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1
    use_spectral_norm: false
    use_weight_norm: true
  periods:
  - 2
  - 3
  - 5
  - 7
  - 11
  scale_discriminator_params:
    bias: true
    channels: 128
    downsample_scales:
    - 4
    - 4
    - 4
    - 4
    - 1
    in_channels: 1
    kernel_sizes:
    - 15
    - 41
    - 5
    - 3
    max_downsample_channels: 1024
    max_groups: 16
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1
  scale_downsample_pooling: AvgPool1d
  scale_downsample_pooling_params:
    kernel_size: 4
    padding: 2
    stride: 2
  scales: 3
discriminator_scheduler_params:
  gamma: 0.5
  milestones:
  - 200000
  - 400000
  - 600000
  - 800000
discriminator_scheduler_type: MultiStepLR
discriminator_train_start_steps: 0
discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator

distributed: true
dropout_features: 0.0
eval_interval_steps: 100000

feat_match_loss_params:
  average_by_discriminators: false
  average_by_layers: false
  include_final_outputs: false

# ---------------------------------------------------------------------------
# Frontend
# ---------------------------------------------------------------------------
frontend_mel_prediction_stop_steps: 200000
frontend_params:
  conformer_params:
    activation_type: swish
    # Same value as generator_params.in_channels (184).
    attention_dim: 184
    attention_dropout_rate: 0.2
    attention_heads: 2
    cnn_module_kernel: 31
    concat_after: false
    dropout_rate: 0.2
    linear_units: 1536
    macaron_style: true
    normalize_before: true
    num_blocks: 2
    pos_enc_layer_type: rel_pos
    positional_dropout_rate: 0.2
    positionwise_conv_kernel_size: 3
    positionwise_layer_type: conv1d
    selfattention_layer_type: rel_selfattn
    use_cnn_module: true
  # Same value as generator_params.condition_dim (1024).
  prompt_channels: 1024
  vqvec_channels: 512

# ---------------------------------------------------------------------------
# Generator
# ---------------------------------------------------------------------------
generator_adv_loss_params:
  average_by_discriminators: false
# NOTE(review): -1 presumably disables gradient clipping -- confirm in trainer.
generator_grad_norm: -1
generator_optimizer_params:
  betas:
  - 0.5
  - 0.9
  lr: 0.0002
  weight_decay: 0.0
generator_optimizer_type: Adam
generator_params:
  bias: true
  channels: 512
  condition_dim: 1024
  in_channels: 184
  kernel_size: 7
  nonlinear_activation: snakebeta-condition
  out_channels: 1
  # Quoted on purpose: the value is the string '1', not the integer 1.
  resblock: '1'
  resblock_dilations:
  - - 1
    - 3
    - 5
  - - 1
    - 3
    - 5
  - - 1
    - 3
    - 5
  resblock_kernel_sizes:
  - 3
  - 7
  - 11
  snake_logscale: true
  upsample_kernel_sizes:
  - 16
  - 10
  - 6
  - 4
  # Product of these scales is 8 * 5 * 3 * 2 = 240, equal to top-level hop_size.
  upsample_scales:
  - 8
  - 5
  - 3
  - 2
  use_additional_convs: true
  use_weight_norm: true
generator_scheduler_params:
  gamma: 0.5
  milestones:
  - 200000
  - 400000
  - 600000
  - 800000
generator_scheduler_type: MultiStepLR
generator_train_start_steps: 1
generator_type: BigVGAN

hop_size: 240

# ---------------------------------------------------------------------------
# Loss weights
# ---------------------------------------------------------------------------
lambda_adv: 1.0
lambda_aux: 45.0
lambda_feat_match: 2.0
lambda_frontend_mel_prediction: 60

log_interval_steps: 1000
max_num_frames: 3000
mel_loss_params:
  fft_size: 2048
  fmax: 8000
  fmin: 40
  fs: 24000
  # NOTE(review): differs from the top-level hop_size (240) -- confirm intended.
  hop_size: 300
  log_base: null
  num_mels: 80
  # NOTE(review): differs from the top-level win_length (697) -- confirm intended.
  win_length: 1200
  window: hann
min_num_frames: 600
num_mels: 80
num_save_intermediate_results: 4
num_workers: 8
outdir: exp/train_all_ctxv2w.v1
pin_memory: true
# Explicit empty string (not null).
pretrain: ''
prompt_fold_by_2: true
prompt_net_type: ConvPromptPrenet
rank: 0
sampling_rate: 24000
save_interval_steps: 10000
use_feat_match_loss: true
use_mel_loss: true
use_stft_loss: false
verbose: 1
version: 0.5.3
vq_codebook: feats/vqidx/codebook.npy
win_length: 697
world_size: 4