# Random seed for the run.
seed: 1234

# Data
# Both path-like settings below are empty strings here; presumably they are
# supplied per run (TODO confirm against the code that loads this config).
f0_path: ''
p_train: 0.95        # presumably the fraction of data used for training; verify
min_frames: null
batch_size: 128
# Comma-separated names kept as a single plain string, not a YAML list.
features: f0_interp,vuv
out_features: norm_f0_interp,vuv
segment_size: null
segment_multi: 16
num_workers: 4
vuv_scale: 2
speaker_stats: ''
recon_loss_fn: l1_loss
# Optimization
learning_rate: 0.0002
# adam_b1 / adam_b2: presumably the Adam beta coefficients; verify against the trainer.
adam_b1: 0.8
adam_b2: 0.99
lr_decay: 0.999
lambda_commit: 0.02  # presumably the weight on the VQ commitment loss term; confirm
# VQ params
# Nested mapping: every key below belongs to vq_params.
vq_params:
  l_bins: 64
  emb_width: 128
  mu: 0.99
  levels: 1
# Encoder params
# Nested mapping: every key below belongs to encoder_params.
encoder_params:
  input_emb_width: 2
  output_emb_width: 128
  levels: 1
  # downs_t and strides_t are single-item lists, in line with levels: 1.
  downs_t:
    - 4
  strides_t:
    - 2
  width: 32
  depth: 4
  m_conv: 1.0
  dilation_growth_rate: 3
# Decoder params
# Nested mapping: every key below belongs to decoder_params.
decoder_params:
  input_emb_width: 2
  output_emb_width: 128
  levels: 1
  # downs_t and strides_t are single-item lists, in line with levels: 1.
  downs_t:
    - 4
  strides_t:
    - 2
  width: 32
  depth: 4
  m_conv: 1.0
  dilation_growth_rate: 3