cantabile-kwok committed on
Commit
fab5f60
·
verified ·
1 Parent(s): 58f5ee0

Upload config.yml

Browse files
Files changed (1) hide show
  1. config.yml +201 -0
config.yml ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ allow_cache: false
2
+ batch_frames: 3600
3
+ config: conf/ctxv2w.v1.yaml
4
+ crop_max_frames: 100
5
+ discriminator_adv_loss_params:
6
+ average_by_discriminators: false
7
+ discriminator_grad_norm: -1
8
+ discriminator_optimizer_params:
9
+ betas:
10
+ - 0.5
11
+ - 0.9
12
+ lr: 0.0002
13
+ weight_decay: 0.0
14
+ discriminator_optimizer_type: Adam
15
+ discriminator_params:
16
+ follow_official_norm: true
17
+ period_discriminator_params:
18
+ bias: true
19
+ channels: 32
20
+ downsample_scales:
21
+ - 3
22
+ - 3
23
+ - 3
24
+ - 3
25
+ - 1
26
+ in_channels: 1
27
+ kernel_sizes:
28
+ - 5
29
+ - 3
30
+ max_downsample_channels: 1024
31
+ nonlinear_activation: LeakyReLU
32
+ nonlinear_activation_params:
33
+ negative_slope: 0.1
34
+ out_channels: 1
35
+ use_spectral_norm: false
36
+ use_weight_norm: true
37
+ periods:
38
+ - 2
39
+ - 3
40
+ - 5
41
+ - 7
42
+ - 11
43
+ scale_discriminator_params:
44
+ bias: true
45
+ channels: 128
46
+ downsample_scales:
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 1
52
+ in_channels: 1
53
+ kernel_sizes:
54
+ - 15
55
+ - 41
56
+ - 5
57
+ - 3
58
+ max_downsample_channels: 1024
59
+ max_groups: 16
60
+ nonlinear_activation: LeakyReLU
61
+ nonlinear_activation_params:
62
+ negative_slope: 0.1
63
+ out_channels: 1
64
+ scale_downsample_pooling: AvgPool1d
65
+ scale_downsample_pooling_params:
66
+ kernel_size: 4
67
+ padding: 2
68
+ stride: 2
69
+ scales: 3
70
+ discriminator_scheduler_params:
71
+ gamma: 0.5
72
+ milestones:
73
+ - 200000
74
+ - 400000
75
+ - 600000
76
+ - 800000
77
+ discriminator_scheduler_type: MultiStepLR
78
+ discriminator_train_start_steps: 0
79
+ discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator
80
+ distributed: true
81
+ dropout_features: 0.0
82
+ eval_interval_steps: 100000
83
+ feat_match_loss_params:
84
+ average_by_discriminators: false
85
+ average_by_layers: false
86
+ include_final_outputs: false
87
+ frontend_mel_prediction_stop_steps: 200000
88
+ frontend_params:
89
+ conformer_params:
90
+ activation_type: swish
91
+ attention_dim: 184
92
+ attention_dropout_rate: 0.2
93
+ attention_heads: 2
94
+ cnn_module_kernel: 31
95
+ concat_after: false
96
+ dropout_rate: 0.2
97
+ linear_units: 1536
98
+ macaron_style: true
99
+ normalize_before: true
100
+ num_blocks: 2
101
+ pos_enc_layer_type: rel_pos
102
+ positional_dropout_rate: 0.2
103
+ positionwise_conv_kernel_size: 3
104
+ positionwise_layer_type: conv1d
105
+ selfattention_layer_type: rel_selfattn
106
+ use_cnn_module: true
107
+ prompt_channels: 1024
108
+ vqvec_channels: 512
109
+ generator_adv_loss_params:
110
+ average_by_discriminators: false
111
+ generator_grad_norm: -1
112
+ generator_optimizer_params:
113
+ betas:
114
+ - 0.5
115
+ - 0.9
116
+ lr: 0.0002
117
+ weight_decay: 0.0
118
+ generator_optimizer_type: Adam
119
+ generator_params:
120
+ bias: true
121
+ channels: 512
122
+ condition_dim: 1024
123
+ in_channels: 184
124
+ kernel_size: 7
125
+ nonlinear_activation: snakebeta-condition
126
+ out_channels: 1
127
+ resblock: '1'
128
+ resblock_dilations:
129
+ - - 1
130
+ - 3
131
+ - 5
132
+ - - 1
133
+ - 3
134
+ - 5
135
+ - - 1
136
+ - 3
137
+ - 5
138
+ resblock_kernel_sizes:
139
+ - 3
140
+ - 7
141
+ - 11
142
+ snake_logscale: true
143
+ upsample_kernel_sizes:
144
+ - 16
145
+ - 10
146
+ - 6
147
+ - 4
148
+ upsample_scales:
149
+ - 8
150
+ - 5
151
+ - 3
152
+ - 2
153
+ use_additional_convs: true
154
+ use_weight_norm: true
155
+ generator_scheduler_params:
156
+ gamma: 0.5
157
+ milestones:
158
+ - 200000
159
+ - 400000
160
+ - 600000
161
+ - 800000
162
+ generator_scheduler_type: MultiStepLR
163
+ generator_train_start_steps: 1
164
+ generator_type: BigVGAN
165
+ hop_size: 240
166
+ lambda_adv: 1.0
167
+ lambda_aux: 45.0
168
+ lambda_feat_match: 2.0
169
+ lambda_frontend_mel_prediction: 60
170
+ log_interval_steps: 1000
171
+ max_num_frames: 3000
172
+ mel_loss_params:
173
+ fft_size: 2048
174
+ fmax: 8000
175
+ fmin: 40
176
+ fs: 24000
177
+ hop_size: 300
178
+ log_base: null
179
+ num_mels: 80
180
+ win_length: 1200
181
+ window: hann
182
+ min_num_frames: 600
183
+ num_mels: 80
184
+ num_save_intermediate_results: 4
185
+ num_workers: 8
186
+ outdir: exp/train_all_ctxv2w.v1
187
+ pin_memory: true
188
+ pretrain: ''
189
+ prompt_fold_by_2: true
190
+ prompt_net_type: ConvPromptPrenet
191
+ rank: 0
192
+ sampling_rate: 24000
193
+ save_interval_steps: 10000
194
+ use_feat_match_loss: true
195
+ use_mel_loss: true
196
+ use_stft_loss: false
197
+ verbose: 1
198
+ version: 0.5.3
199
+ vq_codebook: feats/vqidx/codebook.npy
200
+ win_length: 697
201
+ world_size: 4