imenLa committed on
Commit
31be6db
·
verified ·
1 Parent(s): 2bb4eba

Upload 2 files

Browse files
exp/tts_train_conformer_fastspeech2_raw_phn_none/checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047620e8a39eb4f330f1244dd03b62f37b74cf136e725292e5c854d20dca955a
3
+ size 845573147
exp/tts_train_conformer_fastspeech2_raw_phn_none/config.yaml ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_conformer_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_conformer_fastspeech2_raw_phn_none
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 1000
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 10
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ create_graph_in_tensorboard: false
57
+ use_wandb: false
58
+ wandb_project: null
59
+ wandb_id: null
60
+ wandb_entity: null
61
+ wandb_name: null
62
+ wandb_model_log_interval: -1
63
+ detect_anomaly: false
64
+ pretrain_path: null
65
+ init_param: []
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: 200
69
+ batch_size: 20
70
+ valid_batch_size: null
71
+ batch_bins: 2000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.phn
75
+ - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape
76
+ valid_shape_file:
77
+ - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.phn
78
+ - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape
79
+ batch_type: numel
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 150
83
+ - 204800
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ chunk_excluded_key_prefixes: []
91
+ train_data_path_and_name_and_type:
92
+ - - dump/raw/train/text
93
+ - text
94
+ - text
95
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_tacotron2_use_teacher_forcingtrue_train.loss.ave/train/durations
96
+ - durations
97
+ - text_int
98
+ - - dump/raw/train/wav.scp
99
+ - speech
100
+ - sound
101
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/pitch.scp
102
+ - pitch
103
+ - npy
104
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/energy.scp
105
+ - energy
106
+ - npy
107
+ valid_data_path_and_name_and_type:
108
+ - - dump/raw/dev/text
109
+ - text
110
+ - text
111
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_tacotron2_use_teacher_forcingtrue_train.loss.ave/dev/durations
112
+ - durations
113
+ - text_int
114
+ - - dump/raw/dev/wav.scp
115
+ - speech
116
+ - sound
117
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/pitch.scp
118
+ - pitch
119
+ - npy
120
+ - - exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/energy.scp
121
+ - energy
122
+ - npy
123
+ allow_variable_data_keys: false
124
+ max_cache_size: 0.0
125
+ max_cache_fd: 32
126
+ valid_max_cache_size: null
127
+ exclude_weight_decay: false
128
+ exclude_weight_decay_conf: {}
129
+ optim: adam
130
+ optim_conf:
131
+ lr: 1.0
132
+ scheduler: noamlr
133
+ scheduler_conf:
134
+ model_size: 384
135
+ warmup_steps: 4000
136
+ token_list:
137
+ - <blank>
138
+ - <unk>
139
+ - a
140
+ - sil
141
+ - l
142
+ - aa
143
+ - m
144
+ - ii0
145
+ - t
146
+ - <
147
+ - n
148
+ - r
149
+ - E
150
+ - i0
151
+ - b
152
+ - uu0
153
+ - f
154
+ - i1
155
+ - k
156
+ - w
157
+ - A
158
+ - s
159
+ - y
160
+ - d
161
+ - q
162
+ - h
163
+ - H
164
+ - $
165
+ - u0
166
+ - AA
167
+ - j
168
+ - T
169
+ - x
170
+ - S
171
+ - z
172
+ - ll
173
+ - I1
174
+ - D
175
+ - II0
176
+ - g
177
+ - tt
178
+ - rr
179
+ - I0
180
+ - UU0
181
+ - dd
182
+ - u1
183
+ - U0
184
+ - mm
185
+ - nn
186
+ - '*'
187
+ - $$
188
+ - bb
189
+ - yy
190
+ - ss
191
+ - jj
192
+ - ww
193
+ - ^
194
+ - SS
195
+ - TT
196
+ - Z
197
+ - zz
198
+ - kk
199
+ - U1
200
+ - HH
201
+ - ff
202
+ - qq
203
+ - xx
204
+ - ^^
205
+ - DD
206
+ - hh
207
+ - EE
208
+ - ZZ
209
+ - '**'
210
+ - aaaa
211
+ - ssss
212
+ - v
213
+ - uu1
214
+ - jjjj
215
+ - <sos/eos>
216
+ odim: null
217
+ model_conf: {}
218
+ use_preprocessor: true
219
+ token_type: phn
220
+ bpemodel: null
221
+ non_linguistic_symbols: null
222
+ cleaner: null
223
+ g2p: null
224
+ feats_extract: fbank
225
+ feats_extract_conf:
226
+ n_fft: 1024
227
+ hop_length: 256
228
+ win_length: null
229
+ fs: 22050
230
+ fmin: 80
231
+ fmax: 7600
232
+ n_mels: 80
233
+ normalize: global_mvn
234
+ normalize_conf:
235
+ stats_file: exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
236
+ tts: fastspeech2
237
+ tts_conf:
238
+ adim: 384
239
+ aheads: 2
240
+ elayers: 4
241
+ eunits: 1536
242
+ dlayers: 4
243
+ dunits: 1536
244
+ positionwise_layer_type: conv1d
245
+ positionwise_conv_kernel_size: 3
246
+ duration_predictor_layers: 2
247
+ duration_predictor_chans: 256
248
+ duration_predictor_kernel_size: 3
249
+ postnet_layers: 5
250
+ postnet_filts: 5
251
+ postnet_chans: 256
252
+ use_masking: true
253
+ encoder_normalize_before: true
254
+ decoder_normalize_before: true
255
+ reduction_factor: 3
256
+ encoder_type: conformer
257
+ decoder_type: conformer
258
+ conformer_pos_enc_layer_type: rel_pos
259
+ conformer_self_attn_layer_type: rel_selfattn
260
+ conformer_activation_type: swish
261
+ use_macaron_style_in_conformer: true
262
+ use_cnn_in_conformer: true
263
+ conformer_enc_kernel_size: 7
264
+ conformer_dec_kernel_size: 31
265
+ init_type: xavier_uniform
266
+ transformer_enc_dropout_rate: 0.2
267
+ transformer_enc_positional_dropout_rate: 0.2
268
+ transformer_enc_attn_dropout_rate: 0.2
269
+ transformer_dec_dropout_rate: 0.2
270
+ transformer_dec_positional_dropout_rate: 0.2
271
+ transformer_dec_attn_dropout_rate: 0.2
272
+ pitch_predictor_layers: 5
273
+ pitch_predictor_chans: 256
274
+ pitch_predictor_kernel_size: 5
275
+ pitch_predictor_dropout: 0.5
276
+ pitch_embed_kernel_size: 1
277
+ pitch_embed_dropout: 0.0
278
+ stop_gradient_from_pitch_predictor: true
279
+ energy_predictor_layers: 2
280
+ energy_predictor_chans: 256
281
+ energy_predictor_kernel_size: 3
282
+ energy_predictor_dropout: 0.5
283
+ energy_embed_kernel_size: 1
284
+ energy_embed_dropout: 0.0
285
+ stop_gradient_from_energy_predictor: false
286
+ pitch_extract: dio
287
+ pitch_extract_conf:
288
+ fs: 22050
289
+ n_fft: 1024
290
+ hop_length: 256
291
+ f0max: 400
292
+ f0min: 80
293
+ reduction_factor: 3
294
+ pitch_normalize: global_mvn
295
+ pitch_normalize_conf:
296
+ stats_file: exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz
297
+ energy_extract: energy
298
+ energy_extract_conf:
299
+ fs: 22050
300
+ n_fft: 1024
301
+ hop_length: 256
302
+ win_length: null
303
+ reduction_factor: 3
304
+ energy_normalize: global_mvn
305
+ energy_normalize_conf:
306
+ stats_file: exp/tts_finetune_tacotron2_phn_notoken_r3_lr03_sr22050/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz
307
+ required:
308
+ - output_dir
309
+ - token_list
310
+ version: '202304'
311
+ distributed: false