Korla committed on
Commit
b981464
·
verified ·
1 Parent(s): 71b0e3a

Upload 6 files

Browse files
Files changed (6) hide show
  1. korla.json +217 -0
  2. korla.pth +3 -0
  3. korla_speakers.pth +3 -0
  4. weronika.json +219 -0
  5. weronika.pth +3 -0
  6. weronika_speakers.pth +3 -0
korla.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/workspace",
3
+ "logger_uri": null,
4
+ "run_name": "vits_vctk",
5
+ "project_name": null,
6
+ "run_description": "\ud83d\udc38Coqui trainer run.",
7
+ "print_step": 25,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "tensorboard",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 10000,
14
+ "save_step": 10000,
15
+ "save_n_checkpoints": 5,
16
+ "save_checkpoints": true,
17
+ "save_all_best": false,
18
+ "save_best_after": 0,
19
+ "target_loss": null,
20
+ "print_eval": false,
21
+ "test_delay_epochs": -1,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": true,
27
+ "precision": "fp16",
28
+ "epochs": 1000,
29
+ "batch_size": 32,
30
+ "eval_batch_size": 16,
31
+ "grad_clip": [1000, 1000],
32
+ "scheduler_after_epoch": true,
33
+ "lr": 0.001,
34
+ "optimizer": "AdamW",
35
+ "optimizer_params": {
36
+ "betas": [0.8, 0.99],
37
+ "eps": 1e-9,
38
+ "weight_decay": 0.01
39
+ },
40
+ "lr_scheduler": null,
41
+ "lr_scheduler_params": {},
42
+ "use_grad_scaler": false,
43
+ "allow_tf32": false,
44
+ "cudnn_enable": true,
45
+ "cudnn_deterministic": false,
46
+ "cudnn_benchmark": false,
47
+ "training_seed": 54321,
48
+ "model": "vits",
49
+ "num_loader_workers": 4,
50
+ "num_eval_loader_workers": 4,
51
+ "use_noise_augment": false,
52
+ "audio": {
53
+ "fft_size": 1024,
54
+ "sample_rate": 22050,
55
+ "win_length": 1024,
56
+ "hop_length": 256,
57
+ "num_mels": 80,
58
+ "mel_fmin": 0,
59
+ "mel_fmax": null
60
+ },
61
+ "use_phonemes": false,
62
+ "phonemizer": null,
63
+ "phoneme_language": "en",
64
+ "compute_input_seq_cache": true,
65
+ "text_cleaner": "english_cleaners",
66
+ "enable_eos_bos_chars": false,
67
+ "test_sentences_file": "",
68
+ "phoneme_cache_path": "/workspace/phoneme_cache",
69
+ "characters": {
70
+ "characters_class": "TTS.tts.models.vits.VitsCharacters",
71
+ "vocab_dict": null,
72
+ "pad": "_",
73
+ "eos": "~",
74
+ "bos": "^",
75
+ "blank": null,
76
+ "characters": "ABCDEFGHIJKLMNOPRSTUVWXYZabcdefghijklmnopqrstuvwyz\u00f3\u0106\u0107\u010c\u010d\u011b\u0141\u0142\u0144\u0159\u0160\u0161\u017a\u017d\u017e ,",
77
+ "punctuations": ".!?",
78
+ "phonemes": null,
79
+ "is_unique": true,
80
+ "is_sorted": true
81
+ },
82
+ "add_blank": true,
83
+ "batch_group_size": 5,
84
+ "loss_masking": null,
85
+ "min_audio_len": 1,
86
+ "max_audio_len": Infinity,
87
+ "min_text_len": 1,
88
+ "max_text_len": 400,
89
+ "compute_f0": false,
90
+ "compute_energy": false,
91
+ "compute_linear_spec": true,
92
+ "precompute_num_workers": 0,
93
+ "start_by_longest": false,
94
+ "shuffle": false,
95
+ "drop_last": false,
96
+ "datasets": [
97
+ {
98
+ "formatter": "vctk_old",
99
+ "dataset_name": "",
100
+ "path": "korla",
101
+ "meta_file_train": "",
102
+ "ignored_speakers": null,
103
+ "language": "",
104
+ "phonemizer": "",
105
+ "meta_file_val": "",
106
+ "meta_file_attn_mask": ""
107
+ }
108
+ ],
109
+ "test_sentences": [
110
+ "W swojej h\u0142ownej etapje tworjenja we \u0141azowskim \u010dasu, hd\u017ae\u017e docp\u011b wjer\u0161k swojeho tworjenja, pokro\u010dowa\u0161e Zejler w tutym basnjenju, zdobom wustupowa\u0161e jako pr\u011bni serbski literat ze sylnej towar\u0161nostnej kritiku.",
111
+ "P\u0159eto\u017e Bo\u017ei hn\u011bw p\u0159ichad\u017aa z njebja na k\u00f3\u017edu bjezb\u00f3\u017enos\u0107 a njesprawnos\u0107 \u010d\u0142owjekow, kot\u0159i\u017e p\u0159ez swoju njesprawnos\u0107 prawdu pot\u0142\u00f3\u010duja."
112
+ ],
113
+ "eval_split_max_size": null,
114
+ "eval_split_size": 0.01,
115
+ "use_speaker_weighted_sampler": false,
116
+ "speaker_weighted_sampler_alpha": 1.0,
117
+ "use_language_weighted_sampler": false,
118
+ "language_weighted_sampler_alpha": 1.0,
119
+ "use_length_weighted_sampler": false,
120
+ "length_weighted_sampler_alpha": 1.0,
121
+ "model_args": {
122
+ "num_chars": 72,
123
+ "out_channels": 513,
124
+ "spec_segment_size": 32,
125
+ "hidden_channels": 192,
126
+ "hidden_channels_ffn_text_encoder": 768,
127
+ "num_heads_text_encoder": 2,
128
+ "num_layers_text_encoder": 6,
129
+ "kernel_size_text_encoder": 3,
130
+ "dropout_p_text_encoder": 0.1,
131
+ "dropout_p_duration_predictor": 0.5,
132
+ "kernel_size_posterior_encoder": 5,
133
+ "dilation_rate_posterior_encoder": 1,
134
+ "num_layers_posterior_encoder": 16,
135
+ "kernel_size_flow": 5,
136
+ "dilation_rate_flow": 1,
137
+ "num_layers_flow": 4,
138
+ "resblock_type_decoder": "1",
139
+ "resblock_kernel_sizes_decoder": [3, 7, 11],
140
+ "resblock_dilation_sizes_decoder": [
141
+ [1, 3, 5],
142
+ [1, 3, 5],
143
+ [1, 3, 5]
144
+ ],
145
+ "upsample_rates_decoder": [8, 8, 2, 2],
146
+ "upsample_initial_channel_decoder": 512,
147
+ "upsample_kernel_sizes_decoder": [16, 16, 4, 4],
148
+ "periods_multi_period_discriminator": [2, 3, 5, 7, 11],
149
+ "use_sdp": true,
150
+ "noise_scale": 1.0,
151
+ "inference_noise_scale": 0.667,
152
+ "length_scale": 1,
153
+ "noise_scale_dp": 1.0,
154
+ "inference_noise_scale_dp": 1.0,
155
+ "max_inference_len": null,
156
+ "init_discriminator": true,
157
+ "use_spectral_norm_disriminator": false,
158
+ "use_speaker_embedding": true,
159
+ "num_speakers": 1,
160
+ "speakers_file": "tts_models/korla_speakers.pth",
161
+ "d_vector_file": null,
162
+ "speaker_embedding_channels": 256,
163
+ "use_d_vector_file": false,
164
+ "d_vector_dim": 0,
165
+ "detach_dp_input": true,
166
+ "use_language_embedding": false,
167
+ "embedded_language_dim": 4,
168
+ "num_languages": 0,
169
+ "language_ids_file": null,
170
+ "use_speaker_encoder_as_loss": false,
171
+ "speaker_encoder_config_path": "",
172
+ "speaker_encoder_model_path": "",
173
+ "condition_dp_on_speaker": true,
174
+ "freeze_encoder": false,
175
+ "freeze_DP": false,
176
+ "freeze_PE": false,
177
+ "freeze_flow_decoder": false,
178
+ "freeze_waveform_decoder": false,
179
+ "encoder_sample_rate": null,
180
+ "interpolate_z": true,
181
+ "reinit_DP": false,
182
+ "reinit_text_encoder": false
183
+ },
184
+ "lr_gen": 5e-5,
185
+ "lr_disc": 5e-5,
186
+ "lr_scheduler_gen": "ExponentialLR",
187
+ "lr_scheduler_gen_params": {
188
+ "gamma": 0.999875,
189
+ "last_epoch": -1
190
+ },
191
+ "lr_scheduler_disc": "ExponentialLR",
192
+ "lr_scheduler_disc_params": {
193
+ "gamma": 0.999875,
194
+ "last_epoch": -1
195
+ },
196
+ "kl_loss_alpha": 1.0,
197
+ "disc_loss_alpha": 1.0,
198
+ "gen_loss_alpha": 1.0,
199
+ "feat_loss_alpha": 1.0,
200
+ "mel_loss_alpha": 45.0,
201
+ "dur_loss_alpha": 1.0,
202
+ "speaker_encoder_loss_alpha": 1.0,
203
+ "return_wav": true,
204
+ "use_weighted_sampler": false,
205
+ "weighted_sampler_attrs": {},
206
+ "weighted_sampler_multipliers": {},
207
+ "r": 1,
208
+ "num_speakers": 0,
209
+ "use_speaker_embedding": true,
210
+ "speakers_file": "/workspace/vits_vctk-July-22-2024_08+57AM-0000000/speakers.pth",
211
+ "speaker_embedding_channels": 256,
212
+ "language_ids_file": null,
213
+ "use_language_embedding": false,
214
+ "use_d_vector_file": false,
215
+ "d_vector_file": null,
216
+ "d_vector_dim": 0
217
+ }
korla.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2db12b1e2b986ff33051b2901216c30640c19fe114b77d60972ed73f7c6eec
3
+ size 1038028838
korla_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b44acdd525fb4d3ec34aec8c3e202a150e4cbe333f9a5014cb88df6b27aa9c
3
+ size 864
weronika.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/workspace",
3
+ "logger_uri": null,
4
+ "run_name": "vits_weronika",
5
+ "project_name": null,
6
+ "run_description": "\ud83d\udc38Coqui trainer run.",
7
+ "print_step": 25,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "tensorboard",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 10000,
14
+ "save_step": 10000,
15
+ "save_n_checkpoints": 5,
16
+ "save_checkpoints": true,
17
+ "save_all_best": false,
18
+ "save_best_after": 0,
19
+ "target_loss": null,
20
+ "print_eval": false,
21
+ "test_delay_epochs": -1,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": true,
27
+ "precision": "fp16",
28
+ "epochs": 1000,
29
+ "batch_size": 32,
30
+ "eval_batch_size": 16,
31
+ "grad_clip": [1000, 1000],
32
+ "scheduler_after_epoch": true,
33
+ "lr": 0.001,
34
+ "optimizer": "AdamW",
35
+ "optimizer_params": {
36
+ "betas": [0.8, 0.99],
37
+ "eps": 1e-9,
38
+ "weight_decay": 0.01
39
+ },
40
+ "lr_scheduler": null,
41
+ "lr_scheduler_params": {},
42
+ "use_grad_scaler": false,
43
+ "allow_tf32": false,
44
+ "cudnn_enable": true,
45
+ "cudnn_deterministic": false,
46
+ "cudnn_benchmark": false,
47
+ "training_seed": 54321,
48
+ "model": "vits",
49
+ "num_loader_workers": 4,
50
+ "num_eval_loader_workers": 4,
51
+ "use_noise_augment": false,
52
+ "audio": {
53
+ "fft_size": 1024,
54
+ "sample_rate": 22050,
55
+ "win_length": 1024,
56
+ "hop_length": 256,
57
+ "num_mels": 80,
58
+ "mel_fmin": 0,
59
+ "mel_fmax": null
60
+ },
61
+ "use_phonemes": false,
62
+ "phonemizer": null,
63
+ "phoneme_language": "en",
64
+ "compute_input_seq_cache": true,
65
+ "text_cleaner": "english_cleaners",
66
+ "enable_eos_bos_chars": false,
67
+ "test_sentences_file": "",
68
+ "phoneme_cache_path": "/workspace/phoneme_cache",
69
+ "characters": {
70
+ "characters_class": "TTS.tts.models.vits.VitsCharacters",
71
+ "vocab_dict": null,
72
+ "pad": "_",
73
+ "eos": "~",
74
+ "bos": "^",
75
+ "blank": null,
76
+ "characters": "ABCDEFGHIJKLMNOPRSTUVWXYZabcdefghijklmnopqrstuvwyz\u00f3\u0106\u0107\u010c\u010d\u011b\u0141\u0142\u0144\u0159\u0160\u0161\u017a\u017d\u017e ,",
77
+ "punctuations": ".!?",
78
+ "phonemes": null,
79
+ "is_unique": true,
80
+ "is_sorted": true
81
+ },
82
+ "add_blank": true,
83
+ "batch_group_size": 5,
84
+ "loss_masking": null,
85
+ "min_audio_len": 1,
86
+ "max_audio_len": Infinity,
87
+ "min_text_len": 1,
88
+ "max_text_len": 400,
89
+ "compute_f0": false,
90
+ "compute_energy": false,
91
+ "compute_linear_spec": true,
92
+ "precompute_num_workers": 0,
93
+ "start_by_longest": false,
94
+ "shuffle": false,
95
+ "drop_last": false,
96
+ "datasets": [
97
+ {
98
+ "formatter": "vctk_old",
99
+ "dataset_name": "",
100
+ "path": "vctk_export",
101
+ "meta_file_train": "",
102
+ "ignored_speakers": null,
103
+ "language": "",
104
+ "phonemizer": "",
105
+ "meta_file_val": "",
106
+ "meta_file_attn_mask": ""
107
+ }
108
+ ],
109
+ "test_sentences": [
110
+ "W swojej h\u0142ownej etapje tworjenja we \u0141azowskim \u010dasu, hd\u017ae\u017e docp\u011b wjer\u0161k swojeho tworjenja, pokro\u010dowa\u0161e Zejler w tutym basnjenju, zdobom wustupowa\u0161e jako pr\u011bni serbski literat ze sylnej towar\u0161nostnej kritiku.",
111
+ "P\u0159eto\u017e Bo\u017ei hn\u011bw p\u0159ichad\u017aa z njebja na k\u00f3\u017edu bjezb\u00f3\u017enos\u0107 a njesprawnos\u0107 \u010d\u0142owjekow, kot\u0159i\u017e p\u0159ez swoju njesprawnos\u0107 prawdu pot\u0142\u00f3\u010duja.",
112
+ "Jubilejne, d\u017aesate p\u0159edstajenje stawiznow ze \u017eiwjenja Jana \u0160adowica, alias Krabata, je w\u010dera wje\u010dor w \u010cornym Cho\u0142mcu swoju premjeru do\u017eiwi\u0142o.",
113
+ "W na\u0107isku kl\u011btu\u0161eho etata zwjazka, kotry\u017e je zaw\u010deraw\u0161im knje\u017eerstwo zwjazkowemu sejmej za dal\u0161e wobd\u017a\u011b\u0142anje a wobzamknjenje p\u0159epoda\u0142o, su za p\u0159ira\u017eki Za\u0142o\u017ebje za serbski lud sr\u011bdki w samsnej wysokos\u0107i ka\u017e l\u011btsa zaplanowane."
114
+ ],
115
+ "eval_split_max_size": null,
116
+ "eval_split_size": 0.01,
117
+ "use_speaker_weighted_sampler": false,
118
+ "speaker_weighted_sampler_alpha": 1.0,
119
+ "use_language_weighted_sampler": false,
120
+ "language_weighted_sampler_alpha": 1.0,
121
+ "use_length_weighted_sampler": false,
122
+ "length_weighted_sampler_alpha": 1.0,
123
+ "model_args": {
124
+ "num_chars": 72,
125
+ "out_channels": 513,
126
+ "spec_segment_size": 32,
127
+ "hidden_channels": 192,
128
+ "hidden_channels_ffn_text_encoder": 768,
129
+ "num_heads_text_encoder": 2,
130
+ "num_layers_text_encoder": 6,
131
+ "kernel_size_text_encoder": 3,
132
+ "dropout_p_text_encoder": 0.1,
133
+ "dropout_p_duration_predictor": 0.5,
134
+ "kernel_size_posterior_encoder": 5,
135
+ "dilation_rate_posterior_encoder": 1,
136
+ "num_layers_posterior_encoder": 16,
137
+ "kernel_size_flow": 5,
138
+ "dilation_rate_flow": 1,
139
+ "num_layers_flow": 4,
140
+ "resblock_type_decoder": "1",
141
+ "resblock_kernel_sizes_decoder": [3, 7, 11],
142
+ "resblock_dilation_sizes_decoder": [
143
+ [1, 3, 5],
144
+ [1, 3, 5],
145
+ [1, 3, 5]
146
+ ],
147
+ "upsample_rates_decoder": [8, 8, 2, 2],
148
+ "upsample_initial_channel_decoder": 512,
149
+ "upsample_kernel_sizes_decoder": [16, 16, 4, 4],
150
+ "periods_multi_period_discriminator": [2, 3, 5, 7, 11],
151
+ "use_sdp": true,
152
+ "noise_scale": 1.0,
153
+ "inference_noise_scale": 0.667,
154
+ "length_scale": 1,
155
+ "noise_scale_dp": 1.0,
156
+ "inference_noise_scale_dp": 1.0,
157
+ "max_inference_len": null,
158
+ "init_discriminator": true,
159
+ "use_spectral_norm_disriminator": false,
160
+ "use_speaker_embedding": true,
161
+ "num_speakers": 1,
162
+ "speakers_file": "tts_models/weronika_speakers.pth",
163
+ "d_vector_file": null,
164
+ "speaker_embedding_channels": 256,
165
+ "use_d_vector_file": false,
166
+ "d_vector_dim": 0,
167
+ "detach_dp_input": true,
168
+ "use_language_embedding": false,
169
+ "embedded_language_dim": 4,
170
+ "num_languages": 0,
171
+ "language_ids_file": null,
172
+ "use_speaker_encoder_as_loss": false,
173
+ "speaker_encoder_config_path": "",
174
+ "speaker_encoder_model_path": "",
175
+ "condition_dp_on_speaker": true,
176
+ "freeze_encoder": false,
177
+ "freeze_DP": false,
178
+ "freeze_PE": false,
179
+ "freeze_flow_decoder": false,
180
+ "freeze_waveform_decoder": false,
181
+ "encoder_sample_rate": null,
182
+ "interpolate_z": true,
183
+ "reinit_DP": false,
184
+ "reinit_text_encoder": false
185
+ },
186
+ "lr_gen": 0.0002,
187
+ "lr_disc": 0.0002,
188
+ "lr_scheduler_gen": "ExponentialLR",
189
+ "lr_scheduler_gen_params": {
190
+ "gamma": 0.999875,
191
+ "last_epoch": -1
192
+ },
193
+ "lr_scheduler_disc": "ExponentialLR",
194
+ "lr_scheduler_disc_params": {
195
+ "gamma": 0.999875,
196
+ "last_epoch": -1
197
+ },
198
+ "kl_loss_alpha": 1.0,
199
+ "disc_loss_alpha": 1.0,
200
+ "gen_loss_alpha": 1.0,
201
+ "feat_loss_alpha": 1.0,
202
+ "mel_loss_alpha": 45.0,
203
+ "dur_loss_alpha": 1.0,
204
+ "speaker_encoder_loss_alpha": 1.0,
205
+ "return_wav": true,
206
+ "use_weighted_sampler": false,
207
+ "weighted_sampler_attrs": {},
208
+ "weighted_sampler_multipliers": {},
209
+ "r": 1,
210
+ "num_speakers": 0,
211
+ "use_speaker_embedding": true,
212
+ "speakers_file": "/workspace/vits_weronika-July-23-2024_09+56AM-0000000/speakers.pth",
213
+ "speaker_embedding_channels": 256,
214
+ "language_ids_file": null,
215
+ "use_language_embedding": false,
216
+ "use_d_vector_file": false,
217
+ "d_vector_file": null,
218
+ "d_vector_dim": 0
219
+ }
weronika.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2a4c48500c954d284e67fd74f2fede5744496dfc4733077fbdd63b4f7fab3a
3
+ size 1038029286
weronika_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b44acdd525fb4d3ec34aec8c3e202a150e4cbe333f9a5014cb88df6b27aa9c
3
+ size 864