Upload 2 files
Browse files
config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
-
log_dir: "./runs"
|
2 |
save_freq: 1
|
3 |
log_interval: 10
|
4 |
save_interval: 1000
|
5 |
device: "cuda"
|
6 |
epochs: 1000 # number of epochs for first stage training (pre-training)
|
7 |
-
batch_size:
|
8 |
batch_length: 100 # maximum duration of audio in a batch (in seconds)
|
9 |
max_len: 80 # maximum number of frames
|
10 |
pretrained_model: ""
|
@@ -25,13 +25,17 @@ model_params:
|
|
25 |
dit_type: "DiT" # uDiT or DiT
|
26 |
reg_loss_type: "l1" # l1 or l2
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
speech_tokenizer:
|
29 |
type: 'whisper'
|
30 |
-
|
31 |
-
path: "speech_tokenizer_v1.onnx"
|
32 |
-
|
33 |
-
cosyvoice:
|
34 |
-
path: "../CosyVoice/pretrained_models/CosyVoice-300M"
|
35 |
|
36 |
style_encoder:
|
37 |
dim: 192
|
|
|
1 |
+
log_dir: "./runs/run_dit_mel_seed_uvit_whisper_base_f0_44k"
|
2 |
save_freq: 1
|
3 |
log_interval: 10
|
4 |
save_interval: 1000
|
5 |
device: "cuda"
|
6 |
epochs: 1000 # number of epochs for first stage training (pre-training)
|
7 |
+
batch_size: 1
|
8 |
batch_length: 100 # maximum duration of audio in a batch (in seconds)
|
9 |
max_len: 80 # maximum number of frames
|
10 |
pretrained_model: ""
|
|
|
25 |
dit_type: "DiT" # uDiT or DiT
|
26 |
reg_loss_type: "l1" # l1 or l2
|
27 |
|
28 |
+
timbre_shifter:
|
29 |
+
se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
|
30 |
+
ckpt_path: './modules/openvoice/checkpoints_v2/converter'
|
31 |
+
|
32 |
+
vocoder:
|
33 |
+
type: "bigvgan"
|
34 |
+
name: "nvidia/bigvgan_v2_44khz_128band_512x"
|
35 |
+
|
36 |
speech_tokenizer:
|
37 |
type: 'whisper'
|
38 |
+
name: "openai/whisper-small"
|
|
|
|
|
|
|
|
|
39 |
|
40 |
style_encoder:
|
41 |
dim: 192
|
config_dit_mel_seed_uvit_whisper_small_wavenet.yml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
log_dir: "./runs"
|
2 |
save_freq: 1
|
3 |
log_interval: 10
|
4 |
save_interval: 1000
|
@@ -25,24 +25,21 @@ model_params:
|
|
25 |
dit_type: "DiT" # uDiT or DiT
|
26 |
reg_loss_type: "l1" # l1 or l2
|
27 |
|
|
|
|
|
|
|
|
|
28 |
speech_tokenizer:
|
29 |
type: 'whisper'
|
30 |
-
|
31 |
-
path: "speech_tokenizer_v1.onnx"
|
32 |
-
|
33 |
-
cosyvoice:
|
34 |
-
path: "../CosyVoice/pretrained_models/CosyVoice-300M"
|
35 |
|
36 |
style_encoder:
|
37 |
dim: 192
|
38 |
campplus_path: "campplus_cn_common.bin"
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
decoder_dim: 1536
|
44 |
-
decoder_rates: [ 6, 5, 5, 2 ]
|
45 |
-
sr: 24000
|
46 |
|
47 |
length_regulator:
|
48 |
channels: 512
|
|
|
1 |
+
log_dir: "./runs/run_dit_mel_seed_uvit_whisper_small_wavenet"
|
2 |
save_freq: 1
|
3 |
log_interval: 10
|
4 |
save_interval: 1000
|
|
|
25 |
dit_type: "DiT" # uDiT or DiT
|
26 |
reg_loss_type: "l1" # l1 or l2
|
27 |
|
28 |
+
timbre_shifter:
|
29 |
+
se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
|
30 |
+
ckpt_path: './modules/openvoice/checkpoints_v2/converter'
|
31 |
+
|
32 |
speech_tokenizer:
|
33 |
type: 'whisper'
|
34 |
+
name: "openai/whisper-small"
|
|
|
|
|
|
|
|
|
35 |
|
36 |
style_encoder:
|
37 |
dim: 192
|
38 |
campplus_path: "campplus_cn_common.bin"
|
39 |
|
40 |
+
vocoder:
|
41 |
+
type: "bigvgan"
|
42 |
+
name: "nvidia/bigvgan_v2_22khz_80band_256x"
|
|
|
|
|
|
|
43 |
|
44 |
length_regulator:
|
45 |
channels: 512
|