Plachta committed on
Commit
d9d1850
1 Parent(s): 5cffe4f

Upload 2 files

Browse files
config_dit_mel_seed_uvit_whisper_base_f0_44k.yml CHANGED
@@ -1,10 +1,10 @@
1
- log_dir: "./runs"
2
  save_freq: 1
3
  log_interval: 10
4
  save_interval: 1000
5
  device: "cuda"
6
  epochs: 1000 # number of epochs for first stage training (pre-training)
7
- batch_size: 2
8
  batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
  max_len: 80 # maximum number of frames
10
  pretrained_model: ""
@@ -25,13 +25,17 @@ model_params:
25
  dit_type: "DiT" # uDiT or DiT
26
  reg_loss_type: "l1" # l1 or l2
27
 
 
 
 
 
 
 
 
 
28
  speech_tokenizer:
29
  type: 'whisper'
30
- whisper_name: "openai/whisper-small"
31
- path: "speech_tokenizer_v1.onnx"
32
-
33
- cosyvoice:
34
- path: "../CosyVoice/pretrained_models/CosyVoice-300M"
35
 
36
  style_encoder:
37
  dim: 192
 
1
+ log_dir: "./runs/run_dit_mel_seed_uvit_whisper_base_f0_44k"
2
  save_freq: 1
3
  log_interval: 10
4
  save_interval: 1000
5
  device: "cuda"
6
  epochs: 1000 # number of epochs for first stage training (pre-training)
7
+ batch_size: 1
8
  batch_length: 100 # maximum duration of audio in a batch (in seconds)
9
  max_len: 80 # maximum number of frames
10
  pretrained_model: ""
 
25
  dit_type: "DiT" # uDiT or DiT
26
  reg_loss_type: "l1" # l1 or l2
27
 
28
+ timbre_shifter:
29
+ se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
30
+ ckpt_path: './modules/openvoice/checkpoints_v2/converter'
31
+
32
+ vocoder:
33
+ type: "bigvgan"
34
+ name: "nvidia/bigvgan_v2_44khz_128band_512x"
35
+
36
  speech_tokenizer:
37
  type: 'whisper'
38
+ name: "openai/whisper-small"
 
 
 
 
39
 
40
  style_encoder:
41
  dim: 192
config_dit_mel_seed_uvit_whisper_small_wavenet.yml CHANGED
@@ -1,4 +1,4 @@
1
- log_dir: "./runs"
2
  save_freq: 1
3
  log_interval: 10
4
  save_interval: 1000
@@ -25,24 +25,21 @@ model_params:
25
  dit_type: "DiT" # uDiT or DiT
26
  reg_loss_type: "l1" # l1 or l2
27
 
 
 
 
 
28
  speech_tokenizer:
29
  type: 'whisper'
30
- whisper_name: "openai/whisper-small"
31
- path: "speech_tokenizer_v1.onnx"
32
-
33
- cosyvoice:
34
- path: "../CosyVoice/pretrained_models/CosyVoice-300M"
35
 
36
  style_encoder:
37
  dim: 192
38
  campplus_path: "campplus_cn_common.bin"
39
 
40
- DAC:
41
- encoder_dim: 64
42
- encoder_rates: [2, 5, 5, 6]
43
- decoder_dim: 1536
44
- decoder_rates: [ 6, 5, 5, 2 ]
45
- sr: 24000
46
 
47
  length_regulator:
48
  channels: 512
 
1
+ log_dir: "./runs/run_dit_mel_seed_uvit_whisper_small_wavenet"
2
  save_freq: 1
3
  log_interval: 10
4
  save_interval: 1000
 
25
  dit_type: "DiT" # uDiT or DiT
26
  reg_loss_type: "l1" # l1 or l2
27
 
28
+ timbre_shifter:
29
+ se_db_path: "./modules/openvoice/checkpoints_v2/converter/se_db.pt"
30
+ ckpt_path: './modules/openvoice/checkpoints_v2/converter'
31
+
32
  speech_tokenizer:
33
  type: 'whisper'
34
+ name: "openai/whisper-small"
 
 
 
 
35
 
36
  style_encoder:
37
  dim: 192
38
  campplus_path: "campplus_cn_common.bin"
39
 
40
+ vocoder:
41
+ type: "bigvgan"
42
+ name: "nvidia/bigvgan_v2_22khz_80band_256x"
 
 
 
43
 
44
  length_regulator:
45
  channels: 512