File size: 1,532 Bytes
27d3bc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Training hyperparameters.
train:
  model: "sovits"
  seed: 1234
  epochs: 10000
  # Written with an explicit decimal point in the mantissa: YAML 1.1
  # resolvers (e.g. PyYAML) load a bare `2e-4` as the string "2e-4",
  # not as a float.
  learning_rate: 2.0e-4
  betas: [0.8, 0.99]
  lr_decay: 0.999875
  # Same float-resolution caveat as learning_rate.
  eps: 1.0e-9
  batch_size: 8
  # NOTE(review): c_stft / c_mel / c_kl look like loss-term weights -
  # confirm against the training loop.
  c_stft: 5
  c_mel: 2.5
  c_kl: 1.0
  port: 8001
  # Explicit empty string (not null).
  # NOTE(review): presumably means "no pretrained checkpoint" - confirm.
  pretrain: ""
#############################
# Dataset file lists plus audio framing and mel parameters.
data:
  training_files: "files/train.txt"
  validation_files: "files/valid.txt"
  # WARNING: based on hop_length - 12000 = 25 * 480.
  # NOTE(review): presumably this has to stay a whole multiple of
  # hop_length; confirm before changing either value.
  segment_size: 12000
  max_wav_value: 32768.0
  sampling_rate: 48000
  filter_length: 2048
  hop_length: 480
  win_length: 2048
  mel_channels: 80
  mel_fmin: 0.0
  # Equals sampling_rate / 2 (48000 / 2).
  mel_fmax: 24000.0
#############################
# Settings under `vits`: dimension and channel sizes.
# NOTE(review): what each size feeds is not visible in this file - confirm
# against the model definition before changing any of them.
vits:
  ppg_dim: 1024
  spk_dim: 256
  # Same value as spk_dim (256).
  gin_channels: 256
  inter_channels: 192
  hidden_channels: 192
  filter_channels: 512
#############################
# Settings under `gen`: upsampling stack and residual-block shapes.
gen:
  upsample_input: 192
  # 6 * 5 * 4 * 2 * 2 = 480, the same number as data.hop_length.
  # NOTE(review): presumably these must stay in sync - confirm.
  upsample_rates: [6, 5, 4, 2, 2]
  upsample_kernel_sizes: [20, 15, 8, 4, 4]
  upsample_initial_channel: 256
  resblock_kernel_sizes: [3, 7, 11]
  # Three dilation triples, the same count as resblock_kernel_sizes.
  resblock_dilation_sizes:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
#############################
# Settings under `mpd`.
# NOTE(review): the consumer of this section is not visible here - confirm
# how `periods` is used before editing it.
mpd:
  periods: [2, 3, 5, 7, 11]
  kernel_size: 5
  stride: 3
  # Canonical lowercase boolean.
  use_spectral_norm: false
  lReLU_slope: 0.2
#############################
# Settings under `mrd`.
mrd:
  # Each tuple is (filter_length, hop_length, win_length).
  # The value is a quoted string, not a YAML sequence; keep it
  # character-for-character unless the consumer is changed as well.
  resolutions: "[(1024, 120, 600), (2048, 240, 1200), (512, 50, 240)]"
  # Canonical lowercase boolean.
  use_spectral_norm: false
  lReLU_slope: 0.2
#############################
# Logging, evaluation and checkpoint settings.
# NOTE(review): the unit of the *_interval values (steps vs. epochs) is not
# visible in this file - confirm against the trainer.
log:
  info_interval: 100
  eval_interval: 5
  save_interval: 5
  num_audio: 6
  # Directory names, double-quoted like the other strings in this file.
  pth_dir: "chkpt"
  log_dir: "logs"
#############################
# Settings under `dist_config`.
# NOTE(review): presumably handed to the distributed-training
# initialisation - confirm against the launcher code.
dist_config:
  dist_backend: "nccl"
  # TCP endpoint on localhost, port 54321.
  dist_url: "tcp://localhost:54321"
  world_size: 1