fakufaku committed on
Commit
9b6704c
1 Parent(s): f0d6f7d

Adds config and weights of UNIVERSE++

Browse files
Files changed (2) hide show
  1. config.yaml +74 -36
  2. weights.ckpt +2 -2
config.yaml CHANGED
@@ -36,49 +36,45 @@ datamodule:
36
  fs: 16000
37
  split: train
38
  audio_len: 2.0
39
- augmentation: false
40
  vb-val-16k:
41
  _target_: open_universe.datasets.NoisyDataset
42
  audio_path: ${..vb-train-16k.audio_path}
43
  fs: ${..vb-train-16k.fs}
44
  split: val
45
  audio_len: null
46
- augmentation: false
47
  vb-test-16k:
48
  _target_: open_universe.datasets.NoisyDataset
49
  audio_path: ${..vb-train-16k.audio_path}
50
  fs: ${..vb-train-16k.fs}
51
  split: test
52
  audio_len: null
53
- augmentation: false
54
  vb-train-24k:
55
  _target_: open_universe.datasets.NoisyDataset
56
  audio_path: data/voicebank_demand/24k
57
  fs: 24000
58
  split: train
59
  audio_len: 2.0
60
- augmentation: false
61
  vb-val-24k:
62
  _target_: open_universe.datasets.NoisyDataset
63
  audio_path: ${..vb-train-24k.audio_path}
64
  fs: ${..vb-train-24k.fs}
65
  split: val
66
  audio_len: null
67
- augmentation: false
68
  vb-test-24k:
69
  _target_: open_universe.datasets.NoisyDataset
70
  audio_path: ${..vb-train-24k.audio_path}
71
  fs: ${..vb-train-24k.fs}
72
  split: test
73
  audio_len: null
74
- augmentation: false
75
  model:
76
- _target_: open_universe.networks.universe.Universe
77
  fs: 16000
78
  normalization_norm: 2
79
  normalization_kwargs:
80
  ref: both
81
  level_db: -26.0
 
 
82
  score_model:
83
  _target_: open_universe.networks.universe.ScoreNetwork
84
  fb_kernel_size: 3
@@ -93,9 +89,9 @@ model:
93
  encoder_gru_conv_sandwich: false
94
  extra_conv_block: true
95
  decoder_act_type: prelu
96
- use_weight_norm: false
97
- seq_model: gru
98
- use_antialiasing: false
99
  condition_model:
100
  _target_: open_universe.networks.universe.ConditionerNetwork
101
  fb_kernel_size: ${model.score_model.fb_kernel_size}
@@ -107,7 +103,6 @@ model:
107
  extra_conv_block: ${model.score_model.extra_conv_block}
108
  decoder_act_type: prelu
109
  use_weight_norm: ${model.score_model.use_weight_norm}
110
- seq_model: ${model.score_model.seq_model}
111
  use_antialiasing: false
112
  diffusion:
113
  schedule: geometric
@@ -116,17 +111,39 @@ model:
116
  n_steps: 8
117
  epsilon: 1.3
118
  losses:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  weights:
 
120
  score: 1.0
121
- signal: 1.0
122
- latent: 1.0
123
- mdn_n_comp: 3
124
- mdn_alpha_per_sample: true
125
  score_loss:
126
  _target_: torch.nn.MSELoss
127
  training:
128
  audio_len: ${datamodule.datasets.vb-train-16k.audio_len}
129
- time_sampling: time_uniform
130
  dynamic_mixing: false
131
  ema_decay: 0.999
132
  validation:
@@ -134,31 +151,52 @@ model:
134
  main_loss_mode: max
135
  n_bins: 5
136
  max_enh_batches: 4
137
- num_tb_samples: 0
138
  enh_losses:
139
  val/:
140
  _target_: open_universe.metrics.EvalMetrics
141
  audio_fs: ${model.fs}
142
  optimizer:
143
- _target_: torch.optim.AdamW
144
- lr: 0.0002
145
- weight_decay: 0.01
146
- weight_decay_exclude:
147
- - prelu
148
- - bias
149
- lr_warmup: null
150
- betas:
151
- - 0.8
152
- - 0.99
 
 
 
 
 
 
 
 
 
 
 
 
153
  scheduler:
154
- scheduler:
155
- _target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
156
- T_warmup: 50000
157
- T_cosine: 50001
158
- eta_min: 1.6e-06
159
- T_max: ${trainer.max_steps}
160
- interval: step
161
- frequency: 1
 
 
 
 
 
 
 
 
 
 
162
  grad_clipper:
163
  _target_: open_universe.utils.FixedClipper
164
  max_norm: 1000.0
@@ -167,7 +205,7 @@ trainer:
167
  accumulate_grad_batches: 1
168
  min_epochs: 1
169
  max_epochs: -1
170
- max_steps: 300000
171
  deterministic: warn
172
  accelerator: gpu
173
  devices: -1
 
36
  fs: 16000
37
  split: train
38
  audio_len: 2.0
 
39
  vb-val-16k:
40
  _target_: open_universe.datasets.NoisyDataset
41
  audio_path: ${..vb-train-16k.audio_path}
42
  fs: ${..vb-train-16k.fs}
43
  split: val
44
  audio_len: null
 
45
  vb-test-16k:
46
  _target_: open_universe.datasets.NoisyDataset
47
  audio_path: ${..vb-train-16k.audio_path}
48
  fs: ${..vb-train-16k.fs}
49
  split: test
50
  audio_len: null
 
51
  vb-train-24k:
52
  _target_: open_universe.datasets.NoisyDataset
53
  audio_path: data/voicebank_demand/24k
54
  fs: 24000
55
  split: train
56
  audio_len: 2.0
 
57
  vb-val-24k:
58
  _target_: open_universe.datasets.NoisyDataset
59
  audio_path: ${..vb-train-24k.audio_path}
60
  fs: ${..vb-train-24k.fs}
61
  split: val
62
  audio_len: null
 
63
  vb-test-24k:
64
  _target_: open_universe.datasets.NoisyDataset
65
  audio_path: ${..vb-train-24k.audio_path}
66
  fs: ${..vb-train-24k.fs}
67
  split: test
68
  audio_len: null
 
69
  model:
70
+ _target_: open_universe.networks.universe.UniverseGAN
71
  fs: 16000
72
  normalization_norm: 2
73
  normalization_kwargs:
74
  ref: both
75
  level_db: -26.0
76
+ edm:
77
+ noise: 0.25
78
  score_model:
79
  _target_: open_universe.networks.universe.ScoreNetwork
80
  fb_kernel_size: 3
 
89
  encoder_gru_conv_sandwich: false
90
  extra_conv_block: true
91
  decoder_act_type: prelu
92
+ use_weight_norm: true
93
+ use_antialiasing: true
94
+ time_embedding: simple
95
  condition_model:
96
  _target_: open_universe.networks.universe.ConditionerNetwork
97
  fb_kernel_size: ${model.score_model.fb_kernel_size}
 
103
  extra_conv_block: ${model.score_model.extra_conv_block}
104
  decoder_act_type: prelu
105
  use_weight_norm: ${model.score_model.use_weight_norm}
 
106
  use_antialiasing: false
107
  diffusion:
108
  schedule: geometric
 
111
  n_steps: 8
112
  epsilon: 1.3
113
  losses:
114
+ multi_period_discriminator:
115
+ mpd_reshapes:
116
+ - 2
117
+ - 3
118
+ - 5
119
+ - 7
120
+ - 11
121
+ use_spectral_norm: false
122
+ discriminator_channel_mult: 1
123
+ multi_resolution_discriminator:
124
+ resolutions:
125
+ - - 1024
126
+ - 120
127
+ - 600
128
+ - - 2048
129
+ - 240
130
+ - 1200
131
+ - - 512
132
+ - 50
133
+ - 240
134
+ use_spectral_norm: false
135
+ discriminator_channel_mult: 1
136
+ disc_freeze_step: 0
137
  weights:
138
+ mel_l1: 45.0
139
  score: 1.0
140
+ use_signal_decoupling: true
141
+ signal_decoupling_act: snake
 
 
142
  score_loss:
143
  _target_: torch.nn.MSELoss
144
  training:
145
  audio_len: ${datamodule.datasets.vb-train-16k.audio_len}
146
+ time_sampling: time_normal_0.95
147
  dynamic_mixing: false
148
  ema_decay: 0.999
149
  validation:
 
151
  main_loss_mode: max
152
  n_bins: 5
153
  max_enh_batches: 4
 
154
  enh_losses:
155
  val/:
156
  _target_: open_universe.metrics.EvalMetrics
157
  audio_fs: ${model.fs}
158
  optimizer:
159
+ accumulate_grad_batches: 1
160
+ generator:
161
+ _target_: torch.optim.AdamW
162
+ lr: 0.0002
163
+ weight_decay: 0.01
164
+ betas:
165
+ - 0.8
166
+ - 0.99
167
+ weight_decay_exclude:
168
+ - prelu
169
+ - bias
170
+ discriminator:
171
+ _target_: torch.optim.AdamW
172
+ lr: 0.0002
173
+ betas:
174
+ - 0.8
175
+ - 0.99
176
+ grad_clip_vals:
177
+ mrd: 1000.0
178
+ mpd: 1000.0
179
+ score: 1000.0
180
+ cond: 1000.0
181
  scheduler:
182
+ generator:
183
+ scheduler:
184
+ _target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
185
+ T_warmup: 20000
186
+ T_cosine: 400000
187
+ eta_min: 1.6e-06
188
+ T_max: ${trainer.max_steps}
189
+ interval: step
190
+ frequency: 1
191
+ discriminator:
192
+ scheduler:
193
+ _target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
194
+ T_warmup: 20000
195
+ T_cosine: 400000
196
+ eta_min: 1.6e-06
197
+ T_max: ${trainer.max_steps}
198
+ interval: step
199
+ frequency: 1
200
  grad_clipper:
201
  _target_: open_universe.utils.FixedClipper
202
  max_norm: 1000.0
 
205
  accumulate_grad_batches: 1
206
  min_epochs: 1
207
  max_epochs: -1
208
+ max_steps: 600000
209
  deterministic: warn
210
  accelerator: gpu
211
  devices: -1
weights.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee7b031d055bd65d1e849426ba7867bf1416b53adf46e32c4a69312768361222
3
- size 901069356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d90ab343c86501a23d5dd0011242d1129ad2f54d8cebec68c55dd387037879c
3
+ size 1025936580