Adds config and weights of UNIVERSE++
Browse files
- config.yaml +74 -36
- weights.ckpt +2 -2
config.yaml
CHANGED
@@ -36,49 +36,45 @@ datamodule:
|
|
36 |
fs: 16000
|
37 |
split: train
|
38 |
audio_len: 2.0
|
39 |
-
augmentation: false
|
40 |
vb-val-16k:
|
41 |
_target_: open_universe.datasets.NoisyDataset
|
42 |
audio_path: ${..vb-train-16k.audio_path}
|
43 |
fs: ${..vb-train-16k.fs}
|
44 |
split: val
|
45 |
audio_len: null
|
46 |
-
augmentation: false
|
47 |
vb-test-16k:
|
48 |
_target_: open_universe.datasets.NoisyDataset
|
49 |
audio_path: ${..vb-train-16k.audio_path}
|
50 |
fs: ${..vb-train-16k.fs}
|
51 |
split: test
|
52 |
audio_len: null
|
53 |
-
augmentation: false
|
54 |
vb-train-24k:
|
55 |
_target_: open_universe.datasets.NoisyDataset
|
56 |
audio_path: data/voicebank_demand/24k
|
57 |
fs: 24000
|
58 |
split: train
|
59 |
audio_len: 2.0
|
60 |
-
augmentation: false
|
61 |
vb-val-24k:
|
62 |
_target_: open_universe.datasets.NoisyDataset
|
63 |
audio_path: ${..vb-train-24k.audio_path}
|
64 |
fs: ${..vb-train-24k.fs}
|
65 |
split: val
|
66 |
audio_len: null
|
67 |
-
augmentation: false
|
68 |
vb-test-24k:
|
69 |
_target_: open_universe.datasets.NoisyDataset
|
70 |
audio_path: ${..vb-train-24k.audio_path}
|
71 |
fs: ${..vb-train-24k.fs}
|
72 |
split: test
|
73 |
audio_len: null
|
74 |
-
augmentation: false
|
75 |
model:
|
76 |
-
_target_: open_universe.networks.universe.
|
77 |
fs: 16000
|
78 |
normalization_norm: 2
|
79 |
normalization_kwargs:
|
80 |
ref: both
|
81 |
level_db: -26.0
|
|
|
|
|
82 |
score_model:
|
83 |
_target_: open_universe.networks.universe.ScoreNetwork
|
84 |
fb_kernel_size: 3
|
@@ -93,9 +89,9 @@ model:
|
|
93 |
encoder_gru_conv_sandwich: false
|
94 |
extra_conv_block: true
|
95 |
decoder_act_type: prelu
|
96 |
-
use_weight_norm:
|
97 |
-
|
98 |
-
|
99 |
condition_model:
|
100 |
_target_: open_universe.networks.universe.ConditionerNetwork
|
101 |
fb_kernel_size: ${model.score_model.fb_kernel_size}
|
@@ -107,7 +103,6 @@ model:
|
|
107 |
extra_conv_block: ${model.score_model.extra_conv_block}
|
108 |
decoder_act_type: prelu
|
109 |
use_weight_norm: ${model.score_model.use_weight_norm}
|
110 |
-
seq_model: ${model.score_model.seq_model}
|
111 |
use_antialiasing: false
|
112 |
diffusion:
|
113 |
schedule: geometric
|
@@ -116,17 +111,39 @@ model:
|
|
116 |
n_steps: 8
|
117 |
epsilon: 1.3
|
118 |
losses:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
weights:
|
|
|
120 |
score: 1.0
|
121 |
-
|
122 |
-
|
123 |
-
mdn_n_comp: 3
|
124 |
-
mdn_alpha_per_sample: true
|
125 |
score_loss:
|
126 |
_target_: torch.nn.MSELoss
|
127 |
training:
|
128 |
audio_len: ${datamodule.datasets.vb-train-16k.audio_len}
|
129 |
-
time_sampling:
|
130 |
dynamic_mixing: false
|
131 |
ema_decay: 0.999
|
132 |
validation:
|
@@ -134,31 +151,52 @@ model:
|
|
134 |
main_loss_mode: max
|
135 |
n_bins: 5
|
136 |
max_enh_batches: 4
|
137 |
-
num_tb_samples: 0
|
138 |
enh_losses:
|
139 |
val/:
|
140 |
_target_: open_universe.metrics.EvalMetrics
|
141 |
audio_fs: ${model.fs}
|
142 |
optimizer:
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
scheduler:
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
grad_clipper:
|
163 |
_target_: open_universe.utils.FixedClipper
|
164 |
max_norm: 1000.0
|
@@ -167,7 +205,7 @@ trainer:
|
|
167 |
accumulate_grad_batches: 1
|
168 |
min_epochs: 1
|
169 |
max_epochs: -1
|
170 |
-
max_steps:
|
171 |
deterministic: warn
|
172 |
accelerator: gpu
|
173 |
devices: -1
|
|
|
36 |
fs: 16000
|
37 |
split: train
|
38 |
audio_len: 2.0
|
|
|
39 |
vb-val-16k:
|
40 |
_target_: open_universe.datasets.NoisyDataset
|
41 |
audio_path: ${..vb-train-16k.audio_path}
|
42 |
fs: ${..vb-train-16k.fs}
|
43 |
split: val
|
44 |
audio_len: null
|
|
|
45 |
vb-test-16k:
|
46 |
_target_: open_universe.datasets.NoisyDataset
|
47 |
audio_path: ${..vb-train-16k.audio_path}
|
48 |
fs: ${..vb-train-16k.fs}
|
49 |
split: test
|
50 |
audio_len: null
|
|
|
51 |
vb-train-24k:
|
52 |
_target_: open_universe.datasets.NoisyDataset
|
53 |
audio_path: data/voicebank_demand/24k
|
54 |
fs: 24000
|
55 |
split: train
|
56 |
audio_len: 2.0
|
|
|
57 |
vb-val-24k:
|
58 |
_target_: open_universe.datasets.NoisyDataset
|
59 |
audio_path: ${..vb-train-24k.audio_path}
|
60 |
fs: ${..vb-train-24k.fs}
|
61 |
split: val
|
62 |
audio_len: null
|
|
|
63 |
vb-test-24k:
|
64 |
_target_: open_universe.datasets.NoisyDataset
|
65 |
audio_path: ${..vb-train-24k.audio_path}
|
66 |
fs: ${..vb-train-24k.fs}
|
67 |
split: test
|
68 |
audio_len: null
|
|
|
69 |
model:
|
70 |
+
_target_: open_universe.networks.universe.UniverseGAN
|
71 |
fs: 16000
|
72 |
normalization_norm: 2
|
73 |
normalization_kwargs:
|
74 |
ref: both
|
75 |
level_db: -26.0
|
76 |
+
edm:
|
77 |
+
noise: 0.25
|
78 |
score_model:
|
79 |
_target_: open_universe.networks.universe.ScoreNetwork
|
80 |
fb_kernel_size: 3
|
|
|
89 |
encoder_gru_conv_sandwich: false
|
90 |
extra_conv_block: true
|
91 |
decoder_act_type: prelu
|
92 |
+
use_weight_norm: true
|
93 |
+
use_antialiasing: true
|
94 |
+
time_embedding: simple
|
95 |
condition_model:
|
96 |
_target_: open_universe.networks.universe.ConditionerNetwork
|
97 |
fb_kernel_size: ${model.score_model.fb_kernel_size}
|
|
|
103 |
extra_conv_block: ${model.score_model.extra_conv_block}
|
104 |
decoder_act_type: prelu
|
105 |
use_weight_norm: ${model.score_model.use_weight_norm}
|
|
|
106 |
use_antialiasing: false
|
107 |
diffusion:
|
108 |
schedule: geometric
|
|
|
111 |
n_steps: 8
|
112 |
epsilon: 1.3
|
113 |
losses:
|
114 |
+
multi_period_discriminator:
|
115 |
+
mpd_reshapes:
|
116 |
+
- 2
|
117 |
+
- 3
|
118 |
+
- 5
|
119 |
+
- 7
|
120 |
+
- 11
|
121 |
+
use_spectral_norm: false
|
122 |
+
discriminator_channel_mult: 1
|
123 |
+
multi_resolution_discriminator:
|
124 |
+
resolutions:
|
125 |
+
- - 1024
|
126 |
+
- 120
|
127 |
+
- 600
|
128 |
+
- - 2048
|
129 |
+
- 240
|
130 |
+
- 1200
|
131 |
+
- - 512
|
132 |
+
- 50
|
133 |
+
- 240
|
134 |
+
use_spectral_norm: false
|
135 |
+
discriminator_channel_mult: 1
|
136 |
+
disc_freeze_step: 0
|
137 |
weights:
|
138 |
+
mel_l1: 45.0
|
139 |
score: 1.0
|
140 |
+
use_signal_decoupling: true
|
141 |
+
signal_decoupling_act: snake
|
|
|
|
|
142 |
score_loss:
|
143 |
_target_: torch.nn.MSELoss
|
144 |
training:
|
145 |
audio_len: ${datamodule.datasets.vb-train-16k.audio_len}
|
146 |
+
time_sampling: time_normal_0.95
|
147 |
dynamic_mixing: false
|
148 |
ema_decay: 0.999
|
149 |
validation:
|
|
|
151 |
main_loss_mode: max
|
152 |
n_bins: 5
|
153 |
max_enh_batches: 4
|
|
|
154 |
enh_losses:
|
155 |
val/:
|
156 |
_target_: open_universe.metrics.EvalMetrics
|
157 |
audio_fs: ${model.fs}
|
158 |
optimizer:
|
159 |
+
accumulate_grad_batches: 1
|
160 |
+
generator:
|
161 |
+
_target_: torch.optim.AdamW
|
162 |
+
lr: 0.0002
|
163 |
+
weight_decay: 0.01
|
164 |
+
betas:
|
165 |
+
- 0.8
|
166 |
+
- 0.99
|
167 |
+
weight_decay_exclude:
|
168 |
+
- prelu
|
169 |
+
- bias
|
170 |
+
discriminator:
|
171 |
+
_target_: torch.optim.AdamW
|
172 |
+
lr: 0.0002
|
173 |
+
betas:
|
174 |
+
- 0.8
|
175 |
+
- 0.99
|
176 |
+
grad_clip_vals:
|
177 |
+
mrd: 1000.0
|
178 |
+
mpd: 1000.0
|
179 |
+
score: 1000.0
|
180 |
+
cond: 1000.0
|
181 |
scheduler:
|
182 |
+
generator:
|
183 |
+
scheduler:
|
184 |
+
_target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
|
185 |
+
T_warmup: 20000
|
186 |
+
T_cosine: 400000
|
187 |
+
eta_min: 1.6e-06
|
188 |
+
T_max: ${trainer.max_steps}
|
189 |
+
interval: step
|
190 |
+
frequency: 1
|
191 |
+
discriminator:
|
192 |
+
scheduler:
|
193 |
+
_target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
|
194 |
+
T_warmup: 20000
|
195 |
+
T_cosine: 400000
|
196 |
+
eta_min: 1.6e-06
|
197 |
+
T_max: ${trainer.max_steps}
|
198 |
+
interval: step
|
199 |
+
frequency: 1
|
200 |
grad_clipper:
|
201 |
_target_: open_universe.utils.FixedClipper
|
202 |
max_norm: 1000.0
|
|
|
205 |
accumulate_grad_batches: 1
|
206 |
min_epochs: 1
|
207 |
max_epochs: -1
|
208 |
+
max_steps: 600000
|
209 |
deterministic: warn
|
210 |
accelerator: gpu
|
211 |
devices: -1
|
weights.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d90ab343c86501a23d5dd0011242d1129ad2f54d8cebec68c55dd387037879c
|
3 |
+
size 1025936580
|