# NOTE(review): the next three lines look like file-viewer residue (size
# banner, per-line commit hashes, line-number gutter) rather than config
# content; commented out so the document can parse as YAML — confirm.
# File size: 5,174 Bytes
# f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d 9b6704c f0d6f7d |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
# Top-level run settings.
# Indentation restored: in the flattened source `path:` had no value and
# its children sat at the top level.
seed: 46762398
ckpt_path: null
train: true
test: false
path:
  exp_root: exp
  figures: figures
# NOTE(review): the flattened source does not show whether version_base
# belongs under `path` or at the top level; kept at the top level — confirm.
version_base: null
# Data module configuration: one loader stanza per split (train/val/test),
# each naming an entry of `datasets` and its data-loader options.
# Indentation restored from the flattened source. The nesting of `datasets`
# is pinned by the absolute interpolation
# ${datamodule.datasets.vb-train-16k.audio_len} used in the model stanza.
datamodule:
  _target_: open_universe.datasets.DataModule
  train:
    dataset: vb-train-16k
    dl_opts:
      pin_memory: true
      num_workers: 6
      shuffle: true
      batch_size: 10
  val:
    dataset: vb-val-16k
    dl_opts:
      pin_memory: true
      num_workers: 4
      shuffle: false
      batch_size: 1
  test:
    dataset: vb-test-16k
    dl_opts:
      pin_memory: true
      num_workers: 4
      shuffle: false
      batch_size: 1
  datasets:
    # 16 kHz entries. The val/test entries reuse the train entry's
    # audio_path and fs through relative interpolations (`..` climbs to the
    # `datasets` mapping), so all entries must share this parent.
    vb-train-16k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: data/voicebank_demand/16k
      fs: 16000
      split: train
      audio_len: 2.0
    vb-val-16k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: ${..vb-train-16k.audio_path}
      fs: ${..vb-train-16k.fs}
      split: val
      audio_len: null
    vb-test-16k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: ${..vb-train-16k.audio_path}
      fs: ${..vb-train-16k.fs}
      split: test
      audio_len: null
    # 24 kHz entries, same layout; not selected by the loader stanzas above.
    vb-train-24k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: data/voicebank_demand/24k
      fs: 24000
      split: train
      audio_len: 2.0
    vb-val-24k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: ${..vb-train-24k.audio_path}
      fs: ${..vb-train-24k.fs}
      split: val
      audio_len: null
    vb-test-24k:
      _target_: open_universe.datasets.NoisyDataset
      audio_path: ${..vb-train-24k.audio_path}
      fs: ${..vb-train-24k.fs}
      split: test
      audio_len: null
# Model configuration (target: open_universe.networks.universe.UniverseGAN).
# Indentation restored from the flattened source. The interpolations
# ${model.fs} and ${model.score_model.*} pin `fs` and `score_model` directly
# under `model`. Placements that could not be recovered from the source are
# marked NOTE(review) below.
model:
  _target_: open_universe.networks.universe.UniverseGAN
  fs: 16000
  normalization_norm: 2
  normalization_kwargs:
    ref: both
    level_db: -26.0
  edm:
    noise: 0.25
  score_model:
    _target_: open_universe.networks.universe.ScoreNetwork
    fb_kernel_size: 3
    rate_factors:
    - 2
    - 4
    - 4
    - 5
    n_channels: 32
    n_rff: 32
    noise_cond_dim: 512
    encoder_gru_conv_sandwich: false
    extra_conv_block: true
    decoder_act_type: prelu
    use_weight_norm: true
    use_antialiasing: true
    time_embedding: simple
  condition_model:
    _target_: open_universe.networks.universe.ConditionerNetwork
    # Shared hyper-parameters are tied to the score model by interpolation.
    fb_kernel_size: ${model.score_model.fb_kernel_size}
    rate_factors: ${model.score_model.rate_factors}
    n_channels: ${model.score_model.n_channels}
    n_mels: 80
    n_mel_oversample: 4
    encoder_gru_residual: true
    extra_conv_block: ${model.score_model.extra_conv_block}
    decoder_act_type: prelu
    use_weight_norm: ${model.score_model.use_weight_norm}
    use_antialiasing: false
  diffusion:
    schedule: geometric
    sigma_min: 0.0005
    sigma_max: 5.0
    n_steps: 8
    epsilon: 1.3
  losses:
    multi_period_discriminator:
      mpd_reshapes:
      - 2
      - 3
      - 5
      - 7
      - 11
      use_spectral_norm: false
      discriminator_channel_mult: 1
    multi_resolution_discriminator:
      # Three triples of integers (a nested sequence in the source).
      resolutions:
      - - 1024
        - 120
        - 600
      - - 2048
        - 240
        - 1200
      - - 512
        - 50
        - 240
      use_spectral_norm: false
      discriminator_channel_mult: 1
    # NOTE(review): the remaining keys are assumed to be siblings of the
    # two discriminator stanzas under `losses` — confirm.
    disc_freeze_step: 0
    weights:
      mel_l1: 45.0
      score: 1.0
    use_signal_decoupling: true
    signal_decoupling_act: snake
    score_loss:
      _target_: torch.nn.MSELoss
  training:
    # Tied to the training dataset's clip length.
    audio_len: ${datamodule.datasets.vb-train-16k.audio_len}
    time_sampling: time_normal_0.95
    dynamic_mixing: false
    ema_decay: 0.999
  validation:
    main_loss: val/pesq
    main_loss_mode: max
    n_bins: 5
    max_enh_batches: 4
    enh_losses:
      val/:
        _target_: open_universe.metrics.EvalMetrics
        audio_fs: ${model.fs}
  optimizer:
    accumulate_grad_batches: 1
    generator:
      _target_: torch.optim.AdamW
      lr: 0.0002
      weight_decay: 0.01
      betas:
      - 0.8
      - 0.99
      # NOTE(review): placed under `generator` because only the generator
      # sets weight_decay; it may instead be a sibling of `generator` under
      # `optimizer` — confirm against the code that consumes this config.
      weight_decay_exclude:
      - prelu
      - bias
    discriminator:
      _target_: torch.optim.AdamW
      lr: 0.0002
      betas:
      - 0.8
      - 0.99
    # NOTE(review): placed under `optimizer` as a sibling of
    # generator/discriminator; it may instead sit directly under `model`
    # — confirm.
    grad_clip_vals:
      mrd: 1000.0
      mpd: 1000.0
      score: 1000.0
      cond: 1000.0
  scheduler:
    # Each entry pairs a scheduler definition with `interval`/`frequency`
    # stepping options.
    generator:
      scheduler:
        _target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
        T_warmup: 20000
        T_cosine: 400000
        eta_min: 1.6e-06
        T_max: ${trainer.max_steps}
      interval: step
      frequency: 1
    discriminator:
      scheduler:
        _target_: open_universe.utils.schedulers.LinearWarmupCosineAnnealingLR
        T_warmup: 20000
        T_cosine: 400000
        eta_min: 1.6e-06
        T_max: ${trainer.max_steps}
      interval: step
      frequency: 1
  # NOTE(review): assumed to sit directly under `model`; the flattened
  # source does not show its parent — confirm.
  grad_clipper:
    _target_: open_universe.utils.FixedClipper
    max_norm: 1000.0
# Trainer configuration (target: pytorch_lightning.Trainer).
# Indentation restored: every key below is nested under `trainer`.
# `max_steps` is also read by the model schedulers via
# ${trainer.max_steps}.
trainer:
  _target_: pytorch_lightning.Trainer
  accumulate_grad_batches: 1
  min_epochs: 1
  max_epochs: -1
  max_steps: 600000
  deterministic: warn
  accelerator: gpu
  devices: -1
  strategy: ddp_find_unused_parameters_true
  check_val_every_n_epoch: null
  val_check_interval: 5000
  default_root_dir: .
  profiler: false
# | (stray delimiter, apparently left over from the file viewer's table layout)