Spaces:
No application file
No application file
File size: 1,349 Bytes
a6df73d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from fish_diffusion.utils.pitch import pitch_to_scale
# Values shared by several components of the model description below.
# Audio sampling rate handed to the vocoder (presumably in Hz -- confirm).
sampling_rate = 44100
# Mel-spectrogram channel count; reused by the diffusion, denoiser and vocoder.
mel_channels = 128
# Common feature width: the denoiser's `d_encoder` and every encoder's
# `output_size` are set to this value so they stay in agreement.
hidden_size = 256

# Declarative description of the model. Each nested mapping carries a "type"
# key naming the component plus that component's keyword settings; how the
# mapping is turned into objects is decided by the code that loads this file.
model = {
    "type": "DiffSVC",
    "diffusion": {
        "type": "GaussianDiffusion",
        "mel_channels": mel_channels,
        "noise_schedule": "linear",
        "timesteps": 1000,
        "max_beta": 0.01,
        # NOTE(review): `s` is set alongside a "linear" schedule; confirm
        # whether it has any effect with this schedule.
        "s": 0.008,
        "noise_loss": "smoothed-l1",
        "denoiser": {
            "type": "WaveNetDenoiser",
            "mel_channels": mel_channels,
            # Must match the width produced by the encoders below.
            "d_encoder": hidden_size,
            "residual_channels": 512,
            "residual_layers": 20,
        },
        # Path is relative; presumably resolved against the working
        # directory -- verify against the loader.
        "spec_stats_path": "dataset/stats.json",
        # NOTE(review): presumably the step stride used when sampling
        # (1000 timesteps / 10) -- confirm against the diffusion class.
        "sampler_interval": 10,
    },
    "text_encoder": {
        "type": "NaiveProjectionEncoder",
        # NOTE(review): literal 256 here, not `hidden_size`; presumably the
        # dimensionality of the incoming content features -- confirm.
        "input_size": 256,
        "output_size": hidden_size,
    },
    "speaker_encoder": {
        "type": "NaiveProjectionEncoder",
        # NOTE(review): with `use_embedding=True` this is presumably the
        # number of distinct speakers -- confirm.
        "input_size": 10,
        "output_size": hidden_size,
        "use_embedding": True,
    },
    "pitch_encoder": {
        "type": "NaiveProjectionEncoder",
        "input_size": 1,
        "output_size": hidden_size,
        "use_embedding": False,
        # The function object itself is stored here (it is not called in
        # this file); the consumer is expected to apply it to pitch input.
        "preprocessing": pitch_to_scale,
    },
    "vocoder": {
        "type": "NsfHifiGAN",
        "checkpoint_path": "checkpoints/nsf_hifigan/model",
        "sampling_rate": sampling_rate,
        "mel_channels": mel_channels,
        "use_natural_log": True,
    },
}
|