from dataclasses import dataclass, field

from TTS.vocoder.configs.shared_configs import BaseVocoderConfig
from TTS.vocoder.models.wavegrad import WavegradArgs


@dataclass
class WavegradConfig(BaseVocoderConfig):
"""Defines parameters for WaveGrad vocoder. |
|
Example: |
|
|
|
>>> from TTS.vocoder.configs import WavegradConfig |
|
>>> config = WavegradConfig() |

    Args:
        model (str):
            Model name used for selecting the right model at initialization. Defaults to `wavegrad`.
        generator_model (str):
            One of the generators from `TTS.vocoder.models.*`. Every other non-GAN vocoder model is
            considered as a generator too. Defaults to `wavegrad`.
        model_params (WavegradArgs):
            Model parameters. Check `WavegradArgs` for default values.
        target_loss (str):
            Target loss name that defines the quality of the model. Defaults to `loss`.
        epochs (int):
            Number of epochs to train the model. Defaults to 10000.
        batch_size (int):
            Batch size used at training. Larger values use more memory. Defaults to 96.
        seq_len (int):
            Audio segment length used at training. Larger values use more memory. Defaults to 6144.
        use_cache (bool):
            Enable / disable in-memory caching of the computed features. It can cause an OOM error if the system RAM
            is not large enough. Defaults to True.
        mixed_precision (bool):
            Enable / disable mixed precision training. Defaults to True.
        eval_split_size (int):
            Number of samples used for evaluation. Defaults to 50.
        train_noise_schedule (dict):
            Training noise schedule. Defaults to
            `{"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000}`.
        test_noise_schedule (dict):
            Inference noise schedule. For better performance, you may need to use `bin/tune_wavegrad.py` to find a
            better schedule. Defaults to
            `{"min_val": 1e-6, "max_val": 1e-2, "num_steps": 50}`.
        grad_clip (float):
            Gradient clipping threshold. If <= 0.0, no clipping is applied. Defaults to 1.0.
        lr (float):
            Initial learning rate. Defaults to 1e-4.
        lr_scheduler (str):
            One of the learning rate schedulers from `torch.optim.lr_scheduler.*`. Defaults to `MultiStepLR`.
        lr_scheduler_params (dict):
            kwargs for the scheduler. Defaults to
            `{"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}`.
    """

    model: str = "wavegrad"
    # Model specific params
    generator_model: str = "wavegrad"
    model_params: WavegradArgs = field(default_factory=WavegradArgs)
    target_loss: str = "loss"  # metric used to pick the best model checkpoint

    # Training - overrides
    epochs: int = 10000
    batch_size: int = 96
    seq_len: int = 6144
    use_cache: bool = True
    mixed_precision: bool = True
    eval_split_size: int = 50

    # Noise schedule params
    train_noise_schedule: dict = field(default_factory=lambda: {"min_val": 1e-6, "max_val": 1e-2, "num_steps": 1000})
    test_noise_schedule: dict = field(
        default_factory=lambda: {
            "min_val": 1e-6,
            "max_val": 1e-2,
            "num_steps": 50,
        }
    )

    # Optimizer overrides
    grad_clip: float = 1.0
    lr: float = 1e-4  # initial learning rate
    lr_scheduler: str = "MultiStepLR"  # one of the schedulers from `torch.optim.lr_scheduler`
    lr_scheduler_params: dict = field(
        default_factory=lambda: {"gamma": 0.5, "milestones": [100000, 200000, 300000, 400000, 500000, 600000]}
    )
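

# Minimal usage sketch (illustrative only, not part of the library API): build a
# config, override the inference noise schedule, and inspect the result. The
# override values are examples, not recommended settings.
if __name__ == "__main__":
    config = WavegradConfig(batch_size=32)
    config.test_noise_schedule = {"min_val": 1e-6, "max_val": 1e-2, "num_steps": 25}  # example values
    print(config.model, config.batch_size, config.test_noise_schedule["num_steps"])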