# Three top-level sections, each naming a class through `class_path` and
# listing its settings under `init_args`.
# NOTE(review): presumably `init_args` are passed to the class constructor by
# the loader that consumes this file - confirm against that loader.

feature_extractor:
  class_path: vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    sample_rate: 32000
    n_fft: 2048
    # hop_length is exactly n_fft / 2.
    hop_length: 1024
    # Same value as backbone.init_args.input_channels below.
    # NOTE(review): presumably these two must be changed together - confirm.
    n_mels: 128
    padding: center

backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    input_channels: 128
    # Same value as head.init_args.dim below.
    # NOTE(review): presumably these two must be changed together - confirm.
    dim: 512
    # intermediate_dim is exactly 3 * dim.
    intermediate_dim: 1536
    num_layers: 8

head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    dim: 512
    # n_fft, hop_length and padding repeat the feature_extractor values.
    # NOTE(review): presumably they must stay identical to them - confirm.
    n_fft: 2048
    hop_length: 1024
    padding: center