{ "train": { "log_interval": 100, "eval_interval": 10000, "seed": 1234, "port": 8001, "epochs": 10000, "learning_rate": 0.0002, "betas": [ 0.8, 0.99 ], "eps": 1e-09, "batch_size": 12, "accumulation_steps": 1, "fp16_run": false, "lr_decay": 0.998, "segment_size": 10240, "init_lr_ratio": 1, "warmup_epochs": 0, "c_mel": 45, "keep_ckpts": 6 }, "data": { "data_dir": "dataset", "dataset_type": "SingDataset", "collate_type": "SingCollate", "training_filelist": "filelists/train.txt", "validation_filelist": "filelists/val.txt", "max_wav_value": 32768.0, "sampling_rate": 44100, "n_fft": 2048, "fmin": 0, "fmax": 22050, "hop_length": 512, "win_size": 2048, "acoustic_dim": 80, "c_dim": 256, "min_level_db": -115, "ref_level_db": 20, "min_db": -115, "max_abs_value": 4.0, "n_speakers": 200 }, "model": { "hidden_channels": 192, "spk_channels": 192, "filter_channels": 768, "n_heads": 2, "n_layers": 4, "kernel_size": 3, "p_dropout": 0.1, "prior_hidden_channels": 192, "prior_filter_channels": 768, "prior_n_heads": 2, "prior_n_layers": 4, "prior_kernel_size": 3, "prior_p_dropout": 0.1, "resblock": "1", "use_spectral_norm": false, "resblock_kernel_sizes": [3,7,11], "resblock_dilation_sizes": [[1,3,5],[1,3,5],[1,3,5]], "upsample_rates": [8,8,4,2], "upsample_initial_channel": 256, "upsample_kernel_sizes": [16,16,8,4], "n_harmonic": 64, "n_bands": 65 }, "spk": { "suijiSUI": 0 } }