mkiol
add mimic3 models
2b85a29
{
"seed": 1234,
"epochs": 10000,
"learning_rate": 0.0002,
"betas": [
0.8,
0.99
],
"eps": 1e-09,
"batch_size": 32,
"fp16_run": true,
"lr_decay": 0.999875,
"segment_size": 8192,
"init_lr_ratio": 1.0,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0,
"grad_clip": null,
"min_seq_length": null,
"max_seq_length": 400,
"min_spec_length": null,
"max_spec_length": null,
"min_speaker_utterances": null,
"last_epoch": 1,
"global_step": 1,
"best_loss": null,
"audio": {
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"mel_channels": 80,
"sample_rate": 22050,
"sample_bytes": 2,
"channels": 1,
"mel_fmin": 0.0,
"mel_fmax": null,
"ref_level_db": 20.0,
"spec_gain": 1.0,
"signal_norm": true,
"min_level_db": -100.0,
"max_norm": 1.0,
"clip_norm": true,
"symmetric_norm": true,
"do_dynamic_range_compression": true,
"convert_db_to_amp": true,
"do_trim_silence": false,
"trim_silence_db": 40.0,
"trim_margin_sec": 0.01,
"trim_keep_sec": 0.25,
"scale_mels": false
},
"model": {
"num_symbols": 56,
"n_speakers": 1,
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 768,
"n_heads": 2,
"n_layers": 6,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "2",
"resblock_kernel_sizes": [
3,
5,
7
],
"resblock_dilation_sizes": [
[
1,
2
],
[
2,
6
],
[
3,
12
]
],
"upsample_rates": [
8,
8,
4
],
"upsample_initial_channel": 256,
"upsample_kernel_sizes": [
16,
16,
8
],
"n_layers_q": 3,
"use_spectral_norm": false,
"gin_channels": 0,
"use_sdp": true
},
"phonemes": {
"phoneme_separator": "_",
"word_separator": "#",
"phoneme_to_id": null,
"pad": "_",
"bos": "^",
"eos": "$",
"blank": "_",
"blank_word": "#",
"blank_between": "tokens_and_words",
"blank_at_start": true,
"blank_at_end": true,
"simple_punctuation": true,
"punctuation_map": null,
"separate": [
"\u02c8",
"\u02cc"
],
"separate_graphemes": false,
"separate_tones": false,
"tone_before": false,
"phoneme_map": null,
"auto_bos_eos": true,
"minor_break": "\u00b7",
"major_break": null
},
"text_aligner": {
"aligner": "kaldi_align",
"casing": "lower"
},
"text_language": "de_DE",
"phonemizer": "gruut",
"inference": {
"auto_append_text": "."
},
"datasets": [
{
"name": "thorsten_neutral",
"metadata_format": "text",
"multispeaker": false,
"text_language": null,
"audio_dir": "/media/12tb/de-de/Thorsten-Voice-Neutral-Dec2021-22kHz/wavs",
"cache_dir": "/media/cache/thorsten_neutral"
},
{
"name": "thorsten_original",
"metadata_format": "text",
"multispeaker": false,
"text_language": null,
"audio_dir": "/media/12tb/de-de/thorsten-de/wavs",
"cache_dir": "/media/cache/thorsten_original"
}
],
"version": 1,
"git_commit": ""
}