mkiol committed on May 1, 2024

Commit

2b85a29

1 Parent(s): a01d6dd

add mimic3 models

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +32 -0
voices/af_ZA/google-nwu_low/ALIASES +3 -0
voices/af_ZA/google-nwu_low/LICENSE +1 -0
voices/af_ZA/google-nwu_low/README.md +311 -0
voices/af_ZA/google-nwu_low/README.md.in +5 -0
voices/af_ZA/google-nwu_low/SOURCE +1 -0
voices/af_ZA/google-nwu_low/VERSION +1 -0
voices/af_ZA/google-nwu_low/config.json +166 -0
voices/af_ZA/google-nwu_low/generator.onnx +3 -0
voices/af_ZA/google-nwu_low/phonemes.txt +60 -0
voices/af_ZA/google-nwu_low/speaker_map.csv +9 -0
voices/af_ZA/google-nwu_low/speakers.txt +9 -0
voices/bn/multi_low/ALIASES +2 -0
voices/bn/multi_low/README.md +299 -0
voices/bn/multi_low/README.md.in +8 -0
voices/bn/multi_low/SOURCE +2 -0
voices/bn/multi_low/VERSION +1 -0
voices/bn/multi_low/cmu-indic/LICENSE +20 -0
voices/bn/multi_low/cmu-indic/SOURCE +1 -0
voices/bn/multi_low/config.json +154 -0
voices/bn/multi_low/generator.onnx +3 -0
voices/bn/multi_low/google/LICENSE +1 -0
voices/bn/multi_low/google/SOURCE +1 -0
voices/bn/multi_low/phoneme_map.txt +1 -0
voices/bn/multi_low/phonemes.txt +57 -0
voices/bn/multi_low/speaker_map.csv +16 -0
voices/bn/multi_low/speakers.txt +16 -0
voices/de_DE/m-ailabs_low/ALIASES +1 -0
voices/de_DE/m-ailabs_low/LICENSE +8 -0
voices/de_DE/m-ailabs_low/README.md +296 -0
voices/de_DE/m-ailabs_low/README.md.in +5 -0
voices/de_DE/m-ailabs_low/SOURCE +1 -0
voices/de_DE/m-ailabs_low/VERSION +1 -0
voices/de_DE/m-ailabs_low/config.json +151 -0
voices/de_DE/m-ailabs_low/generator.onnx +3 -0
voices/de_DE/m-ailabs_low/phoneme_map.txt +2 -0
voices/de_DE/m-ailabs_low/phonemes.txt +57 -0
voices/de_DE/m-ailabs_low/speaker_map.csv +5 -0
voices/de_DE/m-ailabs_low/speakers.txt +5 -0
voices/de_DE/thorsten-emotion_low/LICENSE +1 -0
voices/de_DE/thorsten-emotion_low/README.md +291 -0
voices/de_DE/thorsten-emotion_low/README.md.in +5 -0
voices/de_DE/thorsten-emotion_low/SOURCE +1 -0
voices/de_DE/thorsten-emotion_low/VERSION +1 -0
voices/de_DE/thorsten-emotion_low/config.json +157 -0
voices/de_DE/thorsten-emotion_low/generator.onnx +3 -0
voices/de_DE/thorsten-emotion_low/phonemes.txt +56 -0
voices/de_DE/thorsten-emotion_low/speaker_map.csv +8 -0
voices/de_DE/thorsten-emotion_low/speakers.txt +8 -0
voices/de_DE/thorsten_low/ALIASES +4 -0

README.md CHANGED Viewed

@@ -1,3 +1,35 @@
 ---
 license: cc-by-sa-4.0
 ---

 ---
 license: cc-by-sa-4.0
+language:
+- af
+- bn
+- de
+- el
+- en
+- en
+- es
+- fa
+- fi
+- fr
+- gu
+- ha
+- hu
+- it
+- jv
+- ko
+- ne
+- nl
+- pl
+- ru
+- sw
+- te
+- tn
+- uk
+- vi
+- yo
 ---
+Voice models for the Mimic 3 text-to-speech system.
+Original source: https://github.com/MycroftAI/mimic3-voices

voices/af_ZA/google-nwu_low/ALIASES ADDED Viewed

	@@ -0,0 +1,3 @@

+af
+af_ZA
+af_ZA/google-nwu

voices/af_ZA/google-nwu_low/LICENSE ADDED Viewed

	@@ -0,0 +1 @@


1	+ Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)

voices/af_ZA/google-nwu_low/README.md ADDED Viewed

	@@ -0,0 +1,311 @@

+# Afrikaans Google/North West University (Low Quality)
+A multi-speaker model for Afrikaans based on the [Google/NWU dataset](http://www.openslr.org/32/).
+See LICENSE file for license.
+## Phonemes
+<table><thead><tr><th>&nbsp;</th><th>Phoneme</th><th>Description</th></tr></thead>
+<tr>
+<td> 0 </td>
+<td> _ </td>
+<td> padding </td>
+</tr>
+<tr>
+<td> 1 </td>
+<td> ^ </td>
+<td> start utterance </td>
+</tr>
+<tr>
+<td> 2 </td>
+<td> $ </td>
+<td> end utterance </td>
+</tr>
+<tr>
+<td> 3 </td>
+<td> , </td>
+<td> short pause (minor break) </td>
+</tr>
+<tr>
+<td> 4 </td>
+<td> . </td>
+<td> long pause (major break) </td>
+</tr>
+<tr>
+<td> 5 </td>
+<td> # </td>
+<td> word break </td>
+</tr>
+<tr>
+<td> 6 </td>
+<td> ˈ </td>
+<td> primary stress </td>
+</tr>
+<tr>
+<td> 7 </td>
+<td> ˌ </td>
+<td> secondary stress </td>
+</tr>
+<tr>
+<td> 8 </td>
+<td> - </td>
+<td>  </td>
+</tr>
+<tr>
+<td> 9 </td>
+<td> a </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 10 </td>
+<td> b </td>
+<td> consonant plosive bilabial voiced [<a title="Audio sample for consonant plosive bilabial voiced " href="../../../phonemes/voiced_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 11 </td>
+<td> c </td>
+<td> consonant plosive palatal unvoiced [<a title="Audio sample for consonant plosive palatal unvoiced " href="../../../phonemes/voiceless_palatal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 12 </td>
+<td> d </td>
+<td> consonant plosive alveolar voiced [<a title="Audio sample for consonant plosive alveolar voiced " href="../../../phonemes/voiced_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 13 </td>
+<td> e </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 14 </td>
+<td> f </td>
+<td> consonant fricative labio-dental unvoiced [<a title="Audio sample for consonant fricative labio-dental unvoiced " href="../../../phonemes/voiceless_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 15 </td>
+<td> h </td>
+<td> consonant fricative glottal unvoiced [<a title="Audio sample for consonant fricative glottal unvoiced " href="../../../phonemes/voiceless_glottal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 16 </td>
+<td> i </td>
+<td> vowel close front unrounded [<a title="Audio sample for vowel close front unrounded " href="../../../phonemes/close_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 17 </td>
+<td> j </td>
+<td> consonant approximant palatal voiced [<a title="Audio sample for consonant approximant palatal voiced " href="../../../phonemes/palatal_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 18 </td>
+<td> k </td>
+<td> consonant plosive velar unvoiced [<a title="Audio sample for consonant plosive velar unvoiced " href="../../../phonemes/voiceless_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 19 </td>
+<td> l </td>
+<td> consonant lateral-approximant alveolar voiced [<a title="Audio sample for consonant lateral-approximant alveolar voiced " href="../../../phonemes/alveolar_lateral_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 20 </td>
+<td> m </td>
+<td> consonant nasal bilabial voiced [<a title="Audio sample for consonant nasal bilabial voiced " href="../../../phonemes/bilabial_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 21 </td>
+<td> n </td>
+<td> consonant nasal alveolar voiced [<a title="Audio sample for consonant nasal alveolar voiced " href="../../../phonemes/alveolar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 22 </td>
+<td> o </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 23 </td>
+<td> p </td>
+<td> consonant plosive bilabial unvoiced [<a title="Audio sample for consonant plosive bilabial unvoiced " href="../../../phonemes/voiceless_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 24 </td>
+<td> r </td>
+<td> consonant trill alveolar voiced [<a title="Audio sample for consonant trill alveolar voiced " href="../../../phonemes/alveolar_trill.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 25 </td>
+<td> s </td>
+<td> consonant fricative alveolar unvoiced [<a title="Audio sample for consonant fricative alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 26 </td>
+<td> t </td>
+<td> consonant plosive alveolar unvoiced [<a title="Audio sample for consonant plosive alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 27 </td>
+<td> u </td>
+<td> vowel close back rounded [<a title="Audio sample for vowel close back rounded " href="../../../phonemes/close_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 28 </td>
+<td> v </td>
+<td> consonant fricative labio-dental voiced [<a title="Audio sample for consonant fricative labio-dental voiced " href="../../../phonemes/voiced_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 29 </td>
+<td> w </td>
+<td> consonant approximant bilabial voiced [<a title="Audio sample for consonant approximant bilabial voiced " href="../../../phonemes/voiced_bilabial_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 30 </td>
+<td> x </td>
+<td> consonant fricative velar unvoiced [<a title="Audio sample for consonant fricative velar unvoiced " href="../../../phonemes/voiceless_velar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 31 </td>
+<td> y </td>
+<td> vowel close front rounded [<a title="Audio sample for vowel close front rounded " href="../../../phonemes/close_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 32 </td>
+<td> z </td>
+<td> consonant fricative alveolar voiced [<a title="Audio sample for consonant fricative alveolar voiced " href="../../../phonemes/voiced_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 33 </td>
+<td> æ </td>
+<td> vowel near-open front unrounded [<a title="Audio sample for vowel near-open front unrounded " href="../../../phonemes/near-open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 34 </td>
+<td> ð </td>
+<td> consonant fricative dental voiced [<a title="Audio sample for consonant fricative dental voiced " href="../../../phonemes/voiced_dental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 35 </td>
+<td> õ </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 36 </td>
+<td> ø </td>
+<td> vowel close-mid front rounded [<a title="Audio sample for vowel close-mid front rounded " href="../../../phonemes/close-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 37 </td>
+<td> ŋ </td>
+<td> consonant nasal velar voiced [<a title="Audio sample for consonant nasal velar voiced " href="../../../phonemes/velar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 38 </td>
+<td> œ </td>
+<td> vowel open-mid front rounded [<a title="Audio sample for vowel open-mid front rounded " href="../../../phonemes/open-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 39 </td>
+<td> ɐ </td>
+<td> vowel near-open central unrounded [<a title="Audio sample for vowel near-open central unrounded " href="../../../phonemes/near-open_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 40 </td>
+<td> ɑ </td>
+<td> vowel open back unrounded [<a title="Audio sample for vowel open back unrounded " href="../../../phonemes/open_back_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 41 </td>
+<td> ɑ̃ </td>
+<td> vowel open back unrounded [<a title="Audio sample for vowel open back unrounded " href="../../../phonemes/open_back_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 42 </td>
+<td> ɒ </td>
+<td> vowel open back rounded [<a title="Audio sample for vowel open back rounded " href="../../../phonemes/open_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 43 </td>
+<td> ɔ </td>
+<td> vowel open-mid back rounded [<a title="Audio sample for vowel open-mid back rounded " href="../../../phonemes/open-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 44 </td>
+<td> ə </td>
+<td> vowel mid central unrounded </td>
+</tr>
+<tr>
+<td> 45 </td>
+<td> ɛ </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 46 </td>
+<td> ɜ </td>
+<td> vowel open-mid central unrounded [<a title="Audio sample for vowel open-mid central unrounded " href="../../../phonemes/open-mid_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 47 </td>
+<td> ɡ </td>
+<td> consonant plosive velar voiced [<a title="Audio sample for consonant plosive velar voiced " href="../../../phonemes/voiced_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 48 </td>
+<td> ɪ </td>
+<td> vowel near-close near-front unrounded [<a title="Audio sample for vowel near-close near-front unrounded " href="../../../phonemes/near-close_near-front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 49 </td>
+<td> ɬ </td>
+<td>  </td>
+</tr>
+<tr>
+<td> 50 </td>
+<td> ɹ </td>
+<td> consonant approximant alveolar voiced [<a title="Audio sample for consonant approximant alveolar voiced " href="../../../phonemes/alveolar_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 51 </td>
+<td> ʁ </td>
+<td> consonant fricative uvular voiced [<a title="Audio sample for consonant fricative uvular voiced " href="../../../phonemes/voiced_uvular_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 52 </td>
+<td> ʃ </td>
+<td> consonant fricative post-alveolar unvoiced [<a title="Audio sample for consonant fricative post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 53 </td>
+<td> ʊ </td>
+<td> vowel near-close near-back rounded [<a title="Audio sample for vowel near-close near-back rounded " href="../../../phonemes/near-close_near-back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 54 </td>
+<td> ʌ </td>
+<td> vowel open-mid back unrounded [<a title="Audio sample for vowel open-mid back unrounded " href="../../../phonemes/open-mid_back_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 55 </td>
+<td> ʒ </td>
+<td> consonant fricative post-alveolar voiced [<a title="Audio sample for consonant fricative post-alveolar voiced " href="../../../phonemes/voiced_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 56 </td>
+<td> ʔ </td>
+<td> consonant plosive glottal unvoiced [<a title="Audio sample for consonant plosive glottal unvoiced " href="../../../phonemes/glottal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 57 </td>
+<td> ː </td>
+<td> elongation </td>
+</tr>
+<tr>
+<td> 58 </td>
+<td> θ </td>
+<td> consonant fricative dental unvoiced [<a title="Audio sample for consonant fricative dental unvoiced " href="../../../phonemes/voiceless_dental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 59 </td>
+<td> ẽ </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+</table>

voices/af_ZA/google-nwu_low/README.md.in ADDED Viewed

	@@ -0,0 +1,5 @@

+# Afrikaans Google/North West University (Low Quality)
+A multi-speaker model for Afrikaans based on the [Google/NWU dataset](http://www.openslr.org/32/).
+See LICENSE file for license.

voices/af_ZA/google-nwu_low/SOURCE ADDED Viewed

	@@ -0,0 +1 @@


1	+ http://www.openslr.org/32/

voices/af_ZA/google-nwu_low/VERSION ADDED Viewed

	@@ -0,0 +1 @@


1	+ 0.1.0

voices/af_ZA/google-nwu_low/config.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0002,
+    "betas": [
+        0.8,
+        0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 32,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 8192,
+    "init_lr_ratio": 1.0,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "grad_clip": null,
+    "min_seq_length": null,
+    "max_seq_length": 400,
+    "min_spec_length": null,
+    "max_spec_length": null,
+    "min_speaker_utterances": null,
+    "last_epoch": 1,
+    "global_step": 1,
+    "best_loss": null,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0.0,
+        "mel_fmax": null,
+        "ref_level_db": 20.0,
+        "spec_gain": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100.0,
+        "max_norm": 1.0,
+        "clip_norm": true,
+        "symmetric_norm": true,
+        "do_dynamic_range_compression": true,
+        "convert_db_to_amp": true,
+        "do_trim_silence": false,
+        "trim_silence_db": 40.0,
+        "trim_margin_sec": 0.01,
+        "trim_keep_sec": 0.25,
+        "scale_mels": false
+    },
+    "model": {
+        "num_symbols": 60,
+        "n_speakers": 9,
+        "inter_channels": 192,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "n_heads": 2,
+        "n_layers": 6,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "resblock": "2",
+        "resblock_kernel_sizes": [
+            3,
+            5,
+            7
+        ],
+        "resblock_dilation_sizes": [
+            [
+                1,
+                2
+            ],
+            [
+                2,
+                6
+            ],
+            [
+                3,
+                12
+            ]
+        ],
+        "upsample_rates": [
+            8,
+            8,
+            4
+        ],
+        "upsample_initial_channel": 256,
+        "upsample_kernel_sizes": [
+            16,
+            16,
+            8
+        ],
+        "n_layers_q": 3,
+        "use_spectral_norm": false,
+        "gin_channels": 512,
+        "use_sdp": true
+    },
+    "phonemes": {
+        "phoneme_separator": "_",
+        "word_separator": "#",
+        "phoneme_to_id": null,
+        "pad": "_",
+        "bos": "^",
+        "eos": "$",
+        "blank": "_",
+        "blank_word": "#",
+        "blank_between": "tokens_and_words",
+        "blank_at_start": true,
+        "blank_at_end": true,
+        "simple_punctuation": true,
+        "punctuation_map": null,
+        "separate": [
+            "\u02c8",
+            "\u02cc"
+        ],
+        "separate_graphemes": false,
+        "separate_tones": false,
+        "tone_before": false,
+        "phoneme_map": {
+            ";": [
+                ","
+            ],
+            ":": [
+                ","
+            ],
+            "!": [
+                "."
+            ],
+            "?": [
+                "."
+            ]
+        },
+        "auto_bos_eos": true,
+        "minor_break": ",",
+        "major_break": ".",
+        "break_phonemes_into_graphemes": true,
+        "break_phonemes_into_codepoints": false,
+        "drop_stress": false,
+        "symbols": null
+    },
+    "text_aligner": {
+        "aligner": null,
+        "casing": null
+    },
+    "text_language": "af",
+    "phonemizer": "espeak",
+    "datasets": [
+        {
+            "name": "af-za_google",
+            "metadata_format": "text",
+            "multispeaker": true,
+            "text_language": null,
+            "audio_dir": "/media/12tb/af-za/google/af_za/za/afr/wavs",
+            "cache_dir": "/media/cache/af-za_google"
+        }
+    ],
+    "inference": {
+        "length_scale": 1.0,
+        "noise_scale": 0.333,
+        "noise_w": 0.333,
+        "major_break_ms": 250,
+        "auto_append_text": "."
+    },
+    "version": 1,
+    "git_commit": ""
+}

voices/af_ZA/google-nwu_low/generator.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8366e03683ea3c15f25ba0163aa1b6e87c1692674cf27b1fb6ba479dbef7e0bb
+size 76351329

voices/af_ZA/google-nwu_low/phonemes.txt ADDED Viewed

	@@ -0,0 +1,60 @@

+0 _
+1 ^
+2 $
+3 ,
+4 .
+5 #
+6 ˈ
+7 ˌ
+8 -
+9 a
+10 b
+11 c
+12 d
+13 e
+14 f
+15 h
+16 i
+17 j
+18 k
+19 l
+20 m
+21 n
+22 o
+23 p
+24 r
+25 s
+26 t
+27 u
+28 v
+29 w
+30 x
+31 y
+32 z
+33 æ
+34 ð
+35 õ
+36 ø
+37 ŋ
+38 œ
+39 ɐ
+40 ɑ
+41 ɑ̃
+42 ɒ
+43 ɔ
+44 ə
+45 ɛ
+46 ɜ
+47 ɡ
+48 ɪ
+49 ɬ
+50 ɹ
+51 ʁ
+52 ʃ
+53 ʊ
+54 ʌ
+55 ʒ
+56 ʔ
+57 ː
+58 θ
+59 ẽ

voices/af_ZA/google-nwu_low/speaker_map.csv ADDED Viewed

	@@ -0,0 +1,9 @@

+0|af-za_google|7214
+1|af-za_google|8963
+2|af-za_google|7130
+3|af-za_google|8924
+4|af-za_google|8148
+5|af-za_google|1919
+6|af-za_google|2418
+7|af-za_google|6590
+8|af-za_google|0184

voices/af_ZA/google-nwu_low/speakers.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+7214
+8963
+7130
+8924
+8148
+1919
+2418
+6590
+0184

voices/bn/multi_low/ALIASES ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ bn
2	+ bn/multi

voices/bn/multi_low/README.md ADDED Viewed

	@@ -0,0 +1,299 @@

+# Bengali multi (Low Quality)
+A multi-speaker model for Bengali based on:
+* [cmu-indic](http://festvox.org/cmu_indic/)
+* [google](http://www.openslr.org/37/)
+See LICENSE files for licenses.
+## Phonemes
+<table><thead><tr><th>&nbsp;</th><th>Phoneme</th><th>Description</th></tr></thead>
+<tr>
+<td> 0 </td>
+<td> _ </td>
+<td> padding </td>
+</tr>
+<tr>
+<td> 1 </td>
+<td> ^ </td>
+<td> start utterance </td>
+</tr>
+<tr>
+<td> 2 </td>
+<td> $ </td>
+<td> end utterance </td>
+</tr>
+<tr>
+<td> 3 </td>
+<td> , </td>
+<td> short pause (minor break) </td>
+</tr>
+<tr>
+<td> 4 </td>
+<td> . </td>
+<td> long pause (major break) </td>
+</tr>
+<tr>
+<td> 5 </td>
+<td> # </td>
+<td> word break </td>
+</tr>
+<tr>
+<td> 6 </td>
+<td> ˈ </td>
+<td> primary stress </td>
+</tr>
+<tr>
+<td> 7 </td>
+<td> ˌ </td>
+<td> secondary stress </td>
+</tr>
+<tr>
+<td> 8 </td>
+<td> a </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 9 </td>
+<td> b </td>
+<td> consonant plosive bilabial voiced [<a title="Audio sample for consonant plosive bilabial voiced " href="../../../phonemes/voiced_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 10 </td>
+<td> c </td>
+<td> consonant plosive palatal unvoiced [<a title="Audio sample for consonant plosive palatal unvoiced " href="../../../phonemes/voiceless_palatal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 11 </td>
+<td> d </td>
+<td> consonant plosive alveolar voiced [<a title="Audio sample for consonant plosive alveolar voiced " href="../../../phonemes/voiced_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 12 </td>
+<td> e </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 13 </td>
+<td> f </td>
+<td> consonant fricative labio-dental unvoiced [<a title="Audio sample for consonant fricative labio-dental unvoiced " href="../../../phonemes/voiceless_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 14 </td>
+<td> h </td>
+<td> consonant fricative glottal unvoiced [<a title="Audio sample for consonant fricative glottal unvoiced " href="../../../phonemes/voiceless_glottal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 15 </td>
+<td> i </td>
+<td> vowel close front unrounded [<a title="Audio sample for vowel close front unrounded " href="../../../phonemes/close_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 16 </td>
+<td> j </td>
+<td> consonant approximant palatal voiced [<a title="Audio sample for consonant approximant palatal voiced " href="../../../phonemes/palatal_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 17 </td>
+<td> k </td>
+<td> consonant plosive velar unvoiced [<a title="Audio sample for consonant plosive velar unvoiced " href="../../../phonemes/voiceless_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 18 </td>
+<td> l </td>
+<td> consonant lateral-approximant alveolar voiced [<a title="Audio sample for consonant lateral-approximant alveolar voiced " href="../../../phonemes/alveolar_lateral_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 19 </td>
+<td> m </td>
+<td> consonant nasal bilabial voiced [<a title="Audio sample for consonant nasal bilabial voiced " href="../../../phonemes/bilabial_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 20 </td>
+<td> n </td>
+<td> consonant nasal alveolar voiced [<a title="Audio sample for consonant nasal alveolar voiced " href="../../../phonemes/alveolar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 21 </td>
+<td> o </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 22 </td>
+<td> p </td>
+<td> consonant plosive bilabial unvoiced [<a title="Audio sample for consonant plosive bilabial unvoiced " href="../../../phonemes/voiceless_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 23 </td>
+<td> p̃ </td>
+<td> consonant plosive bilabial unvoiced [<a title="Audio sample for consonant plosive bilabial unvoiced " href="../../../phonemes/voiceless_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 24 </td>
+<td> r </td>
+<td> consonant trill alveolar voiced [<a title="Audio sample for consonant trill alveolar voiced " href="../../../phonemes/alveolar_trill.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 25 </td>
+<td> s </td>
+<td> consonant fricative alveolar unvoiced [<a title="Audio sample for consonant fricative alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 26 </td>
+<td> t </td>
+<td> consonant plosive alveolar unvoiced [<a title="Audio sample for consonant plosive alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 27 </td>
+<td> u </td>
+<td> vowel close back rounded [<a title="Audio sample for vowel close back rounded " href="../../../phonemes/close_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 28 </td>
+<td> v </td>
+<td> consonant fricative labio-dental voiced [<a title="Audio sample for consonant fricative labio-dental voiced " href="../../../phonemes/voiced_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 29 </td>
+<td> w </td>
+<td> consonant approximant bilabial voiced [<a title="Audio sample for consonant approximant bilabial voiced " href="../../../phonemes/voiced_bilabial_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 30 </td>
+<td> ã </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 31 </td>
+<td> æ </td>
+<td> vowel near-open front unrounded [<a title="Audio sample for vowel near-open front unrounded " href="../../../phonemes/near-open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 32 </td>
+<td> õ </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 33 </td>
+<td> ĩ </td>
+<td> vowel close front unrounded [<a title="Audio sample for vowel close front unrounded " href="../../../phonemes/close_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 34 </td>
+<td> ŋ </td>
+<td> consonant nasal velar voiced [<a title="Audio sample for consonant nasal velar voiced " href="../../../phonemes/velar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 35 </td>
+<td> ũ </td>
+<td> vowel close back rounded [<a title="Audio sample for vowel close back rounded " href="../../../phonemes/close_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 36 </td>
+<td> ɐ </td>
+<td> vowel near-open central unrounded [<a title="Audio sample for vowel near-open central unrounded " href="../../../phonemes/near-open_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 37 </td>
+<td> ɑ </td>
+<td> vowel open back unrounded [<a title="Audio sample for vowel open back unrounded " href="../../../phonemes/open_back_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 38 </td>
+<td> ɒ </td>
+<td> vowel open back rounded [<a title="Audio sample for vowel open back rounded " href="../../../phonemes/open_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 39 </td>
+<td> ɔ </td>
+<td> vowel open-mid back rounded [<a title="Audio sample for vowel open-mid back rounded " href="../../../phonemes/open-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 40 </td>
+<td> ɔ̃ </td>
+<td> vowel open-mid back rounded [<a title="Audio sample for vowel open-mid back rounded " href="../../../phonemes/open-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 41 </td>
+<td> ɖ </td>
+<td> consonant plosive retroflex voiced [<a title="Audio sample for consonant plosive retroflex voiced " href="../../../phonemes/voiced_retroflex_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 42 </td>
+<td> ə </td>
+<td> vowel mid central unrounded </td>
+</tr>
+<tr>
+<td> 43 </td>
+<td> ɛ </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 44 </td>
+<td> ɜ </td>
+<td> vowel open-mid central unrounded [<a title="Audio sample for vowel open-mid central unrounded " href="../../../phonemes/open-mid_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 45 </td>
+<td> ɟ </td>
+<td> consonant plosive palatal voiced [<a title="Audio sample for consonant plosive palatal voiced " href="../../../phonemes/voiced_palatal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 46 </td>
+<td> ɡ </td>
+<td> consonant plosive velar voiced [<a title="Audio sample for consonant plosive velar voiced " href="../../../phonemes/voiced_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 47 </td>
+<td> ɪ </td>
+<td> vowel near-close near-front unrounded [<a title="Audio sample for vowel near-close near-front unrounded " href="../../../phonemes/near-close_near-front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 48 </td>
+<td> ɹ </td>
+<td> consonant approximant alveolar voiced [<a title="Audio sample for consonant approximant alveolar voiced " href="../../../phonemes/alveolar_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 49 </td>
+<td> ɾ </td>
+<td> consonant flap alveolar voiced </td>
+</tr>
+<tr>
+<td> 50 </td>
+<td> ʃ </td>
+<td> consonant fricative post-alveolar unvoiced [<a title="Audio sample for consonant fricative post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 51 </td>
+<td> ʈ </td>
+<td> consonant plosive retroflex unvoiced [<a title="Audio sample for consonant plosive retroflex unvoiced " href="../../../phonemes/voiceless_retroflex_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 52 </td>
+<td> ʊ </td>
+<td> vowel near-close near-back rounded [<a title="Audio sample for vowel near-close near-back rounded " href="../../../phonemes/near-close_near-back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 53 </td>
+<td> ʒ </td>
+<td> consonant fricative post-alveolar voiced [<a title="Audio sample for consonant fricative post-alveolar voiced " href="../../../phonemes/voiced_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 54 </td>
+<td> ʰ </td>
+<td>  </td>
+</tr>
+<tr>
+<td> 55 </td>
+<td> ː </td>
+<td> elongation </td>
+</tr>
+<tr>
+<td> 56 </td>
+<td> ẽ </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+</table>

voices/bn/multi_low/README.md.in ADDED Viewed

	@@ -0,0 +1,8 @@

+# Bengali multi (Low Quality)
+A multi-speaker model for Bengali based on:
+* [cmu-indic](http://festvox.org/cmu_indic/)
+* [google](http://www.openslr.org/37/)
+See LICENSE files for licenses.

voices/bn/multi_low/SOURCE ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ http://festvox.org/cmu_indic/
2	+ http://www.openslr.org/37/

voices/bn/multi_low/VERSION ADDED Viewed

	@@ -0,0 +1 @@


1	+ 0.1.0

voices/bn/multi_low/cmu-indic/LICENSE ADDED Viewed

	@@ -0,0 +1,20 @@

+                   Carnegie Mellon University
+                       Copyright (c) 2003
+                      All Rights Reserved.
+Permission to use, copy, modify, and license this software and its
+documentation for any purpose, is hereby granted without fee,
+subject to the following conditions:
+ 1. The code must retain the above copyright notice, this list of
+conditions and the following disclaimer.
+ 2. Any modifications must be clearly marked as such.
+ 3. Original authors' names are not deleted.
+THE AUTHORS OF THIS WORK DISCLAIM ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS, IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.

voices/bn/multi_low/cmu-indic/SOURCE ADDED Viewed

	@@ -0,0 +1 @@


1	+ http://festvox.org/cmu_indic/

voices/bn/multi_low/config.json ADDED Viewed

	@@ -0,0 +1,154 @@

+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0002,
+    "betas": [
+        0.8,
+        0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 32,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 8192,
+    "init_lr_ratio": 1.0,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "grad_clip": null,
+    "min_seq_length": null,
+    "max_seq_length": 400,
+    "min_spec_length": null,
+    "max_spec_length": null,
+    "min_speaker_utterances": null,
+    "last_epoch": 1,
+    "global_step": 1,
+    "best_loss": null,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0,
+        "mel_fmax": null,
+        "ref_level_db": 20,
+        "spec_gain": 1,
+        "signal_norm": true,
+        "min_level_db": -100,
+        "max_norm": 1,
+        "clip_norm": true,
+        "symmetric_norm": true,
+        "do_dynamic_range_compression": true,
+        "convert_db_to_amp": true,
+        "do_trim_silence": false,
+        "trim_silence_db": 40,
+        "trim_margin_sec": 0.01,
+        "trim_keep_sec": 0.25,
+        "scale_mels": false
+    },
+    "model": {
+        "num_symbols": 57,
+        "n_speakers": 16,
+        "inter_channels": 192,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "n_heads": 2,
+        "n_layers": 6,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "resblock": "2",
+        "resblock_kernel_sizes": [
+            3,
+            5,
+            7
+        ],
+        "resblock_dilation_sizes": [
+            [
+                1,
+                2
+            ],
+            [
+                2,
+                6
+            ],
+            [
+                3,
+                12
+            ]
+        ],
+        "upsample_rates": [
+            8,
+            8,
+            4
+        ],
+        "upsample_initial_channel": 256,
+        "upsample_kernel_sizes": [
+            16,
+            16,
+            8
+        ],
+        "n_layers_q": 3,
+        "use_spectral_norm": false,
+        "gin_channels": 512,
+        "use_sdp": true
+    },
+    "phonemes": {
+        "phoneme_separator": "_",
+        "word_separator": "#",
+        "phoneme_to_id": null,
+        "pad": "_",
+        "bos": "^",
+        "eos": "$",
+        "blank": "_",
+        "blank_word": "#",
+        "blank_between": "tokens_and_words",
+        "blank_at_start": true,
+        "blank_at_end": true,
+        "simple_punctuation": true,
+        "punctuation_map": null,
+        "separate": [
+            "\u02c8",
+            "\u02cc"
+        ],
+        "separate_graphemes": false,
+        "separate_tones": false,
+        "tone_before": false,
+        "phoneme_map": null,
+        "auto_bos_eos": true,
+        "minor_break": ",",
+        "major_break": ".",
+        "break_phonemes_into_graphemes": true,
+        "break_phonemes_into_codepoints": false,
+        "drop_stress": false,
+        "symbols": null
+    },
+    "text_aligner": {
+        "aligner": null,
+        "casing": null
+    },
+    "text_language": "bn",
+    "phonemizer": "espeak",
+    "datasets": [
+        {
+            "name": "bn-multi",
+            "metadata_format": "text",
+            "multispeaker": true,
+            "text_language": null,
+            "audio_dir": "/media/12tb/bn/google/wavs",
+            "cache_dir": "/media/cache/bn-multi"
+        }
+    ],
+    "inference": {
+        "length_scale": 1.0,
+        "noise_scale": 0.333,
+        "noise_w": 0.333,
+        "minor_break_ms": 100,
+        "major_break_ms": 250,
+        "auto_append_text": "."
+    },
+    "version": 1,
+    "git_commit": ""
+}

voices/bn/multi_low/generator.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:115ebb90476abbd2d2828db90b549792e932340d731df885699f2ebdca697ba3
+size 76363361

voices/bn/multi_low/google/LICENSE ADDED Viewed

	@@ -0,0 +1 @@


1	+ Attribution-ShareAlike 4.0 (CC BY-SA 4.0)

voices/bn/multi_low/google/SOURCE ADDED Viewed

	@@ -0,0 +1 @@


1	+ http://www.openslr.org/37/

voices/bn/multi_low/phoneme_map.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ । .

voices/bn/multi_low/phonemes.txt ADDED Viewed

	@@ -0,0 +1,57 @@

+0 _
+1 ^
+2 $
+3 ,
+4 .
+5 #
+6 ˈ
+7 ˌ
+8 a
+9 b
+10 c
+11 d
+12 e
+13 f
+14 h
+15 i
+16 j
+17 k
+18 l
+19 m
+20 n
+21 o
+22 p
+23 p̃
+24 r
+25 s
+26 t
+27 u
+28 v
+29 w
+30 ã
+31 æ
+32 õ
+33 ĩ
+34 ŋ
+35 ũ
+36 ɐ
+37 ɑ
+38 ɒ
+39 ɔ
+40 ɔ̃
+41 ɖ
+42 ə
+43 ɛ
+44 ɜ
+45 ɟ
+46 ɡ
+47 ɪ
+48 ɹ
+49 ɾ
+50 ʃ
+51 ʈ
+52 ʊ
+53 ʒ
+54 ʰ
+55 ː
+56 ẽ

voices/bn/multi_low/speaker_map.csv ADDED Viewed

	@@ -0,0 +1,16 @@

+0|bn-multi|rm
+1|bn-multi|03042
+2|bn-multi|00737
+3|bn-multi|01232
+4|bn-multi|02194
+5|bn-multi|3108
+6|bn-multi|3713
+7|bn-multi|1010
+8|bn-multi|00779
+9|bn-multi|9169
+10|bn-multi|4046
+11|bn-multi|5958
+12|bn-multi|01701
+13|bn-multi|4811
+14|bn-multi|0834
+15|bn-multi|3958

voices/bn/multi_low/speakers.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+rm
+03042
+00737
+01232
+02194
+3108
+3713
+1010
+00779
+9169
+4046
+5958
+01701
+4811
+0834
+3958

voices/de_DE/m-ailabs_low/ALIASES ADDED Viewed

	@@ -0,0 +1 @@


1	+ de_DE/m-ailabs

voices/de_DE/m-ailabs_low/LICENSE ADDED Viewed

	@@ -0,0 +1,8 @@

+Copyright (c) 2017-2019 by the original creators @ M-AILABS with the following license:
+Redistribution and use in any form, including any commercial use, with or without modification are permitted provided that the following conditions are met:
+    Redistributions of source data must retain the above copyright notice, this list of conditions and the following disclaimer.
+    Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this downloaded data, source-code or binary-code without specific prior written permission.
+THIS DATA IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE and/or DATA, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

voices/de_DE/m-ailabs_low/README.md ADDED Viewed

	@@ -0,0 +1,296 @@

+# German M-AILabs (Low Quality)
+A multi-speaker model for German based on the [M-AILabs dataset](https://www.caito.de/2019/01/03/the-m-ailabs-speech-dataset/).
+See LICENSE file for license.
+## Phonemes
+<table><thead><tr><th>&nbsp;</th><th>Phoneme</th><th>Description</th></tr></thead>
+<tr>
+<td> 0 </td>
+<td> _ </td>
+<td> padding </td>
+</tr>
+<tr>
+<td> 1 </td>
+<td> ^ </td>
+<td> start utterance </td>
+</tr>
+<tr>
+<td> 2 </td>
+<td> $ </td>
+<td> end utterance </td>
+</tr>
+<tr>
+<td> 3 </td>
+<td> · </td>
+<td> silence </td>
+</tr>
+<tr>
+<td> 4 </td>
+<td> # </td>
+<td> word break </td>
+</tr>
+<tr>
+<td> 5 </td>
+<td> ˈ </td>
+<td> primary stress </td>
+</tr>
+<tr>
+<td> 6 </td>
+<td> ˌ </td>
+<td> secondary stress </td>
+</tr>
+<tr>
+<td> 7 </td>
+<td> a </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 8 </td>
+<td> aɪ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 9 </td>
+<td> aʊ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 10 </td>
+<td> aː </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 11 </td>
+<td> b </td>
+<td> consonant plosive bilabial voiced [<a title="Audio sample for consonant plosive bilabial voiced " href="../../../phonemes/voiced_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 12 </td>
+<td> d </td>
+<td> consonant plosive alveolar voiced [<a title="Audio sample for consonant plosive alveolar voiced " href="../../../phonemes/voiced_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 13 </td>
+<td> d͡ʒ </td>
+<td> consonant affricate post-alveolar voiced [<a title="Audio sample for consonant affricate post-alveolar voiced " href="../../../phonemes/voiced_postalveolar_affricate.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 14 </td>
+<td> eː </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 15 </td>
+<td> f </td>
+<td> consonant fricative labio-dental unvoiced [<a title="Audio sample for consonant fricative labio-dental unvoiced " href="../../../phonemes/voiceless_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 16 </td>
+<td> g </td>
+<td> consonant plosive velar voiced [<a title="Audio sample for consonant plosive velar voiced " href="../../../phonemes/voiced_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 17 </td>
+<td> h </td>
+<td> consonant fricative glottal unvoiced [<a title="Audio sample for consonant fricative glottal unvoiced " href="../../../phonemes/voiceless_glottal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 18 </td>
+<td> iː </td>
+<td> vowel close front unrounded [<a title="Audio sample for vowel close front unrounded " href="../../../phonemes/close_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 19 </td>
+<td> j </td>
+<td> consonant approximant palatal voiced [<a title="Audio sample for consonant approximant palatal voiced " href="../../../phonemes/palatal_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 20 </td>
+<td> k </td>
+<td> consonant plosive velar unvoiced [<a title="Audio sample for consonant plosive velar unvoiced " href="../../../phonemes/voiceless_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 21 </td>
+<td> l </td>
+<td> consonant lateral-approximant alveolar voiced [<a title="Audio sample for consonant lateral-approximant alveolar voiced " href="../../../phonemes/alveolar_lateral_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 22 </td>
+<td> m </td>
+<td> consonant nasal bilabial voiced [<a title="Audio sample for consonant nasal bilabial voiced " href="../../../phonemes/bilabial_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 23 </td>
+<td> n </td>
+<td> consonant nasal alveolar voiced [<a title="Audio sample for consonant nasal alveolar voiced " href="../../../phonemes/alveolar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 24 </td>
+<td> oː </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 25 </td>
+<td> p </td>
+<td> consonant plosive bilabial unvoiced [<a title="Audio sample for consonant plosive bilabial unvoiced " href="../../../phonemes/voiceless_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 26 </td>
+<td> p͡f </td>
+<td> consonant affricate labio-dental unvoiced </td>
+</tr>
+<tr>
+<td> 27 </td>
+<td> s </td>
+<td> consonant fricative alveolar unvoiced [<a title="Audio sample for consonant fricative alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 28 </td>
+<td> t </td>
+<td> consonant plosive alveolar unvoiced [<a title="Audio sample for consonant plosive alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 29 </td>
+<td> t͡s </td>
+<td> consonant affricate alveolar unvoiced [<a title="Audio sample for consonant affricate alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_affricate.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 30 </td>
+<td> t͡ʃ </td>
+<td> consonant affricate post-alveolar unvoiced [<a title="Audio sample for consonant affricate post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_affricate.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 31 </td>
+<td> uː </td>
+<td> vowel close back rounded [<a title="Audio sample for vowel close back rounded " href="../../../phonemes/close_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 32 </td>
+<td> v </td>
+<td> consonant fricative labio-dental voiced [<a title="Audio sample for consonant fricative labio-dental voiced " href="../../../phonemes/voiced_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 33 </td>
+<td> x </td>
+<td> consonant fricative velar unvoiced [<a title="Audio sample for consonant fricative velar unvoiced " href="../../../phonemes/voiceless_velar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 34 </td>
+<td> yː </td>
+<td> vowel close front rounded [<a title="Audio sample for vowel close front rounded " href="../../../phonemes/close_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 35 </td>
+<td> z </td>
+<td> consonant fricative alveolar voiced [<a title="Audio sample for consonant fricative alveolar voiced " href="../../../phonemes/voiced_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 36 </td>
+<td> ãː </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 37 </td>
+<td> ç </td>
+<td> consonant fricative palatal unvoiced [<a title="Audio sample for consonant fricative palatal unvoiced " href="../../../phonemes/voiceless_palatal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 38 </td>
+<td> õː </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 39 </td>
+<td> øː </td>
+<td> vowel close-mid front rounded [<a title="Audio sample for vowel close-mid front rounded " href="../../../phonemes/close-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 40 </td>
+<td> ŋ </td>
+<td> consonant nasal velar voiced [<a title="Audio sample for consonant nasal velar voiced " href="../../../phonemes/velar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 41 </td>
+<td> œ </td>
+<td> vowel open-mid front rounded [<a title="Audio sample for vowel open-mid front rounded " href="../../../phonemes/open-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 42 </td>
+<td> ɐ </td>
+<td> vowel near-open central unrounded [<a title="Audio sample for vowel near-open central unrounded " href="../../../phonemes/near-open_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 43 </td>
+<td> ɔ </td>
+<td> vowel open-mid back rounded [<a title="Audio sample for vowel open-mid back rounded " href="../../../phonemes/open-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 44 </td>
+<td> ɔʏ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 45 </td>
+<td> ə </td>
+<td> vowel mid central unrounded </td>
+</tr>
+<tr>
+<td> 46 </td>
+<td> ɛ </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 47 </td>
+<td> ɛː </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 48 </td>
+<td> ɛ̃ː </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 49 </td>
+<td> ɪ </td>
+<td> vowel near-close near-front unrounded [<a title="Audio sample for vowel near-close near-front unrounded " href="../../../phonemes/near-close_near-front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 50 </td>
+<td> ʁ </td>
+<td> consonant fricative uvular voiced [<a title="Audio sample for consonant fricative uvular voiced " href="../../../phonemes/voiced_uvular_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 51 </td>
+<td> ʃ </td>
+<td> consonant fricative post-alveolar unvoiced [<a title="Audio sample for consonant fricative post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 52 </td>
+<td> ʊ </td>
+<td> vowel near-close near-back rounded [<a title="Audio sample for vowel near-close near-back rounded " href="../../../phonemes/near-close_near-back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 53 </td>
+<td> ʏ </td>
+<td> vowel near-close near-front rounded [<a title="Audio sample for vowel near-close near-front rounded " href="../../../phonemes/near-close_near-front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 54 </td>
+<td> ʒ </td>
+<td> consonant fricative post-alveolar voiced [<a title="Audio sample for consonant fricative post-alveolar voiced " href="../../../phonemes/voiced_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 55 </td>
+<td> ʔ </td>
+<td> consonant plosive glottal unvoiced [<a title="Audio sample for consonant plosive glottal unvoiced " href="../../../phonemes/glottal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 56 </td>
+<td> χ </td>
+<td> consonant fricative uvular unvoiced [<a title="Audio sample for consonant fricative uvular unvoiced " href="../../../phonemes/voiceless_uvular_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+</table>

voices/de_DE/m-ailabs_low/README.md.in ADDED Viewed

	@@ -0,0 +1,5 @@

+# German M-AILabs (Low Quality)
+A multi-speaker model for German based on the [M-AILabs dataset](https://www.caito.de/2019/01/03/the-m-ailabs-speech-dataset/).
+See LICENSE file for license.

voices/de_DE/m-ailabs_low/SOURCE ADDED Viewed

	@@ -0,0 +1 @@


1	+ https://www.caito.de/2019/01/03/the-m-ailabs-speech-dataset/

voices/de_DE/m-ailabs_low/VERSION ADDED Viewed

	@@ -0,0 +1 @@


1	+ 0.1.0

voices/de_DE/m-ailabs_low/config.json ADDED Viewed

	@@ -0,0 +1,151 @@

+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0002,
+    "betas": [
+        0.8,
+        0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 32,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 8192,
+    "init_lr_ratio": 1.0,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "grad_clip": null,
+    "min_seq_length": null,
+    "max_seq_length": 400,
+    "min_spec_length": null,
+    "max_spec_length": null,
+    "min_speaker_utterances": null,
+    "last_epoch": 1,
+    "global_step": 1,
+    "best_loss": null,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0,
+        "mel_fmax": null,
+        "ref_level_db": 20,
+        "spec_gain": 1,
+        "signal_norm": true,
+        "min_level_db": -100,
+        "max_norm": 1,
+        "clip_norm": true,
+        "symmetric_norm": true,
+        "do_dynamic_range_compression": true,
+        "convert_db_to_amp": true,
+        "do_trim_silence": false,
+        "trim_silence_db": 40,
+        "trim_margin_sec": 0.01,
+        "trim_keep_sec": 0.25,
+        "scale_mels": false
+    },
+    "model": {
+        "num_symbols": 57,
+        "n_speakers": 5,
+        "inter_channels": 192,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "n_heads": 2,
+        "n_layers": 6,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "resblock": "2",
+        "resblock_kernel_sizes": [
+            3,
+            5,
+            7
+        ],
+        "resblock_dilation_sizes": [
+            [
+                1,
+                2
+            ],
+            [
+                2,
+                6
+            ],
+            [
+                3,
+                12
+            ]
+        ],
+        "upsample_rates": [
+            8,
+            8,
+            4
+        ],
+        "upsample_initial_channel": 256,
+        "upsample_kernel_sizes": [
+            16,
+            16,
+            8
+        ],
+        "n_layers_q": 3,
+        "use_spectral_norm": false,
+        "gin_channels": 512,
+        "use_sdp": true
+    },
+    "phonemes": {
+        "phoneme_separator": "_",
+        "word_separator": "#",
+        "phoneme_to_id": null,
+        "pad": "_",
+        "bos": "^",
+        "eos": "$",
+        "blank": "_",
+        "blank_word": "#",
+        "blank_between": "tokens_and_words",
+        "blank_at_start": true,
+        "blank_at_end": true,
+        "simple_punctuation": true,
+        "punctuation_map": null,
+        "separate": [
+            "\u02c8",
+            "\u02cc"
+        ],
+        "separate_graphemes": false,
+        "separate_tones": false,
+        "tone_before": false,
+        "phoneme_map": null,
+        "auto_bos_eos": true,
+        "minor_break": "\u00b7",
+        "major_break": null,
+        "break_phonemes_into_graphemes": false,
+        "drop_stress": false,
+        "symbols": null
+    },
+    "text_aligner": {
+        "aligner": "kaldi_align",
+        "casing": "lower"
+    },
+    "text_language": "de-de",
+    "phonemizer": "gruut",
+    "datasets": [
+        {
+            "name": "m-ailabs",
+            "metadata_format": "text",
+            "multispeaker": true,
+            "text_language": null,
+            "audio_dir": "/media/12tb/de-de/m-ai-labs/de_DE",
+            "cache_dir": "/media/cache/m-ailabs/de_DE"
+        }
+    ],
+    "inference": {
+        "length_scale": 1.0,
+        "noise_scale": 0.333,
+        "noise_w": 0.333,
+        "auto_append_text": "."
+    },
+    "version": 1,
+    "git_commit": ""
+}

voices/de_DE/m-ailabs_low/generator.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3330372429b25fe3a38b10bbe914862a49b2cd0a58da332bbe30fa123035a067
+size 76340831

voices/de_DE/m-ailabs_low/phoneme_map.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ \| ·
2	+ ‖ · ·

voices/de_DE/m-ailabs_low/phonemes.txt ADDED Viewed

	@@ -0,0 +1,57 @@

+0 _
+1 ^
+2 $
+3 ·
+4 #
+5 ˈ
+6 ˌ
+7 a
+8 aɪ̯
+9 aʊ̯
+10 aː
+11 b
+12 d
+13 d͡ʒ
+14 eː
+15 f
+16 g
+17 h
+18 iː
+19 j
+20 k
+21 l
+22 m
+23 n
+24 oː
+25 p
+26 p͡f
+27 s
+28 t
+29 t͡s
+30 t͡ʃ
+31 uː
+32 v
+33 x
+34 yː
+35 z
+36 ãː
+37 ç
+38 õː
+39 øː
+40 ŋ
+41 œ
+42 ɐ
+43 ɔ
+44 ɔʏ̯
+45 ə
+46 ɛ
+47 ɛː
+48 ɛ̃ː
+49 ɪ
+50 ʁ
+51 ʃ
+52 ʊ
+53 ʏ
+54 ʒ
+55 ʔ
+56 χ

voices/de_DE/m-ailabs_low/speaker_map.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+0|m-ailabs|ramona_deininger
+1|m-ailabs|karlsson
+2|m-ailabs|rebecca_braunert_plunkett
+3|m-ailabs|eva_k
+4|m-ailabs|angela_merkel

voices/de_DE/m-ailabs_low/speakers.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+ramona_deininger
+karlsson
+rebecca_braunert_plunkett
+eva_k
+angela_merkel

voices/de_DE/thorsten-emotion_low/LICENSE ADDED Viewed

	@@ -0,0 +1 @@


1	+ Creative Commons (CC0) Licence

voices/de_DE/thorsten-emotion_low/README.md ADDED Viewed

	@@ -0,0 +1,291 @@

+# German Thorsten Emotion (Low Quality)
+A single-speaker model for German based on the [Thorsten Emotional dataset](http://www.openslr.org/110/).
+See LICENSE file for license.
+## Phonemes
+<table><thead><tr><th>&nbsp;</th><th>Phoneme</th><th>Description</th></tr></thead>
+<tr>
+<td> 0 </td>
+<td> _ </td>
+<td> padding </td>
+</tr>
+<tr>
+<td> 1 </td>
+<td> ^ </td>
+<td> start utterance </td>
+</tr>
+<tr>
+<td> 2 </td>
+<td> $ </td>
+<td> end utterance </td>
+</tr>
+<tr>
+<td> 3 </td>
+<td> | </td>
+<td> short pause (minor break) </td>
+</tr>
+<tr>
+<td> 4 </td>
+<td> # </td>
+<td> word break </td>
+</tr>
+<tr>
+<td> 5 </td>
+<td> ˈ </td>
+<td> primary stress </td>
+</tr>
+<tr>
+<td> 6 </td>
+<td> ˌ </td>
+<td> secondary stress </td>
+</tr>
+<tr>
+<td> 7 </td>
+<td> a </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 8 </td>
+<td> aɪ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 9 </td>
+<td> aʊ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 10 </td>
+<td> aː </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 11 </td>
+<td> b </td>
+<td> consonant plosive bilabial voiced [<a title="Audio sample for consonant plosive bilabial voiced " href="../../../phonemes/voiced_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 12 </td>
+<td> d </td>
+<td> consonant plosive alveolar voiced [<a title="Audio sample for consonant plosive alveolar voiced " href="../../../phonemes/voiced_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 13 </td>
+<td> eː </td>
+<td> vowel close-mid front unrounded [<a title="Audio sample for vowel close-mid front unrounded " href="../../../phonemes/close-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 14 </td>
+<td> f </td>
+<td> consonant fricative labio-dental unvoiced [<a title="Audio sample for consonant fricative labio-dental unvoiced " href="../../../phonemes/voiceless_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 15 </td>
+<td> g </td>
+<td> consonant plosive velar voiced [<a title="Audio sample for consonant plosive velar voiced " href="../../../phonemes/voiced_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 16 </td>
+<td> h </td>
+<td> consonant fricative glottal unvoiced [<a title="Audio sample for consonant fricative glottal unvoiced " href="../../../phonemes/voiceless_glottal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 17 </td>
+<td> iː </td>
+<td> vowel close front unrounded [<a title="Audio sample for vowel close front unrounded " href="../../../phonemes/close_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 18 </td>
+<td> j </td>
+<td> consonant approximant palatal voiced [<a title="Audio sample for consonant approximant palatal voiced " href="../../../phonemes/palatal_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 19 </td>
+<td> k </td>
+<td> consonant plosive velar unvoiced [<a title="Audio sample for consonant plosive velar unvoiced " href="../../../phonemes/voiceless_velar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 20 </td>
+<td> l </td>
+<td> consonant lateral-approximant alveolar voiced [<a title="Audio sample for consonant lateral-approximant alveolar voiced " href="../../../phonemes/alveolar_lateral_approximant.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 21 </td>
+<td> m </td>
+<td> consonant nasal bilabial voiced [<a title="Audio sample for consonant nasal bilabial voiced " href="../../../phonemes/bilabial_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 22 </td>
+<td> n </td>
+<td> consonant nasal alveolar voiced [<a title="Audio sample for consonant nasal alveolar voiced " href="../../../phonemes/alveolar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 23 </td>
+<td> oː </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 24 </td>
+<td> p </td>
+<td> consonant plosive bilabial unvoiced [<a title="Audio sample for consonant plosive bilabial unvoiced " href="../../../phonemes/voiceless_bilabial_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 25 </td>
+<td> p͡f </td>
+<td> consonant affricate labio-dental unvoiced </td>
+</tr>
+<tr>
+<td> 26 </td>
+<td> s </td>
+<td> consonant fricative alveolar unvoiced [<a title="Audio sample for consonant fricative alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 27 </td>
+<td> t </td>
+<td> consonant plosive alveolar unvoiced [<a title="Audio sample for consonant plosive alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 28 </td>
+<td> t͡s </td>
+<td> consonant affricate alveolar unvoiced [<a title="Audio sample for consonant affricate alveolar unvoiced " href="../../../phonemes/voiceless_alveolar_affricate.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 29 </td>
+<td> t͡ʃ </td>
+<td> consonant affricate post-alveolar unvoiced [<a title="Audio sample for consonant affricate post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_affricate.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 30 </td>
+<td> uː </td>
+<td> vowel close back rounded [<a title="Audio sample for vowel close back rounded " href="../../../phonemes/close_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 31 </td>
+<td> v </td>
+<td> consonant fricative labio-dental voiced [<a title="Audio sample for consonant fricative labio-dental voiced " href="../../../phonemes/voiced_labiodental_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 32 </td>
+<td> x </td>
+<td> consonant fricative velar unvoiced [<a title="Audio sample for consonant fricative velar unvoiced " href="../../../phonemes/voiceless_velar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 33 </td>
+<td> yː </td>
+<td> vowel close front rounded [<a title="Audio sample for vowel close front rounded " href="../../../phonemes/close_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 34 </td>
+<td> z </td>
+<td> consonant fricative alveolar voiced [<a title="Audio sample for consonant fricative alveolar voiced " href="../../../phonemes/voiced_alveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 35 </td>
+<td> ãː </td>
+<td> vowel open front unrounded [<a title="Audio sample for vowel open front unrounded " href="../../../phonemes/open_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 36 </td>
+<td> ç </td>
+<td> consonant fricative palatal unvoiced [<a title="Audio sample for consonant fricative palatal unvoiced " href="../../../phonemes/voiceless_palatal_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 37 </td>
+<td> õː </td>
+<td> vowel close-mid back rounded [<a title="Audio sample for vowel close-mid back rounded " href="../../../phonemes/close-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 38 </td>
+<td> øː </td>
+<td> vowel close-mid front rounded [<a title="Audio sample for vowel close-mid front rounded " href="../../../phonemes/close-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 39 </td>
+<td> ŋ </td>
+<td> consonant nasal velar voiced [<a title="Audio sample for consonant nasal velar voiced " href="../../../phonemes/velar_nasal.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 40 </td>
+<td> œ </td>
+<td> vowel open-mid front rounded [<a title="Audio sample for vowel open-mid front rounded " href="../../../phonemes/open-mid_front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 41 </td>
+<td> ɐ </td>
+<td> vowel near-open central unrounded [<a title="Audio sample for vowel near-open central unrounded " href="../../../phonemes/near-open_central_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 42 </td>
+<td> ɔ </td>
+<td> vowel open-mid back rounded [<a title="Audio sample for vowel open-mid back rounded " href="../../../phonemes/open-mid_back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 43 </td>
+<td> ɔʏ̯ </td>
+<td> diphthong </td>
+</tr>
+<tr>
+<td> 44 </td>
+<td> ə </td>
+<td> vowel mid central unrounded </td>
+</tr>
+<tr>
+<td> 45 </td>
+<td> ɛ </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 46 </td>
+<td> ɛː </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 47 </td>
+<td> ɛ̃ː </td>
+<td> vowel open-mid front unrounded [<a title="Audio sample for vowel open-mid front unrounded " href="../../../phonemes/open-mid_front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 48 </td>
+<td> ɪ </td>
+<td> vowel near-close near-front unrounded [<a title="Audio sample for vowel near-close near-front unrounded " href="../../../phonemes/near-close_near-front_unrounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 49 </td>
+<td> ʁ </td>
+<td> consonant fricative uvular voiced [<a title="Audio sample for consonant fricative uvular voiced " href="../../../phonemes/voiced_uvular_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 50 </td>
+<td> ʃ </td>
+<td> consonant fricative post-alveolar unvoiced [<a title="Audio sample for consonant fricative post-alveolar unvoiced " href="../../../phonemes/voiceless_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 51 </td>
+<td> ʊ </td>
+<td> vowel near-close near-back rounded [<a title="Audio sample for vowel near-close near-back rounded " href="../../../phonemes/near-close_near-back_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 52 </td>
+<td> ʏ </td>
+<td> vowel near-close near-front rounded [<a title="Audio sample for vowel near-close near-front rounded " href="../../../phonemes/near-close_near-front_rounded_vowel.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 53 </td>
+<td> ʒ </td>
+<td> consonant fricative post-alveolar voiced [<a title="Audio sample for consonant fricative post-alveolar voiced " href="../../../phonemes/voiced_postalveolar_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 54 </td>
+<td> ʔ </td>
+<td> consonant plosive glottal unvoiced [<a title="Audio sample for consonant plosive glottal unvoiced " href="../../../phonemes/glottal_plosive.wav?raw=true">Sample</a>] </td>
+</tr>
+<tr>
+<td> 55 </td>
+<td> χ </td>
+<td> consonant fricative uvular unvoiced [<a title="Audio sample for consonant fricative uvular unvoiced " href="../../../phonemes/voiceless_uvular_fricative.wav?raw=true">Sample</a>] </td>
+</tr>
+</table>

voices/de_DE/thorsten-emotion_low/README.md.in ADDED Viewed

	@@ -0,0 +1,5 @@

+# German Thorsten Emotion (Low Quality)
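+A single-speaker model for German based on the [Thorsten Emotional dataset](http://www.openslr.org/110/). The eight emotional styles (amused, angry, disgusted, drunk, neutral, sleepy, surprised, whisper) are exposed as separate speakers.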
+See LICENSE file for license.

voices/de_DE/thorsten-emotion_low/SOURCE ADDED Viewed

	@@ -0,0 +1 @@


1	+ http://www.openslr.org/110/

voices/de_DE/thorsten-emotion_low/VERSION ADDED Viewed

	@@ -0,0 +1 @@


1	+ 0.1.0

voices/de_DE/thorsten-emotion_low/config.json ADDED Viewed

	@@ -0,0 +1,157 @@

+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0002,
+    "betas": [
+        0.8,
+        0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 32,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 8192,
+    "init_lr_ratio": 1.0,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "grad_clip": null,
+    "min_seq_length": null,
+    "max_seq_length": 400,
+    "min_spec_length": null,
+    "max_spec_length": null,
+    "min_speaker_utterances": null,
+    "last_epoch": 1,
+    "global_step": 1,
+    "best_loss": null,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0.0,
+        "mel_fmax": null,
+        "ref_level_db": 20.0,
+        "spec_gain": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100.0,
+        "max_norm": 1.0,
+        "clip_norm": true,
+        "symmetric_norm": true,
+        "do_dynamic_range_compression": true,
+        "convert_db_to_amp": true,
+        "do_trim_silence": false,
+        "trim_silence_db": 40.0,
+        "trim_margin_sec": 0.01,
+        "trim_keep_sec": 0.25,
+        "scale_mels": false
+    },
+    "model": {
+        "num_symbols": 56,
+        "n_speakers": 8,
+        "inter_channels": 192,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "n_heads": 2,
+        "n_layers": 6,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "resblock": "2",
+        "resblock_kernel_sizes": [
+            3,
+            5,
+            7
+        ],
+        "resblock_dilation_sizes": [
+            [
+                1,
+                2
+            ],
+            [
+                2,
+                6
+            ],
+            [
+                3,
+                12
+            ]
+        ],
+        "upsample_rates": [
+            8,
+            8,
+            4
+        ],
+        "upsample_initial_channel": 256,
+        "upsample_kernel_sizes": [
+            16,
+            16,
+            8
+        ],
+        "n_layers_q": 3,
+        "use_spectral_norm": false,
+        "gin_channels": 512,
+        "use_sdp": true
+    },
+    "phonemes": {
+        "phoneme_separator": "_",
+        "word_separator": "#",
+        "phoneme_to_id": null,
+        "pad": "_",
+        "bos": "^",
+        "eos": "$",
+        "blank": "_",
+        "blank_word": "#",
+        "blank_between": "tokens_and_words",
+        "blank_at_start": true,
+        "blank_at_end": true,
+        "simple_punctuation": true,
+        "punctuation_map": null,
+        "separate": [
+            "\u02c8",
+            "\u02cc"
+        ],
+        "separate_graphemes": false,
+        "separate_tones": false,
+        "tone_before": false,
+        "phoneme_map": {
+            "\u2016": [
+                "|",
+                "|"
+            ]
+        },
+        "auto_bos_eos": true,
+        "minor_break": "|",
+        "major_break": null,
+        "break_phonemes_into_graphemes": false,
+        "break_phonemes_into_codepoints": false,
+        "drop_stress": false,
+        "symbols": null
+    },
+    "text_aligner": {
+        "aligner": null,
+        "casing": null
+    },
+    "text_language": "de_DE",
+    "phonemizer": "gruut",
+    "datasets": [
+        {
+            "name": "thorsten_emotion",
+            "metadata_format": "text",
+            "multispeaker": true,
+            "text_language": null,
+            "audio_dir": "/media/12tb/de-de/thorsten-emotional_v02",
+            "cache_dir": "/media/cache/thorsten_emotion"
+        }
+    ],
+    "inference": {
+        "length_scale": 1.0,
+        "noise_scale": 0.667,
+        "noise_w": 0.8,
+        "auto_append_text": "."
+    },
+    "version": 1,
+    "git_commit": ""
+}

voices/de_DE/thorsten-emotion_low/generator.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a2588308d23e51874f6c87dd9651fce2375302f4b26bdb98dfe125547d283a5
+size 76346209

voices/de_DE/thorsten-emotion_low/phonemes.txt ADDED Viewed

	@@ -0,0 +1,56 @@

+0 _
+1 ^
+2 $
+3 |
+4 #
+5 ˈ
+6 ˌ
+7 a
+8 aɪ̯
+9 aʊ̯
+10 aː
+11 b
+12 d
+13 eː
+14 f
+15 g
+16 h
+17 iː
+18 j
+19 k
+20 l
+21 m
+22 n
+23 oː
+24 p
+25 p͡f
+26 s
+27 t
+28 t͡s
+29 t͡ʃ
+30 uː
+31 v
+32 x
+33 yː
+34 z
+35 ãː
+36 ç
+37 õː
+38 øː
+39 ŋ
+40 œ
+41 ɐ
+42 ɔ
+43 ɔʏ̯
+44 ə
+45 ɛ
+46 ɛː
+47 ɛ̃ː
+48 ɪ
+49 ʁ
+50 ʃ
+51 ʊ
+52 ʏ
+53 ʒ
+54 ʔ
+55 χ

voices/de_DE/thorsten-emotion_low/speaker_map.csv ADDED Viewed

	@@ -0,0 +1,8 @@

+0|thorsten_emotion|amused
+1|thorsten_emotion|angry
+2|thorsten_emotion|disgusted
+3|thorsten_emotion|drunk
+4|thorsten_emotion|neutral
+5|thorsten_emotion|sleepy
+6|thorsten_emotion|surprised
+7|thorsten_emotion|whisper

voices/de_DE/thorsten-emotion_low/speakers.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+amused
+angry
+disgusted
+drunk
+neutral
+sleepy
+surprised
+whisper

voices/de_DE/thorsten_low/ALIASES ADDED Viewed

	@@ -0,0 +1,4 @@

+de
+de_DE
+thorsten
+de_DE/thorsten