style-tts-2

Running

App Files Files Community

Pendrokar committed on Oct 26, 2024

Commit

d665bf1

1 Parent(s): d1178af

lojban ipa support

Browse files

Files changed (3) hide show

app.py +3 -1
lojban.py +354 -0
styletts2importable.py +12 -4

app.py CHANGED Viewed

@@ -139,7 +139,7 @@ with gr.Blocks() as vctk:
             voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
             lang =gr.Dropdown(
                 [
-                    ['English', 'en-us'],
                     ['Czech (Non-native)', 'cs'],
                     ['Danish (Non-native)', 'da'],
                     ['Dutch (Non-native)', 'nl'],
@@ -157,6 +157,8 @@ with gr.Blocks() as vctk:
                     ['Spanish (Non-native)', 'es'],
                     ['Swedish (Non-native)', 'sv'],
                     ['Turkish (Non-native)', 'tr'],
                 ],
                 label="Language",
             )

             voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
             lang =gr.Dropdown(
                 [
+                    ['English (US)', 'en-us'],
                     ['Czech (Non-native)', 'cs'],
                     ['Danish (Non-native)', 'da'],
                     ['Dutch (Non-native)', 'nl'],
                     ['Spanish (Non-native)', 'es'],
                     ['Swedish (Non-native)', 'sv'],
                     ['Turkish (Non-native)', 'tr'],
+                    # artificial
+                    ['Lojban', 'jb'],
                 ],
                 label="Language",
             )

lojban.py ADDED Viewed

	@@ -0,0 +1,354 @@

+# credits: gleki
+from __future__ import annotations
+import sys
+import os
+from re import sub, compile
+from itertools import islice
+def krulermorna(text: str) -> str:
+    """Rewrite Lojban text so glides and diphthongs become single letters.
+
+    Dots are dropped, ``u``/``i`` directly before a vowel become ``w``/``ɩ``,
+    and the pairs ``au``, ``ai``, ``ei``, ``oi`` become ``ḁ``, ``ą``, ``ę``, ``ǫ``.
+    """
+    # Applied strictly in this order: the glide rules run before the diphthong
+    # rules, and the dot inserted by the "^" rule is removed again by the last one.
+    substitutions = (
+        (r"\.", ""),
+        (r"^", "."),
+        (r"u([aeiouy])", r"w\1"),
+        (r"i([aeiouy])", r"ɩ\1"),
+        (r"au", "ḁ"),
+        (r"ai", "ą"),
+        (r"ei", "ę"),
+        (r"oi", "ǫ"),
+        (r"\.", ""),
+    )
+    result = text
+    for pattern, replacement in substitutions:
+        result = sub(pattern, replacement, result)
+    return result
+def krulermornaize(words: list[str]) -> list[str]:
+    """Return a new list with ``krulermorna`` applied to each word, in order."""
+    return list(map(krulermorna, words))
+# Letter-to-IPA rules used by lojban2ipa_vits. Each key is a regex fragment
+# that the converter anchors with "^" and tries longest key first; the value
+# is the IPA text emitted for the matched letters.
+ipa_vits = {
+    "a$": 'aː',
+    "a": 'aː',
+    # "e(?=v)": 'ɛːʔ',
+    # "e$": 'ɛːʔ',
+    "e": 'ɛː',
+    "i": 'iː',
+    "o": 'oː',
+    "u": 'ʊu',
+    # "u": 'ʊː',
+    "y": 'əː',
+    "ą": 'aɪ',
+    "ę": 'ɛɪ',
+    # "ę(?=\b)(?!')": 'ɛɪʔ',
+    "ǫ": 'ɔɪ',
+    "ḁ": 'aʊ',
+    "ɩa": 'jaː',
+    "ɩe": 'jɛː',
+    "ɩi": 'jiː',
+    "ɩo": 'jɔː',
+    "ɩu": 'juː',
+    "ɩy": 'jəː',
+    "ɩ": 'j',
+    "wa": 'waː',
+    "we": 'wɛː',
+    "wi": 'wiː',
+    "wo": 'wɔː',
+    "wu": 'wuː',
+    "wy": 'wəː',
+    "w": 'w',
+    "c": 'ʃ',
+    # "bj": 'bʒ',
+    "j": 'ʒ',
+    "s": 's',
+    "z": 'z',
+    "f": 'f',
+    "v": 'v',
+    "x": 'hhh',
+    "'": 'h',
+    # "dj":'dʒ',
+    # "tc":'tʃ',
+    # "dz":'ʣ',
+    # "ts":'ʦ',
+    'r': 'ɹ',
+    'r(?![ˈaeiouyḁąęǫ])': 'ɹɹ',
+    # 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ',
+    "nˈu": 'nˈʊuː',
+    "nu": 'nʊuː',
+    "ng": 'n.g',
+    "n": 'n',
+    "m": 'm',
+    "l": 'l',
+    "b": 'b',
+    "d": 'd',
+    "g": 'ɡ',
+    "k": 'k',
+    "p": 'p',
+    "t": 't',
+    "h": 'h'
+}
+# Letter-to-IPA rules used by lojban2ipa_nix. Each key is a regex fragment
+# that the converter anchors with "^" and tries longest key first; the value
+# is the IPA text emitted for the matched letters.
+ipa_nix = {
+    "a$": 'aː',
+    "a": 'aː',
+    # "e(?=v)": 'ɛːʔ',
+    # "e$": 'ɛːʔ',
+    "e": 'ɛː',
+    "i": 'iː',
+    "o": 'oː',
+    "u": 'ʊu',
+    # "u": 'ʊː',
+    "y": 'əː',
+    "ą": 'aɪ',
+    "ę": 'ɛɪ',
+    # "ę(?=\b)(?!')": 'ɛɪʔ',
+    "ǫ": 'ɔɪ',
+    "ḁ": 'aʊ',
+    "ɩa": 'jaː',
+    "ɩe": 'jɛː',
+    "ɩi": 'jiː',
+    "ɩo": 'jɔː',
+    "ɩu": 'juː',
+    "ɩy": 'jəː',
+    "ɩ": 'j',
+    "wa": 'waː',
+    "we": 'wɛː',
+    "wi": 'wiː',
+    "wo": 'wɔː',
+    "wu": 'wuː',
+    "wy": 'wəː',
+    "w": 'w',
+    "c": 'ʃ',
+    "gj": 'gɪʒ',
+    "bj": 'bɪʒ',
+    "j": 'ʒ',
+    "s": 's',
+    "z": 'z',
+    "f": 'f',
+    "v": 'v',
+    "x": 'hh',
+    "'": 'h',
+    # "dj":'dʒ',
+    # "tc":'tʃ',
+    # "dz":'ʣ',
+    # "ts":'ʦ',
+    'r': 'ɹ',
+    'r(?![ˈaeiouyḁąęǫ])': 'ɹɹɹɪ',
+    # 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ',
+    "nˈu": 'nˈʊuː',
+    "nu": 'nʊuː',
+    "ng": 'ng',
+    "n": 'n',
+    "m": 'm',
+    "l": 'l',
+    "b": 'b',
+    "d": 'd',
+    "g": 'ɡ',
+    "k": 'k',
+    "p": 'p',
+    "t": 't',
+    "h": 'h'
+}
+# Matches one vowel letter, including the single-letter diphthongs that
+# krulermorna produces.
+vowel_pattern = compile("[aeiouyąęǫḁ]")
+# Zero-width: matches at a position directly before a vowel letter; the
+# converters split words on it.
+vowel_coming_pattern = compile("(?=[aeiouyąęǫḁ])")
+# Zero-width: matches at a position directly before a diphthong letter.
+diphthong_coming_pattern = compile("(?=[ąęǫḁ])")
+# Word lists are passed through krulermorna so they compare equal to the
+# words of already-normalized input text.
+question_words = krulermornaize(["ma", "mo", "xu"])
+starter_words = krulermornaize(["le", "lo", "lei", "loi"])
+terminator_words = krulermornaize(["kei", "ku'o", "vau", "li'u"])
+def lojban2ipa(text: str, mode: str = 'vits') -> str:
+    """Convert Lojban text to an IPA string using the chosen rule set.
+
+    Args:
+        text: Lojban text to convert.
+        mode: ``'nix'`` selects ``lojban2ipa_nix``; ``'vits'`` or any other
+            value selects ``lojban2ipa_vits``. Defaults to ``'vits'`` so the
+            function can be called with the text alone.
+
+    Returns:
+        The IPA transcription produced by the selected converter.
+    """
+    if mode == 'nix':
+        return lojban2ipa_nix(text)
+    # 'vits' is both the explicit choice and the fallback for unknown modes.
+    return lojban2ipa_vits(text)
+def lojban2ipa_vits(text: str) -> str:
+    """Convert Lojban text to IPA using the ``ipa_vits`` rule table.
+
+    The text is normalized with ``krulermorna`` and split on single spaces;
+    each word then gets a stress mark, pause punctuation and a left-to-right
+    letter rewrite.
+
+    Args:
+        text: Lojban text to convert.
+
+    Returns:
+        The IPA string, or ``""`` when the text is empty after normalization.
+        A trailing ``"."`` is appended when the normalized text ends in a vowel.
+    """
+    text = krulermorna(text.strip())
+    if not text:
+        # Nothing to transcribe; the final-letter check below needs a character.
+        return ""
+    # Compile the rules once per call, longest key first so multi-letter
+    # rules are tried before the single letters they begin with.
+    rules = [
+        (compile("^" + attr), val)
+        for attr, val in sorted(ipa_vits.items(), key=lambda item: len(str(item[0])), reverse=True)
+    ]
+    words = text.split(' ')
+    rebuilt_words = []
+    for index, word in enumerate(words):
+        modified_word = word
+        prefix, postfix = "", ""
+        if word in question_words:
+            postfix = "?"
+            prefix = " " + prefix
+        if word in starter_words:
+            prefix = " " + prefix
+        if word in terminator_words:
+            postfix = ", "
+        if index == 0 or word in ["ni'o", "i"]:
+            prefix = ", " + prefix
+        split_word = vowel_coming_pattern.split(word)
+        tail_word = split_word[-2:]
+        has_stressable_tail = len(tail_word) == 2 and len(tail_word[0]) > 0
+        if has_stressable_tail and vowel_pattern.match(tail_word[0][0]) and vowel_pattern.match(tail_word[1][0]):
+            # add stress to {klama}, {ni'o}
+            modified_word = "".join(split_word[:-2]) + "ˈ" + "".join(tail_word)
+            # add a pause after two-syllable words
+            postfix = postfix + " "
+        elif has_stressable_tail and diphthong_coming_pattern.match(tail_word[1][0]):
+            # add stress to {lau}, {coi}
+            modified_word = "".join(split_word[:-2]) + tail_word[0] + "ˈ" + tail_word[1]
+            postfix = postfix + " "
+        # add a pause even after a cmavo
+        if not (index - 1 >= 0 and words[index-1] in starter_words):
+            prefix = " " + prefix
+        # Rewrite the word left to right: at each position apply the first
+        # rule matching the remaining text, or copy the letter unchanged.
+        rebuilt_word = ""
+        position = 0
+        while position < len(modified_word):
+            tail = modified_word[position:]
+            for pattern, val in rules:
+                found = pattern.match(tail)
+                if found:
+                    rebuilt_word += val
+                    # Always advance at least one letter.
+                    position += max(len(found.group(0)), 1)
+                    break
+            else:
+                rebuilt_word += modified_word[position]
+                position += 1
+        rebuilt_words.append(prefix + rebuilt_word + postfix)
+    output = "".join(rebuilt_words).strip()
+    output = sub(r" {2,}", " ", output)
+    output = sub(r", ?(?=,)", "", output)
+    if vowel_pattern.match(text[-1]):
+        output += "."
+    return output
+def lojban2ipa_nix(text: str) -> str:
+    """Convert Lojban text to IPA using the ``ipa_nix`` rule table.
+
+    The text is normalized with ``krulermorna`` and split on single spaces;
+    each word then gets a stress mark, pause punctuation and a left-to-right
+    letter rewrite. Unlike ``lojban2ipa_vits``, question words get no ``"?"``.
+
+    Args:
+        text: Lojban text to convert.
+
+    Returns:
+        The IPA string, or ``""`` when the text is empty after normalization.
+        A trailing ``"."`` is appended when the normalized text ends in a vowel.
+    """
+    text = krulermorna(text.strip())
+    if not text:
+        # Nothing to transcribe; the final-letter check below needs a character.
+        return ""
+    # Compile the rules once per call, longest key first so multi-letter
+    # rules are tried before the single letters they begin with.
+    rules = [
+        (compile("^" + attr), val)
+        for attr, val in sorted(ipa_nix.items(), key=lambda item: len(str(item[0])), reverse=True)
+    ]
+    words = text.split(' ')
+    rebuilt_words = []
+    for index, word in enumerate(words):
+        modified_word = word
+        prefix, postfix = "", ""
+        if word in question_words:
+            prefix = " " + prefix
+        if word in starter_words:
+            prefix = " " + prefix
+        if word in terminator_words:
+            postfix = ", "
+        if index == 0 or word in ["ni'o", "i"]:
+            prefix = ", " + prefix
+        split_word = vowel_coming_pattern.split(word)
+        tail_word = split_word[-2:]
+        has_stressable_tail = len(tail_word) == 2 and len(tail_word[0]) > 0
+        if has_stressable_tail and vowel_pattern.match(tail_word[0][0]) and vowel_pattern.match(tail_word[1][0]):
+            # add stress to {klama}, {ni'o}
+            modified_word = "".join(split_word[:-2]) + "ˈ" + "".join(tail_word)
+            # add a pause after two-syllable words
+            postfix = postfix + " "
+        elif has_stressable_tail and diphthong_coming_pattern.match(tail_word[1][0]):
+            # add stress to {lau}, {coi}
+            modified_word = "".join(split_word[:-2]) + tail_word[0] + "ˈ" + tail_word[1]
+            postfix = postfix + " "
+        # add a pause even after a cmavo
+        if not (index - 1 >= 0 and words[index-1] in starter_words):
+            prefix = " " + prefix
+        # Rewrite the word left to right: at each position apply the first
+        # rule matching the remaining text, or copy the letter unchanged.
+        rebuilt_word = ""
+        position = 0
+        while position < len(modified_word):
+            tail = modified_word[position:]
+            for pattern, val in rules:
+                found = pattern.match(tail)
+                if found:
+                    rebuilt_word += val
+                    # Always advance at least one letter.
+                    position += max(len(found.group(0)), 1)
+                    break
+            else:
+                rebuilt_word += modified_word[position]
+                position += 1
+        rebuilt_words.append(prefix + rebuilt_word + postfix)
+    output = "".join(rebuilt_words).strip()
+    output = sub(r" {2,}", " ", output)
+    output = sub(r", ?(?=,)", "", output)
+    if vowel_pattern.match(text[-1]):
+        output += "."
+    return output
+# print(lojban2ipa("ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati"))

styletts2importable.py CHANGED Viewed

@@ -136,6 +136,7 @@ sampler = DiffusionSampler(
 )
 LANG_NAMES = {
     'en-us': 'english',
     'cs': 'czech',
     'da': 'danish',
@@ -169,10 +170,17 @@ def inference(text, ref_s, lang='en-us', alpha = 0.3, beta = 0.7, diffusion_step
     if (ipa_sections is not None):
         text = re.sub(regex, '[]', text, 0, re.MULTILINE)
-    local_phonemizer = phonemizer.backend.EspeakBackend(language=lang, preserve_punctuation=True, with_stress=True)
-    ps = local_phonemizer.phonemize([text])
-    ps = word_tokenize(ps[0], language=LANG_NAMES[lang])
-    ps = ' '.join(ps)
     # add the IPA back
     if (ipa_sections is not None):

 )
 LANG_NAMES = {
+    # natural; supported by nltk
     'en-us': 'english',
     'cs': 'czech',
     'da': 'danish',
     if (ipa_sections is not None):
         text = re.sub(regex, '[]', text, 0, re.MULTILINE)
+    if lang in LANG_NAMES:
+        local_phonemizer = phonemizer.backend.EspeakBackend(language=lang, preserve_punctuation=True, with_stress=True)
+        ps = local_phonemizer.phonemize([text])
+        ps = word_tokenize(ps[0], language=LANG_NAMES[lang])
+        ps = ' '.join(ps)
+    elif lang == 'jb':
+        # Lojban language
+        import lojban
+        ps = lojban.lojban2ipa(text, 'vits')
+    else:
+        ps = text
     # add the IPA back
     if (ipa_sections is not None):