Spaces:

Flux9665
/

EnglishToucan

Running on Zero

App Files Files Community

Flux9665 committed on Jul 25, 2024

Commit

c255993

1 Parent(s): f66c1f0

update to current version

Browse files

Files changed (2) hide show

InferenceInterfaces/ControllableInterface.py +1 -44
app.py +0 -8

InferenceInterfaces/ControllableInterface.py CHANGED Viewed

@@ -26,8 +26,6 @@ class ControllableInterface:
     def read(self,
              prompt,
              reference_audio,
-             language,
-             accent,
              voice_seed,
              prosody_creativity,
              duration_scaling_factor,
@@ -42,14 +40,6 @@ class ControllableInterface:
              emb_slider_6,
              loudness_in_db
              ):
-        if self.current_language != language:
-            self.model.set_phonemizer_language(language)
-            print(f"switched phonemizer language to {language}")
-            self.current_language = language
-        if self.current_accent != accent:
-            self.model.set_accent_language(accent)
-            print(f"switched accent language to {accent}")
-            self.current_accent = accent
         if reference_audio is None:
             self.wgan.set_latent(voice_seed)
             controllability_vector = torch.tensor([emb_slider_1,
@@ -65,40 +55,7 @@ class ControllableInterface:
         phones = self.model.text2phone.get_phone_string(prompt)
         if len(phones) > 1800:
-            if language == "deu":
-                prompt = "Deine Eingabe war zu lang. Bitte versuche es entweder mit einem kürzeren Text oder teile ihn in mehrere Teile auf."
-            elif language == "ell":
-                prompt = "Η εισήγησή σας ήταν πολύ μεγάλη. Παρακαλώ δοκιμάστε είτε ένα μικρότερο κείμενο είτε χωρίστε το σε διάφορα μέρη."
-            elif language == "spa":
-                prompt = "Su entrada es demasiado larga. Por favor, intente un texto más corto o divídalo en varias partes."
-            elif language == "fin":
-                prompt = "Vastauksesi oli liian pitkä. Kokeile joko lyhyempää tekstiä tai jaa se useampaan osaan."
-            elif language == "rus":
-                prompt = "Ваш текст слишком длинный. Пожалуйста, попробуйте либо сократить текст, либо разделить его на несколько частей."
-            elif language == "hun":
-                prompt = "Túl hosszú volt a bevitele. Kérjük, próbáljon meg rövidebb szöveget írni, vagy ossza több részre."
-            elif language == "nld":
-                prompt = "Uw input was te lang. Probeer een kortere tekst of splits het in verschillende delen."
-            elif language == "fra":
-                prompt = "Votre saisie était trop longue. Veuillez essayer un texte plus court ou le diviser en plusieurs parties."
-            elif language == 'pol':
-                prompt = "Twój wpis był zbyt długi. Spróbuj skrócić tekst lub podzielić go na kilka części."
-            elif language == 'por':
-                prompt = "O seu contributo foi demasiado longo. Por favor, tente um texto mais curto ou divida-o em várias partes."
-            elif language == 'ita':
-                prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
-            elif language == 'cmn':
-                prompt = "你的输入太长了。请尝试使用较短的文本或将其拆分为多个部分。"
-            elif language == 'vie':
-                prompt = "Đầu vào của bạn quá dài. Vui lòng thử một văn bản ngắn hơn hoặc chia nó thành nhiều phần."
-            else:
-                prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
-                if self.current_language != "eng":
-                    self.model.set_phonemizer_language("eng")
-                    self.current_language = "eng"
-                if self.current_accent != "eng":
-                    self.model.set_accent_language("eng")
-                    self.current_accent = "eng"
         print(prompt + "\n\n")
         wav, sr, fig = self.model(prompt,

     def read(self,
              prompt,
              reference_audio,
              voice_seed,
              prosody_creativity,
              duration_scaling_factor,
              emb_slider_6,
              loudness_in_db
              ):
         if reference_audio is None:
             self.wgan.set_latent(voice_seed)
             controllability_vector = torch.tensor([emb_slider_1,
         phones = self.model.text2phone.get_phone_string(prompt)
         if len(phones) > 1800:
+            prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
         print(prompt + "\n\n")
         wav, sr, fig = self.model(prompt,

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ class TTSWebUI:
     def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
         iso_to_name = load_json_from_path(path_to_iso_list)
-        text_selection = [f"{iso_to_name[iso_code]} ({iso_code})" for iso_code in iso_to_name]
         # accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
         self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
@@ -20,10 +19,6 @@ class TTSWebUI:
                                                      placeholder="write what you want the synthesis to read here...",
                                                      value="What I cannot create, I do not understand.",
                                                      label="Text input"),
-                                          gr.Dropdown(text_selection,
-                                                      type="value",
-                                                      value='English (eng)',
-                                                      label="Select the Language of the Text (type on your keyboard to find it quickly)"),
                                           gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
                                           gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
                                                     value=279,
@@ -45,7 +40,6 @@ class TTSWebUI:
     def read(self,
              prompt,
-             language,
              reference_audio,
              voice_seed,
              prosody_creativity,
@@ -57,8 +51,6 @@ class TTSWebUI:
              ):
         sr, wav, fig = self.controllable_ui.read(prompt,
                                                  reference_audio,
-                                                 language.split(" ")[-1].split("(")[1].split(")")[0],
-                                                 language.split(" ")[-1].split("(")[1].split(")")[0],
                                                  voice_seed,
                                                  prosody_creativity,
                                                  duration_scaling_factor,

     def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
         iso_to_name = load_json_from_path(path_to_iso_list)
         # accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
         self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
                                                      placeholder="write what you want the synthesis to read here...",
                                                      value="What I cannot create, I do not understand.",
                                                      label="Text input"),
                                           gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
                                           gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
                                                     value=279,
     def read(self,
              prompt,
              reference_audio,
              voice_seed,
              prosody_creativity,
              ):
         sr, wav, fig = self.controllable_ui.read(prompt,
                                                  reference_audio,
                                                  voice_seed,
                                                  prosody_creativity,
                                                  duration_scaling_factor,