Spaces: Running on Zero
update to current version
Browse files
- InferenceInterfaces/ControllableInterface.py +1 -44
- app.py +0 -8
InferenceInterfaces/ControllableInterface.py
CHANGED
@@ -26,8 +26,6 @@ class ControllableInterface:
|
|
26 |
def read(self,
|
27 |
prompt,
|
28 |
reference_audio,
|
29 |
-
language,
|
30 |
-
accent,
|
31 |
voice_seed,
|
32 |
prosody_creativity,
|
33 |
duration_scaling_factor,
|
@@ -42,14 +40,6 @@ class ControllableInterface:
|
|
42 |
emb_slider_6,
|
43 |
loudness_in_db
|
44 |
):
|
45 |
-
if self.current_language != language:
|
46 |
-
self.model.set_phonemizer_language(language)
|
47 |
-
print(f"switched phonemizer language to {language}")
|
48 |
-
self.current_language = language
|
49 |
-
if self.current_accent != accent:
|
50 |
-
self.model.set_accent_language(accent)
|
51 |
-
print(f"switched accent language to {accent}")
|
52 |
-
self.current_accent = accent
|
53 |
if reference_audio is None:
|
54 |
self.wgan.set_latent(voice_seed)
|
55 |
controllability_vector = torch.tensor([emb_slider_1,
|
@@ -65,40 +55,7 @@ class ControllableInterface:
|
|
65 |
|
66 |
phones = self.model.text2phone.get_phone_string(prompt)
|
67 |
if len(phones) > 1800:
|
68 |
-
|
69 |
-
prompt = "Deine Eingabe war zu lang. Bitte versuche es entweder mit einem kürzeren Text oder teile ihn in mehrere Teile auf."
|
70 |
-
elif language == "ell":
|
71 |
-
prompt = "Η εισήγησή σας ήταν πολύ μεγάλη. Παρακαλώ δοκιμάστε είτε ένα μικρότερο κείμενο είτε χωρίστε το σε διάφορα μέρη."
|
72 |
-
elif language == "spa":
|
73 |
-
prompt = "Su entrada es demasiado larga. Por favor, intente un texto más corto o divídalo en varias partes."
|
74 |
-
elif language == "fin":
|
75 |
-
prompt = "Vastauksesi oli liian pitkä. Kokeile joko lyhyempää tekstiä tai jaa se useampaan osaan."
|
76 |
-
elif language == "rus":
|
77 |
-
prompt = "Ваш текст слишком длинный. Пожалуйста, попробуйте либо сократить текст, либо разделить его на несколько частей."
|
78 |
-
elif language == "hun":
|
79 |
-
prompt = "Túl hosszú volt a bevitele. Kérjük, próbáljon meg rövidebb szöveget írni, vagy ossza több részre."
|
80 |
-
elif language == "nld":
|
81 |
-
prompt = "Uw input was te lang. Probeer een kortere tekst of splits het in verschillende delen."
|
82 |
-
elif language == "fra":
|
83 |
-
prompt = "Votre saisie était trop longue. Veuillez essayer un texte plus court ou le diviser en plusieurs parties."
|
84 |
-
elif language == 'pol':
|
85 |
-
prompt = "Twój wpis był zbyt długi. Spróbuj skrócić tekst lub podzielić go na kilka części."
|
86 |
-
elif language == 'por':
|
87 |
-
prompt = "O seu contributo foi demasiado longo. Por favor, tente um texto mais curto ou divida-o em várias partes."
|
88 |
-
elif language == 'ita':
|
89 |
-
prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
|
90 |
-
elif language == 'cmn':
|
91 |
-
prompt = "你的输入太长了。请尝试使用较短的文本或将其拆分为多个部分。"
|
92 |
-
elif language == 'vie':
|
93 |
-
prompt = "Đầu vào của bạn quá dài. Vui lòng thử một văn bản ngắn hơn hoặc chia nó thành nhiều phần."
|
94 |
-
else:
|
95 |
-
prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
|
96 |
-
if self.current_language != "eng":
|
97 |
-
self.model.set_phonemizer_language("eng")
|
98 |
-
self.current_language = "eng"
|
99 |
-
if self.current_accent != "eng":
|
100 |
-
self.model.set_accent_language("eng")
|
101 |
-
self.current_accent = "eng"
|
102 |
|
103 |
print(prompt + "\n\n")
|
104 |
wav, sr, fig = self.model(prompt,
|
|
|
26 |
def read(self,
|
27 |
prompt,
|
28 |
reference_audio,
|
|
|
|
|
29 |
voice_seed,
|
30 |
prosody_creativity,
|
31 |
duration_scaling_factor,
|
|
|
40 |
emb_slider_6,
|
41 |
loudness_in_db
|
42 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
if reference_audio is None:
|
44 |
self.wgan.set_latent(voice_seed)
|
45 |
controllability_vector = torch.tensor([emb_slider_1,
|
|
|
55 |
|
56 |
phones = self.model.text2phone.get_phone_string(prompt)
|
57 |
if len(phones) > 1800:
|
58 |
+
prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
print(prompt + "\n\n")
|
61 |
wav, sr, fig = self.model(prompt,
|
app.py
CHANGED
@@ -10,7 +10,6 @@ class TTSWebUI:
|
|
10 |
|
11 |
def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
|
12 |
iso_to_name = load_json_from_path(path_to_iso_list)
|
13 |
-
text_selection = [f"{iso_to_name[iso_code]} ({iso_code})" for iso_code in iso_to_name]
|
14 |
# accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
|
15 |
|
16 |
self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
|
@@ -20,10 +19,6 @@ class TTSWebUI:
|
|
20 |
placeholder="write what you want the synthesis to read here...",
|
21 |
value="What I cannot create, I do not understand.",
|
22 |
label="Text input"),
|
23 |
-
gr.Dropdown(text_selection,
|
24 |
-
type="value",
|
25 |
-
value='English (eng)',
|
26 |
-
label="Select the Language of the Text (type on your keyboard to find it quickly)"),
|
27 |
gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
|
28 |
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
29 |
value=279,
|
@@ -45,7 +40,6 @@ class TTSWebUI:
|
|
45 |
|
46 |
def read(self,
|
47 |
prompt,
|
48 |
-
language,
|
49 |
reference_audio,
|
50 |
voice_seed,
|
51 |
prosody_creativity,
|
@@ -57,8 +51,6 @@ class TTSWebUI:
|
|
57 |
):
|
58 |
sr, wav, fig = self.controllable_ui.read(prompt,
|
59 |
reference_audio,
|
60 |
-
language.split(" ")[-1].split("(")[1].split(")")[0],
|
61 |
-
language.split(" ")[-1].split("(")[1].split(")")[0],
|
62 |
voice_seed,
|
63 |
prosody_creativity,
|
64 |
duration_scaling_factor,
|
|
|
10 |
|
11 |
def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
|
12 |
iso_to_name = load_json_from_path(path_to_iso_list)
|
|
|
13 |
# accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
|
14 |
|
15 |
self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
|
|
|
19 |
placeholder="write what you want the synthesis to read here...",
|
20 |
value="What I cannot create, I do not understand.",
|
21 |
label="Text input"),
|
|
|
|
|
|
|
|
|
22 |
gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
|
23 |
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
24 |
value=279,
|
|
|
40 |
|
41 |
def read(self,
|
42 |
prompt,
|
|
|
43 |
reference_audio,
|
44 |
voice_seed,
|
45 |
prosody_creativity,
|
|
|
51 |
):
|
52 |
sr, wav, fig = self.controllable_ui.read(prompt,
|
53 |
reference_audio,
|
|
|
|
|
54 |
voice_seed,
|
55 |
prosody_creativity,
|
56 |
duration_scaling_factor,
|