Flux9665 committed on
Commit
c255993
1 Parent(s): f66c1f0

update to current version

Browse files
InferenceInterfaces/ControllableInterface.py CHANGED
@@ -26,8 +26,6 @@ class ControllableInterface:
26
  def read(self,
27
  prompt,
28
  reference_audio,
29
- language,
30
- accent,
31
  voice_seed,
32
  prosody_creativity,
33
  duration_scaling_factor,
@@ -42,14 +40,6 @@ class ControllableInterface:
42
  emb_slider_6,
43
  loudness_in_db
44
  ):
45
- if self.current_language != language:
46
- self.model.set_phonemizer_language(language)
47
- print(f"switched phonemizer language to {language}")
48
- self.current_language = language
49
- if self.current_accent != accent:
50
- self.model.set_accent_language(accent)
51
- print(f"switched accent language to {accent}")
52
- self.current_accent = accent
53
  if reference_audio is None:
54
  self.wgan.set_latent(voice_seed)
55
  controllability_vector = torch.tensor([emb_slider_1,
@@ -65,40 +55,7 @@ class ControllableInterface:
65
 
66
  phones = self.model.text2phone.get_phone_string(prompt)
67
  if len(phones) > 1800:
68
- if language == "deu":
69
- prompt = "Deine Eingabe war zu lang. Bitte versuche es entweder mit einem kürzeren Text oder teile ihn in mehrere Teile auf."
70
- elif language == "ell":
71
- prompt = "Η εισήγησή σας ήταν πολύ μεγάλη. Παρακαλώ δοκιμάστε είτε ένα μικρότερο κείμενο είτε χωρίστε το σε διάφορα μέρη."
72
- elif language == "spa":
73
- prompt = "Su entrada es demasiado larga. Por favor, intente un texto más corto o divídalo en varias partes."
74
- elif language == "fin":
75
- prompt = "Vastauksesi oli liian pitkä. Kokeile joko lyhyempää tekstiä tai jaa se useampaan osaan."
76
- elif language == "rus":
77
- prompt = "Ваш текст слишком длинный. Пожалуйста, попробуйте либо сократить текст, либо разделить его на несколько частей."
78
- elif language == "hun":
79
- prompt = "Túl hosszú volt a bevitele. Kérjük, próbáljon meg rövidebb szöveget írni, vagy ossza több részre."
80
- elif language == "nld":
81
- prompt = "Uw input was te lang. Probeer een kortere tekst of splits het in verschillende delen."
82
- elif language == "fra":
83
- prompt = "Votre saisie était trop longue. Veuillez essayer un texte plus court ou le diviser en plusieurs parties."
84
- elif language == 'pol':
85
- prompt = "Twój wpis był zbyt długi. Spróbuj skrócić tekst lub podzielić go na kilka części."
86
- elif language == 'por':
87
- prompt = "O seu contributo foi demasiado longo. Por favor, tente um texto mais curto ou divida-o em várias partes."
88
- elif language == 'ita':
89
- prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
90
- elif language == 'cmn':
91
- prompt = "你的输入太长了。请尝试使用较短的文本或将其拆分为多个部分。"
92
- elif language == 'vie':
93
- prompt = "Đầu vào của bạn quá dài. Vui lòng thử một văn bản ngắn hơn hoặc chia nó thành nhiều phần."
94
- else:
95
- prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
96
- if self.current_language != "eng":
97
- self.model.set_phonemizer_language("eng")
98
- self.current_language = "eng"
99
- if self.current_accent != "eng":
100
- self.model.set_accent_language("eng")
101
- self.current_accent = "eng"
102
 
103
  print(prompt + "\n\n")
104
  wav, sr, fig = self.model(prompt,
 
26
  def read(self,
27
  prompt,
28
  reference_audio,
 
 
29
  voice_seed,
30
  prosody_creativity,
31
  duration_scaling_factor,
 
40
  emb_slider_6,
41
  loudness_in_db
42
  ):
 
 
 
 
 
 
 
 
43
  if reference_audio is None:
44
  self.wgan.set_latent(voice_seed)
45
  controllability_vector = torch.tensor([emb_slider_1,
 
55
 
56
  phones = self.model.text2phone.get_phone_string(prompt)
57
  if len(phones) > 1800:
58
+ prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  print(prompt + "\n\n")
61
  wav, sr, fig = self.model(prompt,
app.py CHANGED
@@ -10,7 +10,6 @@ class TTSWebUI:
10
 
11
  def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
12
  iso_to_name = load_json_from_path(path_to_iso_list)
13
- text_selection = [f"{iso_to_name[iso_code]} ({iso_code})" for iso_code in iso_to_name]
14
  # accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
15
 
16
  self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
@@ -20,10 +19,6 @@ class TTSWebUI:
20
  placeholder="write what you want the synthesis to read here...",
21
  value="What I cannot create, I do not understand.",
22
  label="Text input"),
23
- gr.Dropdown(text_selection,
24
- type="value",
25
- value='English (eng)',
26
- label="Select the Language of the Text (type on your keyboard to find it quickly)"),
27
  gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
28
  gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
29
  value=279,
@@ -45,7 +40,6 @@ class TTSWebUI:
45
 
46
  def read(self,
47
  prompt,
48
- language,
49
  reference_audio,
50
  voice_seed,
51
  prosody_creativity,
@@ -57,8 +51,6 @@ class TTSWebUI:
57
  ):
58
  sr, wav, fig = self.controllable_ui.read(prompt,
59
  reference_audio,
60
- language.split(" ")[-1].split("(")[1].split(")")[0],
61
- language.split(" ")[-1].split("(")[1].split(")")[0],
62
  voice_seed,
63
  prosody_creativity,
64
  duration_scaling_factor,
 
10
 
11
  def __init__(self, gpu_id="cpu", title="Stochastic Speech Synthesis with ToucanTTS", article="", available_artificial_voices=1000, path_to_iso_list="Preprocessing/multilinguality/iso_to_fullname.json"):
12
  iso_to_name = load_json_from_path(path_to_iso_list)
 
13
  # accent_selection = [f"{iso_to_name[iso_code]} Accent ({iso_code})" for iso_code in iso_to_name]
14
 
15
  self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
 
19
  placeholder="write what you want the synthesis to read here...",
20
  value="What I cannot create, I do not understand.",
21
  label="Text input"),
 
 
 
 
22
  gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
23
  gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
24
  value=279,
 
40
 
41
  def read(self,
42
  prompt,
 
43
  reference_audio,
44
  voice_seed,
45
  prosody_creativity,
 
51
  ):
52
  sr, wav, fig = self.controllable_ui.read(prompt,
53
  reference_audio,
 
 
54
  voice_seed,
55
  prosody_creativity,
56
  duration_scaling_factor,