Spaces:

Flux9665
/

EnglishToucan

Running on Zero

Flux9665 committed on Jul 10, 2024

Commit

20636e5

•

1 Parent(s): bbed20a

add voice cloning interface

Files changed (2) hide show

Architectures/ToucanTTS/InferenceToucanTTS.py CHANGED Viewed

@@ -207,6 +207,8 @@ class ToucanTTS(torch.nn.Module):
         text_tensors = torch.clamp(text_tensors, max=1.0)
         # this is necessary, because of the way we represent modifiers to keep them identifiable.
         if not self.multilingual_model:
             lang_ids = None

         text_tensors = torch.clamp(text_tensors, max=1.0)
         # this is necessary, because of the way we represent modifiers to keep them identifiable.
+        utterance_embedding = torch.nn.functional.normalize(utterance_embedding)
         if not self.multilingual_model:
             lang_ids = None

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ class TTSWebUI:
         self.iface = gr.Interface(fn=self.read,
                                   inputs=[gr.Textbox(lines=2,
                                                      placeholder="write what you want the synthesis to read here...",
-                                                     value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
                                                      label="Text input"),
                                           gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
                                           gr.Slider(minimum=0.0, maximum=0.8, step=0.1, value=0.4, label="Prosody Creativity"),

         self.iface = gr.Interface(fn=self.read,
                                   inputs=[gr.Textbox(lines=2,
                                                      placeholder="write what you want the synthesis to read here...",
+                                                     value="What I cannot create, I do not understand.",
                                                      label="Text input"),
                                           gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
                                           gr.Slider(minimum=0.0, maximum=0.8, step=0.1, value=0.4, label="Prosody Creativity"),