svc-gradioapi-whisper

Sleeping

App Files Community

rodrigomasini committed on Apr 5

Commit

5e4694a

•

1 Parent(s): ae73f04

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -26

app.py CHANGED Viewed

@@ -11,27 +11,15 @@ from whisperspeech.pipeline import Pipeline
 DEVEL=os.environ.get('DEVEL', False)
 title = """
-<picture>
-  <source srcset="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/dark-banner.png" media="(prefers-color-scheme: dark)" />
-  <img alt="WhisperSpeech banner with Collabora and LAION logos" src="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/light-banner.png" style="width: 60%; margin: 0 auto;" />
-</picture>
-# Welcome to Collabora's WhisperSpeech
-WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
-The model is fully open and you can run it on your local hardware. It's like **Stable Diffusion but for speech**
-– both powerful and easily customizable.
-[You can contribute to WhisperSpeech on Github.](https://github.com/collabora/WhisperSpeech)
-You can also join the discussion on Discord [![](https://dcbadge.vercel.app/api/server/FANw4rHD5E)](https://discord.gg/FANw4rHD5E)
-Huge thanks to [Tonic](https://huggingface.co/Tonic) who helped build this Space for WhisperSpeech.
 ### How to Use It
 Write you text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
-Optionally you can upload a speech sample or give it a file URL to clone an existing voice. Check out the
-examples at the bottom of the page for inspiration.
 """
 footer = """
@@ -96,10 +84,6 @@ def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", c
     return (24000, audio.T.numpy())
-    # Did not work for me in Safari:
-    # mp3 = io.BytesIO()
-    # torchaudio.save(mp3, audio, 24000, format='mp3')
-    # return mp3.getvalue()
 pipe = Pipeline(torch_compile=not DEVEL)
 # warmup will come from regenerating the examples
@@ -110,21 +94,21 @@ with gr.Blocks() as demo:
         with gr.Column(scale=2):
             text_input = gr.Textbox(label="Enter multilingual text💬📝",
                                     value=text_examples[0][0],
-                                    info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
             cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
-                            label="Tempo (in characters per second)")
             with gr.Row(equal_height=True):
-                speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
                                      sources=["upload", "microphone"],
                                      type='filepath')
                 url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
             gr.Markdown("  \n  ") # fixes the bottom overflow from Audio
-            generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
         with gr.Column(scale=1):
-            output_audio = gr.Audio(label="WhisperSpeech says…")
     with gr.Column():
-        gr.Markdown("### Try these examples to get started !🌟🌬️")
         gr.Examples(
             examples=text_examples,
             inputs=[text_input, url_input],

 DEVEL=os.environ.get('DEVEL', False)
 title = """
+# Whisper
+Based on WhisperSpeech - Open Source text-to-speech system - built by Collabora and LAION by inverting Whisper.
+It's like **Stable Diffusion but for speech**
 ### How to Use It
 Write you text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
+Optionally you can upload a speech sample or give it a file URL to clone an existing voice.
 """
 footer = """
     return (24000, audio.T.numpy())
 pipe = Pipeline(torch_compile=not DEVEL)
 # warmup will come from regenerating the examples
         with gr.Column(scale=2):
             text_input = gr.Textbox(label="Enter multilingual text💬📝",
                                     value=text_examples[0][0],
+                                    info="You can use `<en>` for English.")
             cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
+                            label="Time (in characters per second)")
             with gr.Row(equal_height=True):
+                speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)",
                                      sources=["upload", "microphone"],
                                      type='filepath')
                 url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
             gr.Markdown("  \n  ") # fixes the bottom overflow from Audio
+            generate_button = gr.Button("Run")
         with gr.Column(scale=1):
+            output_audio = gr.Audio(label="Result")
     with gr.Column():
+        gr.Markdown("### Examples:")
         gr.Examples(
             examples=text_examples,
             inputs=[text_input, url_input],