Spaces:
Sleeping
Sleeping
rodrigomasini
committed on
Commit
•
5e4694a
1
Parent(s):
ae73f04
Update app.py
Browse files
app.py
CHANGED
@@ -11,27 +11,15 @@ from whisperspeech.pipeline import Pipeline
|
|
11 |
DEVEL=os.environ.get('DEVEL', False)
|
12 |
|
13 |
title = """
|
14 |
-
|
15 |
-
<source srcset="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/dark-banner.png" media="(prefers-color-scheme: dark)" />
|
16 |
-
<img alt="WhisperSpeech banner with Collabora and LAION logos" src="https://huggingface.co/spaces/collabora/whisperspeech/resolve/main/light-banner.png" style="width: 60%; margin: 0 auto;" />
|
17 |
-
</picture>
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
|
22 |
-
The model is fully open and you can run it on your local hardware. It's like **Stable Diffusion but for speech**
|
23 |
-
– both powerful and easily customizable.
|
24 |
-
|
25 |
-
[You can contribute to WhisperSpeech on Github.](https://github.com/collabora/WhisperSpeech)
|
26 |
-
You can also join the discussion on Discord [![](https://dcbadge.vercel.app/api/server/FANw4rHD5E)](https://discord.gg/FANw4rHD5E)
|
27 |
-
|
28 |
-
Huge thanks to [Tonic](https://huggingface.co/Tonic) who helped build this Space for WhisperSpeech.
|
29 |
|
30 |
### How to Use It
|
31 |
|
32 |
Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
|
33 |
-
Optionally you can upload a speech sample or give it a file URL to clone an existing voice.
|
34 |
-
examples at the bottom of the page for inspiration.
|
35 |
"""
|
36 |
|
37 |
footer = """
|
@@ -96,10 +84,6 @@ def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", c
|
|
96 |
|
97 |
return (24000, audio.T.numpy())
|
98 |
|
99 |
-
# Did not work for me in Safari:
|
100 |
-
# mp3 = io.BytesIO()
|
101 |
-
# torchaudio.save(mp3, audio, 24000, format='mp3')
|
102 |
-
# return mp3.getvalue()
|
103 |
|
104 |
pipe = Pipeline(torch_compile=not DEVEL)
|
105 |
# warmup will come from regenerating the examples
|
@@ -110,21 +94,21 @@ with gr.Blocks() as demo:
|
|
110 |
with gr.Column(scale=2):
|
111 |
text_input = gr.Textbox(label="Enter multilingual text💬📝",
|
112 |
value=text_examples[0][0],
|
113 |
-
info="You can use `<en>` for English
|
114 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
115 |
-
label="
|
116 |
with gr.Row(equal_height=True):
|
117 |
-
speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)
|
118 |
sources=["upload", "microphone"],
|
119 |
type='filepath')
|
120 |
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
121 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
122 |
-
generate_button = gr.Button("
|
123 |
with gr.Column(scale=1):
|
124 |
-
output_audio = gr.Audio(label="
|
125 |
|
126 |
with gr.Column():
|
127 |
-
gr.Markdown("###
|
128 |
gr.Examples(
|
129 |
examples=text_examples,
|
130 |
inputs=[text_input, url_input],
|
|
|
11 |
DEVEL=os.environ.get('DEVEL', False)
|
12 |
|
13 |
title = """
|
14 |
+
# Whisper
|
|
|
|
|
|
|
15 |
|
16 |
+
Based on WhisperSpeech - Open Source text-to-speech system - built by Collabora and LAION by inverting Whisper.
|
17 |
+
It's like **Stable Diffusion but for speech**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
### How to Use It
|
20 |
|
21 |
Write your text in the box, you can use language tags (`<en>` or `<pl>`) to create multilingual speech.
|
22 |
+
Optionally you can upload a speech sample or give it a file URL to clone an existing voice.
|
|
|
23 |
"""
|
24 |
|
25 |
footer = """
|
|
|
84 |
|
85 |
return (24000, audio.T.numpy())
|
86 |
|
|
|
|
|
|
|
|
|
87 |
|
88 |
pipe = Pipeline(torch_compile=not DEVEL)
|
89 |
# warmup will come from regenerating the examples
|
|
|
94 |
with gr.Column(scale=2):
|
95 |
text_input = gr.Textbox(label="Enter multilingual text💬📝",
|
96 |
value=text_examples[0][0],
|
97 |
+
info="You can use `<en>` for English.")
|
98 |
cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
|
99 |
+
label="Time (in characters per second)")
|
100 |
with gr.Row(equal_height=True):
|
101 |
+
speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)",
|
102 |
sources=["upload", "microphone"],
|
103 |
type='filepath')
|
104 |
url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
|
105 |
gr.Markdown(" \n ") # fixes the bottom overflow from Audio
|
106 |
+
generate_button = gr.Button("Run")
|
107 |
with gr.Column(scale=1):
|
108 |
+
output_audio = gr.Audio(label="Result")
|
109 |
|
110 |
with gr.Column():
|
111 |
+
gr.Markdown("### Examples:")
|
112 |
gr.Examples(
|
113 |
examples=text_examples,
|
114 |
inputs=[text_input, url_input],
|