Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
8c070ea
1
Parent(s):
61f84ab
Upload openvoice_app.py with huggingface_hub
Browse files
- openvoice_app.py +10 -26
openvoice_app.py
CHANGED
@@ -34,21 +34,15 @@ zh_source_se = torch.load(f'{zh_ckpt_base}/zh_default_se.pth').to(device)
|
|
34 |
# This online demo mainly supports English and Chinese
|
35 |
supported_languages = ['zh', 'en']
|
36 |
|
37 |
-
def predict(prompt, style, audio_file_pth
|
38 |
# initialize a empty info
|
39 |
text_hint = ''
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
gr.Warning("Please accept the Terms & Condition!")
|
44 |
-
return (
|
45 |
-
text_hint,
|
46 |
-
None,
|
47 |
-
None,
|
48 |
-
)
|
49 |
|
50 |
# first detect the input language
|
51 |
-
language_predicted = langid.classify(prompt)[0].strip()
|
52 |
print(f"Detected language:{language_predicted}")
|
53 |
|
54 |
if language_predicted not in supported_languages:
|
@@ -116,6 +110,8 @@ def predict(prompt, style, audio_file_pth, agree):
|
|
116 |
# note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
|
117 |
try:
|
118 |
target_se, audio_name = se_extractor.get_se(speaker_wav, tone_color_converter, target_dir='processed', vad=True)
|
|
|
|
|
119 |
except Exception as e:
|
120 |
text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
|
121 |
gr.Warning(
|
@@ -159,18 +155,15 @@ examples = [
|
|
159 |
"今天天气真好,我们一起出去吃饭吧。",
|
160 |
'default',
|
161 |
"resources/demo_speaker1.mp3",
|
162 |
-
True,
|
163 |
],[
|
164 |
"This audio is generated by open voice with a half-performance model.",
|
165 |
'whispering',
|
166 |
"resources/demo_speaker2.mp3",
|
167 |
-
True,
|
168 |
],
|
169 |
[
|
170 |
"He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
|
171 |
'sad',
|
172 |
"resources/demo_speaker0.mp3",
|
173 |
-
True,
|
174 |
],
|
175 |
]
|
176 |
|
@@ -185,19 +178,16 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
185 |
style_gr = gr.Dropdown(
|
186 |
label="Style",
|
187 |
choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
|
|
|
188 |
max_choices=1,
|
189 |
value="default",
|
190 |
)
|
191 |
ref_gr = gr.Audio(
|
192 |
label="Reference Audio",
|
|
|
193 |
type="filepath",
|
194 |
value="resources/demo_speaker2.mp3",
|
195 |
)
|
196 |
-
tos_gr = gr.Checkbox(
|
197 |
-
label="Agree",
|
198 |
-
value=False,
|
199 |
-
info="I agree to the terms of the cc-by-nc-4.0 license-: https://github.com/myshell-ai/OpenVoice/blob/main/LICENSE",
|
200 |
-
)
|
201 |
|
202 |
tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
|
203 |
|
@@ -207,13 +197,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
207 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
208 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
209 |
|
210 |
-
|
211 |
-
label="Examples",
|
212 |
-
inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
|
213 |
-
outputs=[out_text_gr, audio_gr, ref_audio_gr],
|
214 |
-
fn=predict,
|
215 |
-
cache_examples=False,)
|
216 |
-
tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
|
217 |
|
218 |
demo.queue()
|
219 |
demo.launch(debug=True, show_api=True, share=True)
|
|
|
34 |
# This online demo mainly supports English and Chinese
|
35 |
supported_languages = ['zh', 'en']
|
36 |
|
37 |
+
def predict(prompt, style, audio_file_pth):
|
38 |
# initialize a empty info
|
39 |
text_hint = ''
|
40 |
+
|
41 |
+
# set agree to True by default
|
42 |
+
agree = True
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
# first detect the input language
|
45 |
+
language_predicted = langid.classify(prompt)[0].strip()
|
46 |
print(f"Detected language:{language_predicted}")
|
47 |
|
48 |
if language_predicted not in supported_languages:
|
|
|
110 |
# note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
|
111 |
try:
|
112 |
target_se, audio_name = se_extractor.get_se(speaker_wav, tone_color_converter, target_dir='processed', vad=True)
|
113 |
+
# base_speaker = f"{output_dir}/openai_source_output.mp3"
|
114 |
+
# source_se, audio_name = se_extractor.get_se(base_speaker, tone_color_converter, vad=True)
|
115 |
except Exception as e:
|
116 |
text_hint += f"[ERROR] Get target tone color error {str(e)} \n"
|
117 |
gr.Warning(
|
|
|
155 |
"今天天气真好,我们一起出去吃饭吧。",
|
156 |
'default',
|
157 |
"resources/demo_speaker1.mp3",
|
|
|
158 |
],[
|
159 |
"This audio is generated by open voice with a half-performance model.",
|
160 |
'whispering',
|
161 |
"resources/demo_speaker2.mp3",
|
|
|
162 |
],
|
163 |
[
|
164 |
"He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
|
165 |
'sad',
|
166 |
"resources/demo_speaker0.mp3",
|
|
|
167 |
],
|
168 |
]
|
169 |
|
|
|
178 |
style_gr = gr.Dropdown(
|
179 |
label="Style",
|
180 |
choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
|
181 |
+
info="Please upload a reference audio file, it should be 1 minute long and clear.",
|
182 |
max_choices=1,
|
183 |
value="default",
|
184 |
)
|
185 |
ref_gr = gr.Audio(
|
186 |
label="Reference Audio",
|
187 |
+
|
188 |
type="filepath",
|
189 |
value="resources/demo_speaker2.mp3",
|
190 |
)
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
|
193 |
|
|
|
197 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
198 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
199 |
|
200 |
+
tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
demo.queue()
|
203 |
demo.launch(debug=True, show_api=True, share=True)
|