Add voice selector
app.py CHANGED
@@ -104,11 +104,14 @@ def generate_response(messages, model, tokenizer):
 
 @gpu_decorator
 def infer(
-    ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=0.8, show_info=gr.Info
+    ref_voice, ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=0.8, show_info=gr.Info
 ):
     # ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
-
-
+    if ref_voice:
+        ref_audio = Path(f"./ref_{str(ref_voice).lower()}.wav").read_bytes()
+        ref_text = Path(f"./ref_{str(ref_voice).lower()}.txt").read_text()
+    else:
+        ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
 
     if model == "F5-TTS":
         ema_model = F5TTS_ema_model
@@ -165,7 +168,8 @@ with gr.Blocks() as app_credits:
     """)
 with gr.Blocks() as app_tts:
     gr.Markdown("# Batched TTS")
-
+    ref_voice = gr.Radio(["Female", "Male"], label="Voice", info="Reference voice for inference")
+    ref_audio_input = gr.Audio(label="Reference Audio", type="filepath", visible=False, value=Path("./ref_female.wav"))
     gen_text_input = gr.Textbox(label="Text to Generate", lines=10, value=Path("./sample.txt").read_text())
     generate_btn = gr.Button("Synthesize", variant="primary")
     with gr.Accordion("Advanced Settings", open=False):
@@ -174,7 +178,7 @@ with gr.Blocks() as app_tts:
             info="Leave blank to automatically transcribe the reference audio. If you enter text it will override automatic transcription.",
             lines=2,
             visible=False,
-            value=Path("./
+            value=Path("./ref_female.txt").read_text()
         )
         remove_silence = gr.Checkbox(
             label="Remove Silences",
@@ -203,6 +207,7 @@ with gr.Blocks() as app_tts:
 
     @gpu_decorator
     def basic_tts(
+        ref_voice,
         ref_audio_input,
         ref_text_input,
         gen_text_input,
@@ -211,6 +216,7 @@ with gr.Blocks() as app_tts:
         speed_slider,
     ):
         audio_out, spectrogram_path, ref_text_out = infer(
+            ref_voice,
            ref_audio_input,
            ref_text_input,
            gen_text_input,
@@ -224,6 +230,7 @@ with gr.Blocks() as app_tts:
    generate_btn.click(
        basic_tts,
        inputs=[
+            ref_voice,
            ref_audio_input,
            ref_text_input,
            gen_text_input,
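
For context, the pattern this commit introduces is a gr.Radio voice selector whose value is mapped to bundled reference files. Below is a minimal, self-contained sketch of that wiring; the pick_reference helper and the demo Blocks are illustrative only (they are not part of app.py), and it assumes ref_female.wav / ref_male.wav with matching .txt transcripts sit next to the script, as the diff implies.

# Illustrative sketch only -- not part of app.py. Assumes ref_female.wav,
# ref_male.wav and matching .txt transcripts exist next to this script.
from pathlib import Path

import gradio as gr


def pick_reference(ref_voice):
    # Map the selected voice ("Female"/"Male") to its reference clip and transcript.
    stem = f"./ref_{ref_voice.lower()}"
    ref_audio_path = f"{stem}.wav"                # filepath consumed by the Audio component
    ref_text = Path(f"{stem}.txt").read_text()    # transcript of the reference clip
    return ref_audio_path, ref_text


with gr.Blocks() as demo:
    ref_voice = gr.Radio(["Female", "Male"], label="Voice", value="Female")
    ref_audio_preview = gr.Audio(label="Reference Audio", type="filepath")
    ref_text_preview = gr.Textbox(label="Reference Text")
    ref_voice.change(pick_reference, inputs=ref_voice, outputs=[ref_audio_preview, ref_text_preview])

demo.launch()

Keeping the voice-to-file mapping in one helper like this also makes the new if ref_voice: branch in infer easy to exercise without running the full Gradio app.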