versae committed on
Commit
24ac2a0
·
1 Parent(s): 34dd2ff

Add voice selector

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -104,11 +104,14 @@ def generate_response(messages, model, tokenizer):
104
 
105
  @gpu_decorator
106
  def infer(
107
- ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=0.8, show_info=gr.Info
108
  ):
109
  # ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
110
- ref_audio = Path("./ref_male.wav").read_bytes()
111
- ref_text = Path("./ref_male.txt").read_text()
 
 
 
112
 
113
  if model == "F5-TTS":
114
  ema_model = F5TTS_ema_model
@@ -165,7 +168,8 @@ with gr.Blocks() as app_credits:
165
  """)
166
  with gr.Blocks() as app_tts:
167
  gr.Markdown("# Batched TTS")
168
- ref_audio_input = gr.Audio(label="Reference Audio", type="filepath", visible=False, value=Path("./ref_male.wav"))
 
169
  gen_text_input = gr.Textbox(label="Text to Generate", lines=10, value=Path("./sample.txt").read_text())
170
  generate_btn = gr.Button("Synthesize", variant="primary")
171
  with gr.Accordion("Advanced Settings", open=False):
@@ -174,7 +178,7 @@ with gr.Blocks() as app_tts:
174
  info="Leave blank to automatically transcribe the reference audio. If you enter text it will override automatic transcription.",
175
  lines=2,
176
  visible=False,
177
- value=Path("./ref_male.txt").read_text()
178
  )
179
  remove_silence = gr.Checkbox(
180
  label="Remove Silences",
@@ -203,6 +207,7 @@ with gr.Blocks() as app_tts:
203
 
204
  @gpu_decorator
205
  def basic_tts(
 
206
  ref_audio_input,
207
  ref_text_input,
208
  gen_text_input,
@@ -211,6 +216,7 @@ with gr.Blocks() as app_tts:
211
  speed_slider,
212
  ):
213
  audio_out, spectrogram_path, ref_text_out = infer(
 
214
  ref_audio_input,
215
  ref_text_input,
216
  gen_text_input,
@@ -224,6 +230,7 @@ with gr.Blocks() as app_tts:
224
  generate_btn.click(
225
  basic_tts,
226
  inputs=[
 
227
  ref_audio_input,
228
  ref_text_input,
229
  gen_text_input,
 
104
 
105
  @gpu_decorator
106
  def infer(
107
+ ref_voice, ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=0.8, show_info=gr.Info
108
  ):
109
  # ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
110
+ if ref_voice:
111
+ ref_audio = Path(f"./ref_{str(ref_voice).lower()}.wav").read_bytes()
112
+ ref_text = Path(f"./ref_{str(ref_voice).lower()}.txt").read_text()
113
+ else:
114
+ ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
115
 
116
  if model == "F5-TTS":
117
  ema_model = F5TTS_ema_model
 
168
  """)
169
  with gr.Blocks() as app_tts:
170
  gr.Markdown("# Batched TTS")
171
+ ref_voice = gr.Radio(["Female", "Male"], label="Voice", info="Reference voice for inference")
172
+ ref_audio_input = gr.Audio(label="Reference Audio", type="filepath", visible=False, value=Path("./ref_female.wav"))
173
  gen_text_input = gr.Textbox(label="Text to Generate", lines=10, value=Path("./sample.txt").read_text())
174
  generate_btn = gr.Button("Synthesize", variant="primary")
175
  with gr.Accordion("Advanced Settings", open=False):
 
178
  info="Leave blank to automatically transcribe the reference audio. If you enter text it will override automatic transcription.",
179
  lines=2,
180
  visible=False,
181
+ value=Path("./ref_female.txt").read_text()
182
  )
183
  remove_silence = gr.Checkbox(
184
  label="Remove Silences",
 
207
 
208
  @gpu_decorator
209
  def basic_tts(
210
+ ref_voice,
211
  ref_audio_input,
212
  ref_text_input,
213
  gen_text_input,
 
216
  speed_slider,
217
  ):
218
  audio_out, spectrogram_path, ref_text_out = infer(
219
+ ref_voice,
220
  ref_audio_input,
221
  ref_text_input,
222
  gen_text_input,
 
230
  generate_btn.click(
231
  basic_tts,
232
  inputs=[
233
+ ref_voice,
234
  ref_audio_input,
235
  ref_text_input,
236
  gen_text_input,