刘悦 committed on
Commit
3ee6e19
1 Parent(s): ead9644

Update webui.py

Browse files

add speed_factor

Files changed (1) hide show
  1. webui.py +14 -5
webui.py CHANGED
@@ -28,7 +28,7 @@ import logging
28
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
29
 
30
  from cosyvoice.cli.cosyvoice import CosyVoice
31
- from cosyvoice.utils.file_utils import load_wav
32
 
33
  logging.basicConfig(level=logging.DEBUG,
34
  format='%(asctime)s %(levelname)s %(message)s')
@@ -66,7 +66,7 @@ instruct_dict = {'预训练音色': '1. 选择预训练音色\n2. 点击生成
66
  def change_instruction(mode_checkbox_group):
67
  return instruct_dict[mode_checkbox_group]
68
 
69
- def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text, seed):
70
  if prompt_wav_upload is not None:
71
  prompt_wav = prompt_wav_upload
72
  elif prompt_wav_record is not None:
@@ -132,7 +132,16 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
132
  logging.info('get instruct inference request')
133
  set_all_random_seed(seed)
134
  output = cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text)
135
- audio_data = output['tts_speech'].numpy().flatten()
 
 
 
 
 
 
 
 
 
136
  return (target_sr, audio_data)
137
 
138
  def main():
@@ -141,7 +150,7 @@ def main():
141
  gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
142
 
143
  tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
144
-
145
  with gr.Row():
146
  mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
147
  instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
@@ -162,7 +171,7 @@ def main():
162
 
163
  seed_button.click(generate_seed, inputs=[], outputs=seed)
164
  generate_button.click(generate_audio,
165
- inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text, seed],
166
  outputs=[audio_output])
167
  mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
168
  demo.queue(max_size=4, default_concurrency_limit=2)
 
28
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
29
 
30
  from cosyvoice.cli.cosyvoice import CosyVoice
31
+ from cosyvoice.utils.file_utils import load_wav,speed_change
32
 
33
  logging.basicConfig(level=logging.DEBUG,
34
  format='%(asctime)s %(levelname)s %(message)s')
 
66
  def change_instruction(mode_checkbox_group):
67
  return instruct_dict[mode_checkbox_group]
68
 
69
+ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text, seed,speed_factor):
70
  if prompt_wav_upload is not None:
71
  prompt_wav = prompt_wav_upload
72
  elif prompt_wav_record is not None:
 
132
  logging.info('get instruct inference request')
133
  set_all_random_seed(seed)
134
  output = cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text)
135
+
136
+ if speed_factor != 1.0:
137
+ try:
138
+ audio_data , sample_rate = speed_change(output["tts_speech"],target_sr,str(speed_factor))
139
+ audio_data = audio_data.numpy().flatten()
140
+ except Exception as e:
141
+ print(f"Failed to change speed of audio: \n{e}")
142
+ else:
143
+ audio_data = output['tts_speech'].numpy().flatten()
144
+
145
  return (target_sr, audio_data)
146
 
147
  def main():
 
150
  gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
151
 
152
  tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
153
+ speed_factor = gr.Slider(minimum=0.25,maximum=4,step=0.05,label="语速调节",value=1.0,interactive=True)
154
  with gr.Row():
155
  mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
156
  instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
 
171
 
172
  seed_button.click(generate_seed, inputs=[], outputs=seed)
173
  generate_button.click(generate_audio,
174
+ inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text, seed,speed_factor],
175
  outputs=[audio_output])
176
  mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
177
  demo.queue(max_size=4, default_concurrency_limit=2)