tanbw committed on
Commit
ed727af
1 Parent(s): 1423e41

Update webui.py

Browse files
Files changed (1) hide show
  1. webui.py +13 -12
webui.py CHANGED
@@ -66,6 +66,7 @@ def change_instruction(mode_checkbox_group):
66
  @spaces.GPU
67
  def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
68
  seed, stream, speed):
 
69
  if prompt_wav_upload is not None:
70
  prompt_wav = prompt_wav_upload
71
  elif prompt_wav_record is not None:
@@ -76,31 +77,31 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
76
  if mode_checkbox_group in ['自然语言控制']:
77
  if get_cosyvoice().frontend.instruct is False:
78
  gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
79
- yield (target_sr, default_data)
80
  if instruct_text == '':
81
  gr.Warning('您正在使用自然语言控制模式, 请输入instruct文本')
82
- yield (target_sr, default_data)
83
  if prompt_wav is not None or prompt_text != '':
84
  gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
85
  # if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
86
  if mode_checkbox_group in ['跨语种复刻']:
87
  if get_cosyvoice().frontend.instruct is True:
88
  gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
89
- yield (target_sr, default_data)
90
  if instruct_text != '':
91
  gr.Info('您正在使用跨语种复刻模式, instruct文本会被忽略')
92
  if prompt_wav is None:
93
  gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
94
- yield (target_sr, default_data)
95
  gr.Info('您正在使用跨语种复刻模式, 请确保合成文本和prompt文本为不同语言')
96
  # if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
97
  if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
98
  if prompt_wav is None:
99
  gr.Warning('prompt音频为空,您是否忘记输入prompt音频?')
100
- yield (target_sr, default_data)
101
  if torchaudio.info(prompt_wav).sample_rate < prompt_sr:
102
  gr.Warning('prompt音频采样率{}低于{}'.format(torchaudio.info(prompt_wav).sample_rate, prompt_sr))
103
- yield (target_sr, default_data)
104
  # sft mode only use sft_dropdown
105
  if mode_checkbox_group in ['预训练音色']:
106
  if instruct_text != '' or prompt_wav is not None or prompt_text != '':
@@ -109,7 +110,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
109
  if mode_checkbox_group in ['3s极速复刻']:
110
  if prompt_text == '':
111
  gr.Warning('prompt文本为空,您是否忘记输入prompt文本?')
112
- yield (target_sr, default_data)
113
  if instruct_text != '':
114
  gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
115
 
@@ -117,24 +118,24 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
117
  logging.info('get sft inference request')
118
  set_all_random_seed(seed)
119
  for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
120
- yield (target_sr, i['tts_speech'].numpy().flatten())
121
  elif mode_checkbox_group == '3s极速复刻':
122
  logging.info('get zero_shot inference request')
123
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
124
  set_all_random_seed(seed)
125
  for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
126
- yield (target_sr, i['tts_speech'].numpy().flatten())
127
  elif mode_checkbox_group == '跨语种复刻':
128
  logging.info('get cross_lingual inference request')
129
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
130
  set_all_random_seed(seed)
131
  for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
132
- yield (target_sr, i['tts_speech'].numpy().flatten())
133
  else:
134
  logging.info('get instruct inference request')
135
  set_all_random_seed(seed)
136
  for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
137
- yield (target_sr, i['tts_speech'].numpy().flatten())
138
 
139
  # SDK模型下载
140
  import platform
@@ -214,7 +215,7 @@ with gr.Blocks() as demo:
214
 
215
  generate_button = gr.Button("生成音频")
216
 
217
- audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
218
 
219
  seed_button.click(generate_seed, inputs=[], outputs=seed)
220
  generate_button.click(generate_audio,
 
66
  @spaces.GPU
67
  def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
68
  seed, stream, speed):
69
+ stream=False
70
  if prompt_wav_upload is not None:
71
  prompt_wav = prompt_wav_upload
72
  elif prompt_wav_record is not None:
 
77
  if mode_checkbox_group in ['自然语言控制']:
78
  if get_cosyvoice().frontend.instruct is False:
79
  gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
80
+ return (target_sr, default_data)
81
  if instruct_text == '':
82
  gr.Warning('您正在使用自然语言控制模式, 请输入instruct文本')
83
+ return (target_sr, default_data)
84
  if prompt_wav is not None or prompt_text != '':
85
  gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
86
  # if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
87
  if mode_checkbox_group in ['跨语种复刻']:
88
  if get_cosyvoice().frontend.instruct is True:
89
  gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
90
+ return (target_sr, default_data)
91
  if instruct_text != '':
92
  gr.Info('您正在使用跨语种复刻模式, instruct文本会被忽略')
93
  if prompt_wav is None:
94
  gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
95
+ return (target_sr, default_data)
96
  gr.Info('您正在使用跨语种复刻模式, 请确保合成文本和prompt文本为不同语言')
97
  # if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
98
  if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
99
  if prompt_wav is None:
100
  gr.Warning('prompt音频为空,您是否忘记输入prompt音频?')
101
+ return (target_sr, default_data)
102
  if torchaudio.info(prompt_wav).sample_rate < prompt_sr:
103
  gr.Warning('prompt音频采样率{}低于{}'.format(torchaudio.info(prompt_wav).sample_rate, prompt_sr))
104
+ return (target_sr, default_data)
105
  # sft mode only use sft_dropdown
106
  if mode_checkbox_group in ['预训练音色']:
107
  if instruct_text != '' or prompt_wav is not None or prompt_text != '':
 
110
  if mode_checkbox_group in ['3s极速复刻']:
111
  if prompt_text == '':
112
  gr.Warning('prompt文本为空,您是否忘记输入prompt文本?')
113
+ return (target_sr, default_data)
114
  if instruct_text != '':
115
  gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
116
 
 
118
  logging.info('get sft inference request')
119
  set_all_random_seed(seed)
120
  for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
121
+ return (target_sr, i['tts_speech'].numpy().flatten())
122
  elif mode_checkbox_group == '3s极速复刻':
123
  logging.info('get zero_shot inference request')
124
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
125
  set_all_random_seed(seed)
126
  for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
127
+ return (target_sr, i['tts_speech'].numpy().flatten())
128
  elif mode_checkbox_group == '跨语种复刻':
129
  logging.info('get cross_lingual inference request')
130
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
131
  set_all_random_seed(seed)
132
  for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
133
+ return (target_sr, i['tts_speech'].numpy().flatten())
134
  else:
135
  logging.info('get instruct inference request')
136
  set_all_random_seed(seed)
137
  for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
138
+ return (target_sr, i['tts_speech'].numpy().flatten())
139
 
140
  # SDK模型下载
141
  import platform
 
215
 
216
  generate_button = gr.Button("生成音频")
217
 
218
+ audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=False)
219
 
220
  seed_button.click(generate_seed, inputs=[], outputs=seed)
221
  generate_button.click(generate_audio,