tanbw committed on
Commit
93215c8
1 Parent(s): 0e16d91

Update webui.py

Browse files
Files changed (1) hide show
  1. webui.py +16 -6
webui.py CHANGED
@@ -64,6 +64,7 @@ def change_instruction(mode_checkbox_group):
64
  def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
65
  seed, stream, speed):
66
  stream=False
 
67
  if prompt_wav_upload is not None:
68
  prompt_wav = prompt_wav_upload
69
  elif prompt_wav_record is not None:
@@ -111,28 +112,36 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
111
  if instruct_text != '':
112
  gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
113
 
 
 
114
  if mode_checkbox_group == '预训练音色':
115
  logging.info('get sft inference request')
116
  set_all_random_seed(seed)
117
  for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
118
- return (target_sr, i['tts_speech'].numpy().flatten())
119
  elif mode_checkbox_group == '3s极速复刻':
120
  logging.info('get zero_shot inference request')
121
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
122
  set_all_random_seed(seed)
123
  for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
124
- return (target_sr, i['tts_speech'].numpy().flatten())
125
  elif mode_checkbox_group == '跨语种复刻':
126
  logging.info('get cross_lingual inference request')
127
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
128
  set_all_random_seed(seed)
129
  for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
130
- return (target_sr, i['tts_speech'].numpy().flatten())
131
  else:
132
  logging.info('get instruct inference request')
133
  set_all_random_seed(seed)
134
  for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
135
- return (target_sr, i['tts_speech'].numpy().flatten())
 
 
 
 
 
 
136
 
137
  # SDK模型下载
138
  import platform
@@ -171,8 +180,9 @@ def get_cosyvoice():
171
  with cosyvoice_lock:
172
  if cosyvoice_instance is not None:
173
  return cosyvoice_instance
174
- cosyvoice_instance=CosyVoice(model_dir)
175
- return cosyvoice_instance
 
176
 
177
  def load_sft_options():
178
  sound_choices=get_cosyvoice().list_avaliable_spks()
 
64
  def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
65
  seed, stream, speed):
66
  stream=False
67
+ global cosyvoice_instance, model_dir
68
  if prompt_wav_upload is not None:
69
  prompt_wav = prompt_wav_upload
70
  elif prompt_wav_record is not None:
 
112
  if instruct_text != '':
113
  gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
114
 
115
+ audio_data_list = []
116
+
117
  if mode_checkbox_group == '预训练音色':
118
  logging.info('get sft inference request')
119
  set_all_random_seed(seed)
120
  for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
121
+ audio_data_list.append(i['tts_speech'].numpy().flatten())
122
  elif mode_checkbox_group == '3s极速复刻':
123
  logging.info('get zero_shot inference request')
124
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
125
  set_all_random_seed(seed)
126
  for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
127
+ audio_data_list.append(i['tts_speech'].numpy().flatten())
128
  elif mode_checkbox_group == '跨语种复刻':
129
  logging.info('get cross_lingual inference request')
130
  prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
131
  set_all_random_seed(seed)
132
  for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
133
+ audio_data_list.append(i['tts_speech'].numpy().flatten())
134
  else:
135
  logging.info('get instruct inference request')
136
  set_all_random_seed(seed)
137
  for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
138
+ audio_data_list.append(i['tts_speech'].numpy().flatten())
139
+
140
+ # 将所有的音频数据拼接起来
141
+ concatenated_audio_data = np.concatenate(audio_data_list)
142
+
143
+ # 返回拼接后的音频数据和目标采样率
144
+ return (target_sr, concatenated_audio_data)
145
 
146
  # SDK模型下载
147
  import platform
 
180
  with cosyvoice_lock:
181
  if cosyvoice_instance is not None:
182
  return cosyvoice_instance
183
+ else:
184
+ cosyvoice_instance=CosyVoice(model_dir)
185
+ return cosyvoice_instance
186
 
187
  def load_sft_options():
188
  sound_choices=get_cosyvoice().list_avaliable_spks()