Spaces:
Running
on
Zero
Running
on
Zero
Update webui.py
Browse files
webui.py
CHANGED
@@ -66,6 +66,7 @@ def change_instruction(mode_checkbox_group):
|
|
66 |
@spaces.GPU
|
67 |
def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
68 |
seed, stream, speed):
|
|
|
69 |
if prompt_wav_upload is not None:
|
70 |
prompt_wav = prompt_wav_upload
|
71 |
elif prompt_wav_record is not None:
|
@@ -76,31 +77,31 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
76 |
if mode_checkbox_group in ['自然语言控制']:
|
77 |
if get_cosyvoice().frontend.instruct is False:
|
78 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
79 |
-
yield (target_sr, default_data)
|
80 |
if instruct_text == '':
|
81 |
gr.Warning('您正在使用自然语言控制模式, 请输入instruct文本')
|
82 |
-
yield (target_sr, default_data)
|
83 |
if prompt_wav is not None or prompt_text != '':
|
84 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
85 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
86 |
if mode_checkbox_group in ['跨语种复刻']:
|
87 |
if get_cosyvoice().frontend.instruct is True:
|
88 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
89 |
-
yield (target_sr, default_data)
|
90 |
if instruct_text != '':
|
91 |
gr.Info('您正在使用跨语种复刻模式, instruct文本会被忽略')
|
92 |
if prompt_wav is None:
|
93 |
gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
|
94 |
-
yield (target_sr, default_data)
|
95 |
gr.Info('您正在使用跨语种复刻模式, 请确保合成文本和prompt文本为不同语言')
|
96 |
# if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
|
97 |
if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
|
98 |
if prompt_wav is None:
|
99 |
gr.Warning('prompt音频为空,您是否忘记输入prompt音频?')
|
100 |
-
yield (target_sr, default_data)
|
101 |
if torchaudio.info(prompt_wav).sample_rate < prompt_sr:
|
102 |
gr.Warning('prompt音频采样率{}低于{}'.format(torchaudio.info(prompt_wav).sample_rate, prompt_sr))
|
103 |
-
yield (target_sr, default_data)
|
104 |
# sft mode only use sft_dropdown
|
105 |
if mode_checkbox_group in ['预训练音色']:
|
106 |
if instruct_text != '' or prompt_wav is not None or prompt_text != '':
|
@@ -109,7 +110,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
109 |
if mode_checkbox_group in ['3s极速复刻']:
|
110 |
if prompt_text == '':
|
111 |
gr.Warning('prompt文本为空,您是否忘记输入prompt文本?')
|
112 |
-
yield (target_sr, default_data)
|
113 |
if instruct_text != '':
|
114 |
gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
|
115 |
|
@@ -117,24 +118,24 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
117 |
logging.info('get sft inference request')
|
118 |
set_all_random_seed(seed)
|
119 |
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
120 |
-
yield (target_sr, i['tts_speech'].numpy().flatten())
|
121 |
elif mode_checkbox_group == '3s极速复刻':
|
122 |
logging.info('get zero_shot inference request')
|
123 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
124 |
set_all_random_seed(seed)
|
125 |
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
126 |
-
yield (target_sr, i['tts_speech'].numpy().flatten())
|
127 |
elif mode_checkbox_group == '跨语种复刻':
|
128 |
logging.info('get cross_lingual inference request')
|
129 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
130 |
set_all_random_seed(seed)
|
131 |
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
132 |
-
yield (target_sr, i['tts_speech'].numpy().flatten())
|
133 |
else:
|
134 |
logging.info('get instruct inference request')
|
135 |
set_all_random_seed(seed)
|
136 |
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
137 |
-
yield (target_sr, i['tts_speech'].numpy().flatten())
|
138 |
|
139 |
# SDK模型下载
|
140 |
import platform
|
@@ -214,7 +215,7 @@ with gr.Blocks() as demo:
|
|
214 |
|
215 |
generate_button = gr.Button("生成音频")
|
216 |
|
217 |
-
audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
|
218 |
|
219 |
seed_button.click(generate_seed, inputs=[], outputs=seed)
|
220 |
generate_button.click(generate_audio,
|
|
|
66 |
@spaces.GPU
|
67 |
def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
68 |
seed, stream, speed):
|
69 |
+
stream=False
|
70 |
if prompt_wav_upload is not None:
|
71 |
prompt_wav = prompt_wav_upload
|
72 |
elif prompt_wav_record is not None:
|
|
|
77 |
if mode_checkbox_group in ['自然语言控制']:
|
78 |
if get_cosyvoice().frontend.instruct is False:
|
79 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
80 |
+
return (target_sr, default_data)
|
81 |
if instruct_text == '':
|
82 |
gr.Warning('您正在使用自然语言控制模式, 请输入instruct文本')
|
83 |
+
return (target_sr, default_data)
|
84 |
if prompt_wav is not None or prompt_text != '':
|
85 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
86 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
87 |
if mode_checkbox_group in ['跨语种复刻']:
|
88 |
if get_cosyvoice().frontend.instruct is True:
|
89 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
90 |
+
return (target_sr, default_data)
|
91 |
if instruct_text != '':
|
92 |
gr.Info('您正在使用跨语种复刻模式, instruct文本会被忽略')
|
93 |
if prompt_wav is None:
|
94 |
gr.Warning('您正在使用跨语种复刻模式, 请提供prompt音频')
|
95 |
+
return (target_sr, default_data)
|
96 |
gr.Info('您正在使用跨语种复刻模式, 请确保合成文本和prompt文本为不同语言')
|
97 |
# if in zero_shot cross_lingual, please make sure that prompt_text and prompt_wav meets requirements
|
98 |
if mode_checkbox_group in ['3s极速复刻', '跨语种复刻']:
|
99 |
if prompt_wav is None:
|
100 |
gr.Warning('prompt音频为空,您是否忘记输入prompt音频?')
|
101 |
+
return (target_sr, default_data)
|
102 |
if torchaudio.info(prompt_wav).sample_rate < prompt_sr:
|
103 |
gr.Warning('prompt音频采样率{}低于{}'.format(torchaudio.info(prompt_wav).sample_rate, prompt_sr))
|
104 |
+
return (target_sr, default_data)
|
105 |
# sft mode only use sft_dropdown
|
106 |
if mode_checkbox_group in ['预训练音色']:
|
107 |
if instruct_text != '' or prompt_wav is not None or prompt_text != '':
|
|
|
110 |
if mode_checkbox_group in ['3s极速复刻']:
|
111 |
if prompt_text == '':
|
112 |
gr.Warning('prompt文本为空,您是否忘记输入prompt文本?')
|
113 |
+
return (target_sr, default_data)
|
114 |
if instruct_text != '':
|
115 |
gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
|
116 |
|
|
|
118 |
logging.info('get sft inference request')
|
119 |
set_all_random_seed(seed)
|
120 |
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
121 |
+
return (target_sr, i['tts_speech'].numpy().flatten())
|
122 |
elif mode_checkbox_group == '3s极速复刻':
|
123 |
logging.info('get zero_shot inference request')
|
124 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
125 |
set_all_random_seed(seed)
|
126 |
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
127 |
+
return (target_sr, i['tts_speech'].numpy().flatten())
|
128 |
elif mode_checkbox_group == '跨语种复刻':
|
129 |
logging.info('get cross_lingual inference request')
|
130 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
131 |
set_all_random_seed(seed)
|
132 |
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
133 |
+
return (target_sr, i['tts_speech'].numpy().flatten())
|
134 |
else:
|
135 |
logging.info('get instruct inference request')
|
136 |
set_all_random_seed(seed)
|
137 |
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
138 |
+
return (target_sr, i['tts_speech'].numpy().flatten())
|
139 |
|
140 |
# SDK模型下载
|
141 |
import platform
|
|
|
215 |
|
216 |
generate_button = gr.Button("生成音频")
|
217 |
|
218 |
+
audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=False)
|
219 |
|
220 |
seed_button.click(generate_seed, inputs=[], outputs=seed)
|
221 |
generate_button.click(generate_audio,
|