Spaces:
Running
on
Zero
Running
on
Zero
no message
Browse files
webui.py
CHANGED
@@ -132,47 +132,6 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
132 |
for i in cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
133 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
134 |
|
135 |
-
|
136 |
-
def main():
|
137 |
-
with gr.Blocks() as demo:
|
138 |
-
gr.Markdown("### 代码库 [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) \
|
139 |
-
预训练模型 [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) \
|
140 |
-
[CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) \
|
141 |
-
[CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)")
|
142 |
-
gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
|
143 |
-
|
144 |
-
tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
|
145 |
-
with gr.Row():
|
146 |
-
mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
|
147 |
-
instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
|
148 |
-
sft_dropdown = gr.Dropdown(choices=sft_spk, label='选择预训练音色', value=sft_spk[0], scale=0.25)
|
149 |
-
stream = gr.Radio(choices=stream_mode_list, label='是否流式推理', value=stream_mode_list[0][1])
|
150 |
-
speed = gr.Number(value=1, label="速度调节(仅支持非流式推理)", minimum=0.5, maximum=2.0, step=0.1)
|
151 |
-
with gr.Column(scale=0.25):
|
152 |
-
seed_button = gr.Button(value="\U0001F3B2")
|
153 |
-
seed = gr.Number(value=0, label="随机推理种子")
|
154 |
-
|
155 |
-
with gr.Row():
|
156 |
-
prompt_wav_upload = gr.Audio(sources='upload', type='filepath', label='选择prompt音频文件,注意采样率不低于16khz')
|
157 |
-
prompt_wav_record = gr.Audio(sources='microphone', type='filepath', label='录制prompt音频文件')
|
158 |
-
prompt_text = gr.Textbox(label="输入prompt文本", lines=1, placeholder="请输入prompt文本,需与prompt音频内容一致,暂时不支持自动识别...", value='')
|
159 |
-
instruct_text = gr.Textbox(label="输入instruct文本", lines=1, placeholder="请输入instruct文本.", value='')
|
160 |
-
|
161 |
-
generate_button = gr.Button("生成音频")
|
162 |
-
|
163 |
-
audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
|
164 |
-
|
165 |
-
seed_button.click(generate_seed, inputs=[], outputs=seed)
|
166 |
-
generate_button.click(generate_audio,
|
167 |
-
inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
168 |
-
seed, stream, speed],
|
169 |
-
outputs=[audio_output])
|
170 |
-
mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
|
171 |
-
demo.queue(max_size=4, default_concurrency_limit=2)
|
172 |
-
demo.launch(server_name='0.0.0.0', server_port=args.port)
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
# SDK模型下载
|
177 |
from modelscope import snapshot_download
|
178 |
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
|
@@ -192,4 +151,43 @@ cosyvoice = CosyVoice(args.model_dir)
|
|
192 |
sft_spk = cosyvoice.list_avaliable_spks()
|
193 |
prompt_sr, target_sr = 16000, 22050
|
194 |
default_data = np.zeros(target_sr)
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
for i in cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
133 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
# SDK模型下载
|
136 |
from modelscope import snapshot_download
|
137 |
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
|
|
|
151 |
sft_spk = cosyvoice.list_avaliable_spks()
|
152 |
prompt_sr, target_sr = 16000, 22050
|
153 |
default_data = np.zeros(target_sr)
|
154 |
+
|
155 |
+
with gr.Blocks() as demo:
|
156 |
+
gr.Markdown("### 代码库 [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) \
|
157 |
+
预训练模型 [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) \
|
158 |
+
[CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) \
|
159 |
+
[CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)")
|
160 |
+
gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
|
161 |
+
|
162 |
+
tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
|
163 |
+
with gr.Row():
|
164 |
+
mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
|
165 |
+
instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
|
166 |
+
sft_dropdown = gr.Dropdown(choices=sft_spk, label='选择预训练音色', value=sft_spk[0], scale=0.25)
|
167 |
+
stream = gr.Radio(choices=stream_mode_list, label='是否流式推理', value=stream_mode_list[0][1])
|
168 |
+
speed = gr.Number(value=1, label="速度调节(仅支持非流式推理)", minimum=0.5, maximum=2.0, step=0.1)
|
169 |
+
with gr.Column(scale=0.25):
|
170 |
+
seed_button = gr.Button(value="\U0001F3B2")
|
171 |
+
seed = gr.Number(value=0, label="随机推理种子")
|
172 |
+
|
173 |
+
with gr.Row():
|
174 |
+
prompt_wav_upload = gr.Audio(sources='upload', type='filepath', label='选择prompt音频文件,注意采样率不低于16khz')
|
175 |
+
prompt_wav_record = gr.Audio(sources='microphone', type='filepath', label='录制prompt音频文件')
|
176 |
+
prompt_text = gr.Textbox(label="输入prompt文本", lines=1, placeholder="请输入prompt文本,需与prompt音频内容一致,暂时不支持自动识别...", value='')
|
177 |
+
instruct_text = gr.Textbox(label="输入instruct文本", lines=1, placeholder="请输入instruct文本.", value='')
|
178 |
+
|
179 |
+
generate_button = gr.Button("生成音频")
|
180 |
+
|
181 |
+
audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
|
182 |
+
|
183 |
+
seed_button.click(generate_seed, inputs=[], outputs=seed)
|
184 |
+
generate_button.click(generate_audio,
|
185 |
+
inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
186 |
+
seed, stream, speed],
|
187 |
+
outputs=[audio_output])
|
188 |
+
mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
|
189 |
+
demo.queue(max_size=4, default_concurrency_limit=2)
|
190 |
+
demo.launch(server_name='0.0.0.0', server_port=args.port)
|
191 |
+
|
192 |
+
|
193 |
+
|