tanbw committed on
Commit
5b76a16
1 Parent(s): d0f730b

no message

Browse files
Files changed (1) hide show
  1. webui.py +40 -42
webui.py CHANGED
@@ -132,47 +132,6 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
132
  for i in cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
133
  yield (target_sr, i['tts_speech'].numpy().flatten())
134
 
135
-
136
- def main():
137
- with gr.Blocks() as demo:
138
- gr.Markdown("### 代码库 [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) \
139
- 预训练模型 [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) \
140
- [CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) \
141
- [CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)")
142
- gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
143
-
144
- tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
145
- with gr.Row():
146
- mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
147
- instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
148
- sft_dropdown = gr.Dropdown(choices=sft_spk, label='选择预训练音色', value=sft_spk[0], scale=0.25)
149
- stream = gr.Radio(choices=stream_mode_list, label='是否流式推理', value=stream_mode_list[0][1])
150
- speed = gr.Number(value=1, label="速度调节(仅支持非流式推理)", minimum=0.5, maximum=2.0, step=0.1)
151
- with gr.Column(scale=0.25):
152
- seed_button = gr.Button(value="\U0001F3B2")
153
- seed = gr.Number(value=0, label="随机推理种子")
154
-
155
- with gr.Row():
156
- prompt_wav_upload = gr.Audio(sources='upload', type='filepath', label='选择prompt音频文件,注意采样率不低于16khz')
157
- prompt_wav_record = gr.Audio(sources='microphone', type='filepath', label='录制prompt音频文件')
158
- prompt_text = gr.Textbox(label="输入prompt文本", lines=1, placeholder="请输入prompt文本,需与prompt音频内容一致,暂时不支持自动识别...", value='')
159
- instruct_text = gr.Textbox(label="输入instruct文本", lines=1, placeholder="请输入instruct文本.", value='')
160
-
161
- generate_button = gr.Button("生成音频")
162
-
163
- audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
164
-
165
- seed_button.click(generate_seed, inputs=[], outputs=seed)
166
- generate_button.click(generate_audio,
167
- inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
168
- seed, stream, speed],
169
- outputs=[audio_output])
170
- mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
171
- demo.queue(max_size=4, default_concurrency_limit=2)
172
- demo.launch(server_name='0.0.0.0', server_port=args.port)
173
-
174
-
175
-
176
  # SDK模型下载
177
  from modelscope import snapshot_download
178
  snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
@@ -192,4 +151,43 @@ cosyvoice = CosyVoice(args.model_dir)
192
  sft_spk = cosyvoice.list_avaliable_spks()
193
  prompt_sr, target_sr = 16000, 22050
194
  default_data = np.zeros(target_sr)
195
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  for i in cosyvoice.inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
133
  yield (target_sr, i['tts_speech'].numpy().flatten())
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # SDK模型下载
136
  from modelscope import snapshot_download
137
  snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
 
151
  sft_spk = cosyvoice.list_avaliable_spks()
152
  prompt_sr, target_sr = 16000, 22050
153
  default_data = np.zeros(target_sr)
154
+
155
+ with gr.Blocks() as demo:
156
+ gr.Markdown("### 代码库 [CosyVoice](https://github.com/FunAudioLLM/CosyVoice) \
157
+ 预训练模型 [CosyVoice-300M](https://www.modelscope.cn/models/iic/CosyVoice-300M) \
158
+ [CosyVoice-300M-Instruct](https://www.modelscope.cn/models/iic/CosyVoice-300M-Instruct) \
159
+ [CosyVoice-300M-SFT](https://www.modelscope.cn/models/iic/CosyVoice-300M-SFT)")
160
+ gr.Markdown("#### 请输入需要合成的文本,选择推理模式,并按照提示步骤进行操作")
161
+
162
+ tts_text = gr.Textbox(label="输入合成文本", lines=1, value="我是通义实验室语音团队全新推出的生成式语音大模型,提供舒适自然的语音合成能力。")
163
+ with gr.Row():
164
+ mode_checkbox_group = gr.Radio(choices=inference_mode_list, label='选择推理模式', value=inference_mode_list[0])
165
+ instruction_text = gr.Text(label="操作步骤", value=instruct_dict[inference_mode_list[0]], scale=0.5)
166
+ sft_dropdown = gr.Dropdown(choices=sft_spk, label='选择预训练音色', value=sft_spk[0], scale=0.25)
167
+ stream = gr.Radio(choices=stream_mode_list, label='是否流式推理', value=stream_mode_list[0][1])
168
+ speed = gr.Number(value=1, label="速度调节(仅支持非流式推理)", minimum=0.5, maximum=2.0, step=0.1)
169
+ with gr.Column(scale=0.25):
170
+ seed_button = gr.Button(value="\U0001F3B2")
171
+ seed = gr.Number(value=0, label="随机推理种子")
172
+
173
+ with gr.Row():
174
+ prompt_wav_upload = gr.Audio(sources='upload', type='filepath', label='选择prompt音频文件,注意采样率不低于16khz')
175
+ prompt_wav_record = gr.Audio(sources='microphone', type='filepath', label='录制prompt音频文件')
176
+ prompt_text = gr.Textbox(label="输入prompt文本", lines=1, placeholder="请输入prompt文本,需与prompt音频内容一致,暂时不支持自动识别...", value='')
177
+ instruct_text = gr.Textbox(label="输入instruct文本", lines=1, placeholder="请输入instruct文本.", value='')
178
+
179
+ generate_button = gr.Button("生成音频")
180
+
181
+ audio_output = gr.Audio(label="合成音频", autoplay=True, streaming=True)
182
+
183
+ seed_button.click(generate_seed, inputs=[], outputs=seed)
184
+ generate_button.click(generate_audio,
185
+ inputs=[tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
186
+ seed, stream, speed],
187
+ outputs=[audio_output])
188
+ mode_checkbox_group.change(fn=change_instruction, inputs=[mode_checkbox_group], outputs=[instruction_text])
189
+ demo.queue(max_size=4, default_concurrency_limit=2)
190
+ demo.launch(server_name='0.0.0.0', server_port=args.port)
191
+
192
+
193
+