Spaces:
Running
on
Zero
Running
on
Zero
no message
Browse files
webui.py
CHANGED
@@ -72,7 +72,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
72 |
prompt_wav = None
|
73 |
# if instruct mode, please make sure that model is iic/CosyVoice-300M-Instruct and not cross_lingual mode
|
74 |
if mode_checkbox_group in ['自然语言控制']:
|
75 |
-
if
|
76 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
77 |
yield (target_sr, default_data)
|
78 |
if instruct_text == '':
|
@@ -82,7 +82,7 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
82 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
83 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
84 |
if mode_checkbox_group in ['跨语种复刻']:
|
85 |
-
if
|
86 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
87 |
yield (target_sr, default_data)
|
88 |
if instruct_text != '':
|
@@ -114,24 +114,24 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
114 |
if mode_checkbox_group == '预训练音色':
|
115 |
logging.info('get sft inference request')
|
116 |
set_all_random_seed(seed)
|
117 |
-
for i in
|
118 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
119 |
elif mode_checkbox_group == '3s极速复刻':
|
120 |
logging.info('get zero_shot inference request')
|
121 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
122 |
set_all_random_seed(seed)
|
123 |
-
for i in
|
124 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
125 |
elif mode_checkbox_group == '跨语种复刻':
|
126 |
logging.info('get cross_lingual inference request')
|
127 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
128 |
set_all_random_seed(seed)
|
129 |
-
for i in
|
130 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
131 |
else:
|
132 |
logging.info('get instruct inference request')
|
133 |
set_all_random_seed(seed)
|
134 |
-
for i in
|
135 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
136 |
|
137 |
# SDK模型下载
|
@@ -157,9 +157,10 @@ parser.add_argument('--model_dir',
|
|
157 |
args = parser.parse_args()
|
158 |
|
159 |
cosyvoice_instance = None
|
160 |
-
|
161 |
@spaces.GPU
|
162 |
-
def
|
|
|
163 |
# 在这里加入你需要的处理逻辑
|
164 |
if cosyvoice_instance is not None:
|
165 |
return cosyvoice_instance
|
@@ -168,9 +169,8 @@ def create_cosyvoice(model_dir):
|
|
168 |
|
169 |
@spaces.GPU
|
170 |
def load_sft_options():
|
171 |
-
return
|
172 |
-
|
173 |
-
cosyvoice = create_cosyvoice(args.model_dir)
|
174 |
|
175 |
prompt_sr, target_sr = 16000, 22050
|
176 |
default_data = np.zeros(target_sr)
|
|
|
72 |
prompt_wav = None
|
73 |
# if instruct mode, please make sure that model is iic/CosyVoice-300M-Instruct and not cross_lingual mode
|
74 |
if mode_checkbox_group in ['自然语言控制']:
|
75 |
+
if get_cosyvoice().frontend.instruct is False:
|
76 |
gr.Warning('您正在使用自然语言控制模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M-Instruct模型'.format(args.model_dir))
|
77 |
yield (target_sr, default_data)
|
78 |
if instruct_text == '':
|
|
|
82 |
gr.Info('您正在使用自然语言控制模式, prompt音频/prompt文本会被忽略')
|
83 |
# if cross_lingual mode, please make sure that model is iic/CosyVoice-300M and tts_text prompt_text are different language
|
84 |
if mode_checkbox_group in ['跨语种复刻']:
|
85 |
+
if get_cosyvoice().frontend.instruct is True:
|
86 |
gr.Warning('您正在使用跨语种复刻模式, {}模型不支持此模式, 请使用iic/CosyVoice-300M模型'.format(args.model_dir))
|
87 |
yield (target_sr, default_data)
|
88 |
if instruct_text != '':
|
|
|
114 |
if mode_checkbox_group == '预训练音色':
|
115 |
logging.info('get sft inference request')
|
116 |
set_all_random_seed(seed)
|
117 |
+
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
118 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
119 |
elif mode_checkbox_group == '3s极速复刻':
|
120 |
logging.info('get zero_shot inference request')
|
121 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
122 |
set_all_random_seed(seed)
|
123 |
+
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
124 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
125 |
elif mode_checkbox_group == '跨语种复刻':
|
126 |
logging.info('get cross_lingual inference request')
|
127 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
128 |
set_all_random_seed(seed)
|
129 |
+
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
130 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
131 |
else:
|
132 |
logging.info('get instruct inference request')
|
133 |
set_all_random_seed(seed)
|
134 |
+
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
135 |
yield (target_sr, i['tts_speech'].numpy().flatten())
|
136 |
|
137 |
# SDK模型下载
|
|
|
157 |
args = parser.parse_args()
|
158 |
|
159 |
cosyvoice_instance = None
|
160 |
+
model_dir=args.model_dir
|
161 |
@spaces.GPU
|
162 |
+
def get_cosyvoice():
|
163 |
+
global cosyvoice_instance, model_dir
|
164 |
# 在这里加入你需要的处理逻辑
|
165 |
if cosyvoice_instance is not None:
|
166 |
return cosyvoice_instance
|
|
|
169 |
|
170 |
@spaces.GPU
|
171 |
def load_sft_options():
|
172 |
+
return get_cosyvoice().list_avaliable_spks()
|
173 |
+
|
|
|
174 |
|
175 |
prompt_sr, target_sr = 16000, 22050
|
176 |
default_data = np.zeros(target_sr)
|