Spaces:
Runtime error
Runtime error
File size: 3,757 Bytes
f654d12 12bfd03 f654d12 12bfd03 73c1b13 12bfd03 73c1b13 12bfd03 73c1b13 12bfd03 73c1b13 12bfd03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import spaces
import random
import gradio as gr
from css.utils import *
# Custom voice generation
def custom():
    """Build the custom voice generation panel and wire up its events.

    Creates the recording widget, the prompt / synthesis text boxes, the
    seed controls and the output player, then connects the two buttons to
    their callbacks.

    NOTE(review): the components are created directly with ``with gr.Column()``,
    so this presumably has to be called inside an active Gradio layout
    context (e.g. ``gr.Blocks``) -- confirm against the caller.
    """

    def random_seed():
        """Return a random integer seed in the range [1, 100000000]."""
        return random.randint(1, 100000000)

    @spaces.GPU
    def generate_audio(_recorded_audio, _prompt_input_textbox, _language_radio,
                       _synthetic_input_textbox, _seed):
        """Synthesize speech for the given text using the recorded prompt.

        Args:
            _recorded_audio: File path of the recorded prompt (the audio
                component is configured with ``type='filepath'``); ``None``
                when nothing has been recorded.
            _prompt_input_textbox: Text entered in the prompt text box. When
                empty, cross-lingual inference is used.
            _language_radio: ``'same'`` or ``'cross'``, from the radio group.
            _synthetic_input_textbox: Text to synthesize.
            _seed: Value passed to ``set_all_random_seed``.

        Returns:
            A ``(target_sr, audio_data)`` tuple, or
            ``(target_sr, default_data)`` when a required input is missing.
        """
        import time

        t1 = time.time()
        print(_recorded_audio, _prompt_input_textbox, _language_radio, _synthetic_input_textbox, _seed)

        if _synthetic_input_textbox == '':
            gr.Warning('合成文本为空,您是否忘记输入合成文本?')
            return (target_sr, default_data)

        # Without a recording the audio component yields no file path; warn
        # and return the placeholder instead of handing an empty value to
        # load_wav, mirroring the empty-text guard above.
        if not _recorded_audio:
            gr.Warning('未检测到录音,请先录制音频')
            return (target_sr, default_data)

        set_all_random_seed(_seed)

        # Choose the model variant based on the synthesis text.
        if use_instruct(_synthetic_input_textbox):
            model = cosyvoice_instruct
        else:
            model = cosyvoice

        prompt_speech_16k = postprocess(load_wav(_recorded_audio, prompt_sr))
        t2 = time.time()

        # Cross-lingual inference needs no prompt transcript, so it is also
        # the fallback when the prompt text box was left empty.
        if _language_radio == 'cross' or _prompt_input_textbox == '':
            output = model.inference_cross_lingual(_synthetic_input_textbox, prompt_speech_16k)
        else:
            output = model.inference_zero_shot(_synthetic_input_textbox, _prompt_input_textbox, prompt_speech_16k)
        t3 = time.time()

        audio_data = postprocess(output['tts_speech']).numpy().flatten()
        t4 = time.time()

        print(f'load and preprocess time: {t2-t1}s')
        print(f'inference time: {t3-t2}s')
        print(f'postprocess time: {t4-t3}s')
        return (target_sr, audio_data)

    with gr.Column():
        # Prompt row: recorder on the left, text to read aloud on the right.
        with gr.Row():
            with gr.Column(scale=1, min_width=400):
                with gr.Group():
                    recorded_audio = gr.Audio(sources=['microphone'],
                                              label="录制音频文件",
                                              type='filepath')
                    gr.Text("请点击录制,并朗读右方文字(中文或英文)完成录入",
                            max_lines=1,
                            container=False,
                            interactive=False)
            with gr.Column(scale=10):
                prompt_input_textbox = gr.Textbox(label="输入待录制文本")
                gr.Examples(
                    label="示例待录制文本",
                    examples=example_prompt_text,
                    inputs=[prompt_input_textbox])

        # Synthesis text and same/cross language selection.
        with gr.Column():
            language_radio = gr.Radio(choices=[('同语种', 'same'), ('跨语种', 'cross')],
                                      value='same',
                                      label="输入合成文本")
            synthetic_input_textbox = gr.Textbox(show_label=False)
            gr.Examples(
                label="示例文本",
                examples=example_tts_text,
                inputs=[synthetic_input_textbox])

        # Seed controls.
        with gr.Accordion(label="随机种子"):
            with gr.Row():
                with gr.Column(scale=1, min_width=180):
                    seed_button = gr.Button(value="\U0001F3B2 随机换一换",
                                            elem_classes="full-height")
                with gr.Column(scale=10):
                    seed = gr.Number(show_label=False,
                                     value=0,
                                     container=False,
                                     elem_classes="full-height")

        with gr.Column():
            generate_button = gr.Button("生成音频", variant="primary", size="lg")

        with gr.Column():
            output_audio = gr.Audio(label="合成音频")

    # Event wiring.
    seed_button.click(fn=random_seed, outputs=[seed])
    generate_button.click(
        fn=generate_audio,
        inputs=[recorded_audio, prompt_input_textbox, language_radio, synthetic_input_textbox, seed],
        outputs=[output_audio])
|