|
import gradio as gr
import numpy as np
import torch
from transformers import pipeline
|
|
|
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the text-to-speech pipeline once at module level so that every
# request handled by the UI reuses the same loaded model.
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
|
|
|
|
|
def generate_audio(text):
    """Synthesise speech for *text* and return it as a Gradio audio update.

    Args:
        text: Sentence to synthesise, as entered in the input textbox.

    Returns:
        A ``gr.Audio`` component whose value is ``(sampling_rate, samples)``,
        or ``None`` when *text* is empty or contains only whitespace, so the
        model is never invoked with nothing to say.
    """
    # Guard against blank submissions instead of forwarding them to the model.
    if not text or not text.strip():
        return None

    speech = pipe(text)

    # squeeze() drops singleton dimensions from the returned samples
    # (presumably a leading batch axis -- TODO confirm against the pipeline).
    waveform = speech["audio"].squeeze()

    # NOTE(review): the label and autoplay set here differ from those the
    # output player is created with in the layout; confirm that is intended.
    return gr.Audio(
        value=(speech["sampling_rate"], waveform),
        type="numpy",
        autoplay=False,
        label="Response Voice Player",
        show_label=True,
        visible=True,
    )
|
|
|
# Custom stylesheet handed to gr.Blocks: centres the page container and the
# intro section.
# NOTE(review): no component in the layout below sets elem_id="container" or
# elem_id="intro", so these selectors may currently match nothing -- confirm.
css = """
#container{
    margin: 0 auto;
    max-width: 80rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
|
|
|
|
|
# Two-column layout: text input and trigger button on the left, audio player
# on the right.
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(
                label="Input Text",
                info="What sentence would you like to synthesise?",
            )
            btn = gr.Button("Generate Audio!")

        with gr.Column():
            out_audio = gr.Audio(
                type="numpy",
                autoplay=True,
                label="Generated Audio - British Female Speaker",
                show_label=True,
                visible=True,
            )

    # Event listeners must be registered inside the Blocks context.
    btn.click(generate_audio, [inp_text], out_audio)

# Enable request queuing, then start the web server.
demo_blocks.queue().launch()