import torch
from transformers import pipeline
import gradio as gr

# Load the VITS text-to-speech checkpoint on GPU if available, otherwise on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
# Inference: synthesise speech from the input text and return it as a playable audio component
def generate_audio(text):
    output = pipe(text)
    return gr.Audio(
        value=(output["sampling_rate"], output["audio"].squeeze()),
        type="numpy",
        autoplay=False,
        label="Response Voice Player",
        show_label=True,
        visible=True,
    )
css = """
#container {
    margin: 0 auto;
    max-width: 80rem;
}
#intro {
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
# Gradio Blocks demo
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(label="Input Text", info="What sentence would you like to synthesise?")
            btn = gr.Button("Generate Audio!")
            # Example prompts: "One plus one is equal to two.", "The quick brown fox jumps over the lazy dog."
        with gr.Column():
            out_audio = gr.Audio(type="numpy", autoplay=True, label="Generated Audio - British Female Speaker", show_label=True, visible=True)

    btn.click(generate_audio, [inp_text], out_audio)

demo_blocks.queue().launch()