import torch
from transformers import pipeline
import gradio as gr

# Load the VITS text-to-speech checkpoint on GPU if available, otherwise on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
# Inference: synthesise speech from the input text and return it as a playable audio component
def generate_audio(text):
    output = pipe(text)
    return gr.Audio(
        value=(output["sampling_rate"], output["audio"].squeeze()),
        type="numpy",
        autoplay=False,
        label="Response Voice Player",
        show_label=True,
        visible=True,
    )
css = """
#container {
    margin: 0 auto;
    max-width: 80rem;
}
#intro {
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
# Gradio Blocks demo
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(label="Input Text", info="What sentence would you like to synthesise?")
            btn = gr.Button("Generate Audio!")
            # Example prompts: "One plus one is equal to two.", "The quick brown fox jumps over the lazy dog."
        with gr.Column():
            out_audio = gr.Audio(type="numpy", autoplay=True, label="Generated Audio - British Female Speaker", show_label=True, visible=True)

    btn.click(generate_audio, [inp_text], out_audio)

demo_blocks.queue().launch()