"""Gradio demo: synthesise English speech from text with the VITS-LJS model."""

import torch
from transformers import pipeline
import numpy as np
import gradio as gr

# Use the GPU when one is available; the pipeline handles device placement.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)


# Inference
def generate_audio(text):
    """Synthesise *text* into speech.

    Parameters
    ----------
    text : str
        The sentence to synthesise.

    Returns
    -------
    tuple[int, numpy.ndarray]
        ``(sampling_rate, waveform)`` — the value format expected by a
        ``gr.Audio(type="numpy")`` output component.

    Returning the raw value (instead of constructing a new ``gr.Audio``
    component, as an earlier revision did) preserves the configuration of
    the ``out_audio`` output component below (``autoplay=True`` and its
    label), which a returned component would otherwise override.
    """
    output = pipe(text)
    # output["audio"] carries a leading batch axis; squeeze to a 1-D waveform.
    return output["sampling_rate"], output["audio"].squeeze()


# Page layout: constrain overall width and centre the intro text.
css = """
#container{
    margin: 0 auto;
    max-width: 80rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""

# Gradio blocks demo
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(
                label="Input Text",
                info="What sentence would you like to synthesise?",
            )
            btn = gr.Button("Generate Audio!")
        with gr.Column():
            out_audio = gr.Audio(
                type="numpy",
                autoplay=True,
                label="Generated Audio - British Female Speaker",
                show_label=True,
                visible=True,
            )
    btn.click(generate_audio, [inp_text], out_audio)

demo_blocks.queue().launch()