"""Gradio demo: synthesise English speech from text with the VITS-LJS model."""

import torch
from transformers import pipeline
import numpy as np
import gradio as gr

# Use the GPU when one is available; the pipeline handles device placement.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)


# Inference
def generate_audio(text):
    """Synthesise *text* into speech.

    Parameters
    ----------
    text : str
        The sentence to synthesise.

    Returns
    -------
    tuple[int, numpy.ndarray]
        ``(sampling_rate, waveform)`` — the value format expected by a
        ``gr.Audio(type="numpy")`` output component.

    Returning the raw value (instead of constructing a new ``gr.Audio``
    component, as an earlier revision did) preserves the configuration of
    the ``out_audio`` output component below (``autoplay=True`` and its
    label), which a returned component would otherwise override.
    """
    output = pipe(text)
    # output["audio"] carries a leading batch axis; squeeze to a 1-D waveform.
    return output["sampling_rate"], output["audio"].squeeze()


# Page layout: constrain overall width and centre the intro text.
css = """
#container{
    margin: 0 auto;
    max-width: 80rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""

# Gradio blocks demo
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(
                label="Input Text",
                info="What sentence would you like to synthesise?",
            )
            btn = gr.Button("Generate Audio!")
        with gr.Column():
            out_audio = gr.Audio(
                type="numpy",
                autoplay=True,
                label="Generated Audio - British Female Speaker",
                show_label=True,
                visible=True,
            )
    btn.click(generate_audio, [inp_text], out_audio)

demo_blocks.queue().launch()