from transformers import pipeline
import gradio as gr

# Build the text-to-speech pipeline once, at module load, from a checkpoint
# hosted on the Hugging Face Model Hub.
model_name = "kakao-enterprise/vits-ljs"
text_to_speech_pipeline = pipeline(task="text-to-speech", model=model_name)

def generate_speech(text):
    """Synthesize speech for *text* in the form a Gradio audio output expects.

    Args:
        text: The text to speak.

    Returns:
        A ``(sampling_rate, waveform)`` tuple suitable for a
        ``gr.Audio(type="numpy")`` output, or ``None`` when *text* is empty
        or contains only whitespace.
    """
    # Nothing to synthesize: return None rather than handing the model an
    # empty string.
    if not text or not text.strip():
        return None

    out = text_to_speech_pipeline(text)
    # A single string input yields one result dict; tolerate a list-wrapped
    # result as well so either shape of output is handled.
    if isinstance(out, list):
        out = out[0]

    # The pipeline exposes the waveform under "audio" and reports the model's
    # own "sampling_rate", so the rate does not need to be hard-coded.
    # squeeze() drops any size-1 axes (e.g. a leading batch axis).
    waveform = out["audio"].squeeze()

    # Gradio's numpy audio format is (sample_rate, data), in that order.
    return out["sampling_rate"], waveform

# Declare the input and output components, then wire them to generate_speech.
text_input = gr.Textbox(lines=2, placeholder="Type something here...")
speech_output = gr.Audio(type="numpy", label="Generated Speech")

interface = gr.Interface(
    fn=generate_speech,
    inputs=text_input,
    outputs=speech_output,
    title="Text-to-Speech with Hugging Face",
    description="Enter text to generate speech using a model from Hugging Face's Model Hub.",
)

# Launch the app
interface.launch()