# Spaces: Sleeping (Hugging Face Space status banner captured along with this file)
# Install the necessary libraries if they are not already installed.
# The SpeechT5 tokenizer additionally requires sentencepiece.
# !pip install gradio transformers sentencepiece soundfile torch
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
# Load the fine-tuned acoustic model, the HiFi-GAN vocoder, and the text
# processor once at import time so every request reuses the same objects.
model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")

# Placeholder speaker embedding of shape (1, 512). This is still a dummy
# vector: replace it with a real x-vector for a natural-sounding voice.
#
# Two safeguards are applied to the placeholder:
#   * a private, seeded generator keeps the voice identical across restarts
#     (an unseeded torch.randn yields a different voice on every launch) and
#     leaves the global RNG state untouched;
#   * L2 normalisation brings the vector to unit length.
# NOTE(review): the SpeechT5 fine-tuning recipe normalises its x-vectors;
# confirm this checkpoint was trained the same way.
_speaker_rng = torch.Generator().manual_seed(0)
speaker_embeddings = torch.nn.functional.normalize(
    torch.randn(1, 512, generator=_speaker_rng),
    dim=-1,
)
# Function to generate speech from input text
def text_to_speech(input_text):
    """Synthesize speech for ``input_text`` and return the path to a WAV file.

    Args:
        input_text: The text to speak, as delivered by the Gradio text input.

    Returns:
        Path of a newly created 16 kHz WAV file containing the generated
        audio. Each call writes to its own temporary file.

    Raises:
        gr.Error: If ``input_text`` is empty or contains only whitespace.
    """
    # Surface a readable message in the UI instead of synthesizing from an
    # empty prompt.
    if input_text is None or not input_text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Tokenize the text into model input ids.
    inputs = processor(text=input_text, return_tensors="pt")

    # Clip to the number of text positions the model was configured with;
    # longer sequences do not fit the encoder's positional encoding.
    input_ids = inputs["input_ids"][..., : model.config.max_text_positions]

    # Generate the waveform with the acoustic model and the vocoder.
    speech = model.generate_speech(input_ids, speaker_embeddings, vocoder=vocoder)

    # Use a unique file per request: a single fixed filename would let
    # concurrent requests overwrite each other's audio. The handle is closed
    # before writing so soundfile can reopen the path on every platform.
    # The file is intentionally not deleted here because Gradio still needs
    # to read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, speech.cpu().numpy(), 16000)

    # Return the path to the audio file for Gradio to play it.
    return output_file
# Sample prompts offered below the input box; each inner list is one example row.
_example_prompts = [
    ["Hello, how are you doing today?"],
    ["The CUDA programming model allows parallel computing on GPUs."],
    ["TensorFlow and PyTorch are popular machine learning frameworks."],
]

# Build the Gradio UI: one text input wired to text_to_speech, one audio output.
iface = gr.Interface(
    text_to_speech,
    "text",
    "audio",
    examples=_example_prompts,
    title="Text to Speech Generator",
    description="Enter the text you want to convert to speech, and the model will generate the corresponding speech.",
)

# Start serving the interface.
iface.launch()