# Source: Hugging Face Space (page status when captured: "Build error").
import soundfile as sf
import streamlit as st
import torch
from datasets import load_dataset
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)
@st.cache_resource
def _load_tts_components():
    """Load the SpeechT5 processor, acoustic model and vocoder.

    Streamlit re-executes this script on every widget interaction; the
    ``st.cache_resource`` decorator keeps the ``from_pretrained`` calls from
    being repeated on each rerun.

    Returns:
        A ``(processor, model, vocoder)`` tuple.
    """
    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    # SpeechT5 produces a mel spectrogram; the companion HiFi-GAN checkpoint
    # is the vocoder that converts that spectrogram into a waveform.
    tts_vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return tts_processor, tts_model, tts_vocoder


# Keep the original module-level names so the rest of the script is unchanged.
processor, model, vocoder = _load_tts_components()
# Seed the stored text only when this session has no 'text' entry yet, so a
# value already present in session state is never overwritten on rerun.
_DEFAULT_TEXT = "Hello, my dog is cooler than you!"
if "text" not in st.session_state:
    st.session_state["text"] = _DEFAULT_TEXT
# Render the text input bound to session state
def update_text():
    """Render the "Text" input box, stored under ``st.session_state['text']``.

    The widget is bound to session state through ``key="text"``, so Streamlit
    itself keeps the typed text under that key and uses any value already
    stored there as the initial content.

    Returns:
        None. Read the current text from ``st.session_state['text']``.
    """
    # A key gives the widget a stable identity. Feeding the stored text in as
    # the default value and assigning the return value back changes the
    # default after every edit, which can make Streamlit treat the box as a
    # new widget and discard the user's following edit.
    st.text_area("Text", key="text")
# Row of the x-vector dataset used as the voice. 7306 is the row used in the
# Transformers SpeechT5 example. NOTE(review): confirm the desired speaker.
_SPEAKER_INDEX = 7306


@st.cache_resource
def _load_speaker_embeddings():
    """Return the speaker x-vector passed to ``model.generate_speech``.

    The vector is read from the ``Matthijs/cmu-arctic-xvectors`` validation
    split and given a leading batch dimension with ``unsqueeze(0)``. The
    result is cached so the dataset is not reloaded on every button press.
    """
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    return torch.tensor(xvectors[_SPEAKER_INDEX]["xvector"]).unsqueeze(0)


st.title("Text-to-Speech with SpeechT5")
st.write("Enter the text you want to convert to speech:")
# Render the text box; the current text is read from st.session_state['text'].
update_text()

if st.button("Generate Speech"):
    text = st.session_state['text']
    if not (text or "").strip():
        # Nothing to synthesize; avoid running the model on blank input.
        st.warning("Please enter some text first.")
    else:
        st.write("Generating speech...")
        # Tokenize the input text.
        inputs = processor(text=text, return_tensors="pt")
        # generate_speech needs a speaker embedding; passing None is rejected
        # by current Transformers releases.
        speech = model.generate_speech(
            inputs["input_ids"],
            speaker_embeddings=_load_speaker_embeddings(),
        )
        # Convert the generated spectrogram into a waveform.
        with torch.no_grad():
            audio = vocoder(speech)
        # squeeze() drops any singleton batch/channel axis so soundfile gets a
        # 1-D mono signal (presumably already 1-D for a single utterance).
        sf.write("output.wav", audio.squeeze().cpu().numpy(), samplerate=16000)
        # Embed an audio player for the generated file.
        st.audio("output.wav", format="audio/wav")
        st.write("Speech generation complete. You can listen to the generated speech above.")