# Spaces: Running
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline
# Initialize the text-to-speech pipeline once at module level so that every
# call to generate_speech reuses the same loaded pipeline object.
synthesiser = pipeline("text-to-speech", model="umarigan/speecht5_tts_tr_v1.0")

# Load the speaker embedding dataset; generate_speech indexes it by row to
# pick the embedding passed to the synthesiser.
embeddings_dataset = load_dataset(
    "umarigan/turkish_voice_dataset_embedded", split="train"
)
# Define the speech generation function
def generate_speech(text: str, speaker_id: int) -> str:
    """Synthesise ``text`` with the selected speaker embedding and save it as WAV.

    Args:
        text: The text to convert to speech.
        speaker_id: Row index into ``embeddings_dataset`` selecting the
            speaker embedding handed to the synthesiser.

    Returns:
        Path of the WAV file holding the generated audio.

    Raises:
        gr.Error: If ``text`` is empty/blank, or ``speaker_id`` is missing,
            not convertible to an integer, or out of range for
            ``embeddings_dataset``.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesise.")

    # Keep the try body to the lookup alone so unrelated failures in the
    # synthesis step are not misreported as a bad speaker ID.
    try:
        row = embeddings_dataset[int(speaker_id)]
    except (IndexError, TypeError, ValueError, OverflowError) as err:
        raise gr.Error(
            f"Invalid speaker ID {speaker_id!r}: the embedding dataset has "
            f"{len(embeddings_dataset)} entries."
        ) from err

    # unsqueeze(0) adds a leading dimension of size 1 to the embedding tensor.
    speaker_embedding = torch.tensor(row["speaker_embeddings"]).unsqueeze(0)
    speech = synthesiser(
        text, forward_params={"speaker_embeddings": speaker_embedding}
    )

    # Save the generated audio to its own temporary file: a single fixed
    # "speech.wav" would be overwritten by concurrent requests, so one caller
    # could be handed another caller's audio. The handle is closed before
    # writing so soundfile can reopen the path on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the audio file path to Gradio
    return output_path
# Define the Gradio interface
inputs = [
    gr.Textbox(
        label="Enter Text",
        placeholder="Bir berber bir berbere gel beraber bir berber kuralım demiş",
    ),
    gr.Number(label="Speaker ID", value=736, precision=0),
]
outputs = gr.Audio(label="Generated Speech")
demo = gr.Interface(
    fn=generate_speech,
    inputs=inputs,
    outputs=outputs,
    title="Turkish Text-to-Speech",
)

# Launch only when executed as a script, so importing this module does not
# start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()