Spaces:

krishna195
/

IITR_Text_to_audio

Sleeping

App Files Files Community

krishna195 committed on Oct 24, 2024

Commit

10bf035

verified ·

1 Parent(s): 45b014a

Create app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# Install the necessary libraries if they are not already installed
+# (sentencepiece is required by the SpeechT5 tokenizer):
+# !pip install gradio transformers soundfile torch sentencepiece
+import gradio as gr
+import torch
+import soundfile as sf
+from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
+# Load the pre-trained model, vocoder, and processor
+model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+# Speaker embeddings for speech generation (replace this with actual embeddings if needed)
+speaker_embeddings = torch.randn(1, 512)  # Example speaker embedding size (dummy embeddings)
+# Function to generate speech from input text
+def text_to_speech(input_text):
+    # Process the input text
+    inputs = processor(text=input_text, return_tensors="pt")
+    # Generate speech using the model and vocoder
+    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    # Save the audio to a file (temporary storage)
+    output_file = "generated_speech.wav"
+    sf.write(output_file, speech.numpy(), 16000)
+    # Return the path to the audio file for Gradio to play it
+    return output_file
+# Create Gradio UI
+iface = gr.Interface(
+    fn=text_to_speech,
+    inputs="text",
+    outputs="audio",
+    title="Text to Speech Generator",
+    description="Enter the text you want to convert to speech, and the model will generate the corresponding speech.",
+    examples=[
+        ["Hello, how are you doing today?"],
+        ["The CUDA programming model allows parallel computing on GPUs."],
+        ["TensorFlow and PyTorch are popular machine learning frameworks."]
+    ]
+)
+# Launch the Gradio interface
+iface.launch()