Spaces:

krishna195
/

IITR_Text_to_audio

Sleeping

App Files Files Community

krishna195 committed on Oct 24, 2024

Commit

66fe0e6

verified ·

1 Parent(s): 9cf9f2f

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -16

app.py CHANGED Viewed

@@ -1,20 +1,20 @@
-# Install necessary libraries (if not installed)
-# !pip install gradio transformers soundfile torch
-import gradio as gr
 import torch
 import soundfile as sf
 from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
-# Load the pre-trained model, vocoder, and processor
 model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-# Dummy speaker embeddings for speech generation (replace with actual embeddings if needed)
-speaker_embeddings = torch.randn(1, 512)  # Example speaker embedding size
-# Function to generate speech from input text
 def text_to_speech(input_text):
     # Process the input text
     inputs = processor(text=input_text, return_tensors="pt")
@@ -22,23 +22,26 @@ def text_to_speech(input_text):
     # Generate speech using the model and vocoder
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-    # Save the audio to a file (temporary storage)
     output_file = "generated_speech.wav"
     sf.write(output_file, speech.numpy(), 16000)
-    # Return the path to the audio file for Gradio to play it
     return output_file
-# Create Gradio UI
 iface = gr.Interface(
     fn=text_to_speech,
-    inputs=gr.Textbox(label="Enter Text"),
     outputs="audio",
-    title="Text to Speech Generator",
-    description="Enter the text you want to convert to speech, and the model will generate the corresponding audio.",
-    layout="vertical",  # Use vertical layout
-    theme="default"  # Use default theme
 )
-# Launch the Gradio interface
 iface.launch()

+# Install necessary libraries (if not already installed).
+# Kept commented out: "!pip ..." is notebook shell syntax and is a SyntaxError
+# when this file runs as a plain Python script; install the packages
+# separately (for example via a requirements.txt file) instead.
+# !pip install gradio transformers soundfile torch
 import torch
 import soundfile as sf
+import gradio as gr
 from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
+# Load the fine-tuned SpeechT5 model, the HiFi-GAN vocoder, and the text processor once, when the script starts
 model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+# Placeholder speaker embedding: random values drawn once at startup, not a real speaker's voice (replace with actual embeddings)
+speaker_embeddings = torch.randn(1, 512)  # shape (1, 512); NOTE(review): presumably must match the embedding size the model expects - confirm
+# Function to generate speech from text
 def text_to_speech(input_text):
+    """Convert text to speech and return the path of the generated WAV file.
+
+    Args:
+        input_text: The text to synthesize.
+
+    Returns:
+        Path of a WAV file (written with a 16000 Hz sample rate) that the
+        Gradio audio output can play.
+    """
+    # Local import so this function does not rely on the file's import list.
+    import tempfile
+
     # Process the input text
     inputs = processor(text=input_text, return_tensors="pt")
     # Generate speech using the model and vocoder
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    # Give every request its own output file: a single fixed filename would be
+    # overwritten when two requests are handled at the same time.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        output_file = tmp.name
     sf.write(output_file, speech.numpy(), 16000)
+    # Return the path to the audio file for Gradio to play
     return output_file
+# Build the Gradio interface: text input, audio output, and three example prompts
 iface = gr.Interface(
     fn=text_to_speech,
+    inputs="text",
     outputs="audio",
+    title="Text to Speech Converter",
+    description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
+    examples=[
+        ["Hello, how are you doing today?"],
+        ["Speech synthesis is amazing with deep learning models."],
+        ["TensorFlow and PyTorch are powerful machine learning frameworks."]
+    ]
 )
+# Launch the Gradio app when this script runs
 iface.launch()