krishna195 committed on
Commit
66fe0e6
·
verified ·
1 Parent(s): 9cf9f2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -16
app.py CHANGED
@@ -1,20 +1,20 @@
1
- # Install necessary libraries (if not installed)
2
- # !pip install gradio transformers soundfile torch
3
 
4
- import gradio as gr
5
  import torch
6
  import soundfile as sf
 
7
  from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
8
 
9
- # Load the pre-trained model, vocoder, and processor
10
  model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
11
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
12
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
13
 
14
- # Dummy speaker embeddings for speech generation (replace with actual embeddings if needed)
15
- speaker_embeddings = torch.randn(1, 512) # Example speaker embedding size
16
 
17
- # Function to generate speech from input text
18
  def text_to_speech(input_text):
19
  # Process the input text
20
  inputs = processor(text=input_text, return_tensors="pt")
@@ -22,23 +22,26 @@ def text_to_speech(input_text):
22
  # Generate speech using the model and vocoder
23
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
24
 
25
- # Save the audio to a file (temporary storage)
26
  output_file = "generated_speech.wav"
27
  sf.write(output_file, speech.numpy(), 16000)
28
 
29
- # Return the path to the audio file for Gradio to play it
30
  return output_file
31
 
32
- # Create Gradio UI
33
  iface = gr.Interface(
34
  fn=text_to_speech,
35
- inputs=gr.Textbox(label="Enter Text"),
36
  outputs="audio",
37
- title="Text to Speech Generator",
38
- description="Enter the text you want to convert to speech, and the model will generate the corresponding audio.",
39
- layout="vertical", # Use vertical layout
40
- theme="default" # Use default theme
 
 
 
41
  )
42
 
43
- # Launch the Gradio interface
44
  iface.launch()
 
# Dependencies: gradio, transformers, soundfile, torch.
# Install them outside this script, e.g. list them in requirements.txt or run
#   pip install gradio transformers soundfile torch
# in a shell. The notebook form `!pip install ...` is IPython-only syntax and
# raises SyntaxError when this file is executed as a plain Python module.
3
 
 
4
  import torch
5
  import soundfile as sf
6
+ import gradio as gr
7
  from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
8
 
# Hugging Face Hub identifiers for the three SpeechT5 components used below.
TTS_CHECKPOINT = "krishna195/speecht5_krishna_finatuned"
VOCODER_CHECKPOINT = "microsoft/speecht5_hifigan"
PROCESSOR_CHECKPOINT = "microsoft/speecht5_tts"

# Load the fine-tuned text-to-speech model, the vocoder, and the text
# processor once at import time so every request reuses the same objects.
model = SpeechT5ForTextToSpeech.from_pretrained(TTS_CHECKPOINT)
vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_CHECKPOINT)
processor = SpeechT5Processor.from_pretrained(PROCESSOR_CHECKPOINT)

# Placeholder speaker embedding: a (1, 512) tensor of standard-normal noise,
# drawn once when the module is loaded. No seed is set in the visible code, so
# the values differ on each process start.
# NOTE(review): replace with a real speaker embedding for a consistent voice.
SPEAKER_EMBEDDING_SHAPE = (1, 512)
speaker_embeddings = torch.randn(*SPEAKER_EMBEDDING_SHAPE)
16
 
def text_to_speech(input_text):
    """Synthesize speech for *input_text* and return the path to a WAV file.

    Args:
        input_text: The text to convert to speech.

    Returns:
        The path of a newly created ``.wav`` file containing the generated
        audio, or ``None`` when *input_text* is empty, whitespace-only, or
        ``None`` (there is nothing to synthesize).
    """
    # Guard first: avoid running the model on input that contains no text.
    if not input_text or not input_text.strip():
        return None

    # Local import keeps this function self-contained.
    import tempfile

    # Tokenize the input text into model-ready tensors.
    inputs = processor(text=input_text, return_tensors="pt")

    # Generate the waveform using the model, speaker embedding, and vocoder.
    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # Write each result to its own temporary file. A single fixed filename
    # would let concurrent requests overwrite one another's audio.
    # delete=False because the caller reads the file after this returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, speech.numpy(), 16000)

    # Return the path to the audio file for Gradio to play.
    return output_file
31
 
32
+ # Create the Gradio UI interface
33
  iface = gr.Interface(
34
  fn=text_to_speech,
35
+ inputs="text",
36
  outputs="audio",
37
+ title="Text to Speech Converter",
38
+ description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
39
+ examples=[
40
+ ["Hello, how are you doing today?"],
41
+ ["Speech synthesis is amazing with deep learning models."],
42
+ ["TensorFlow and PyTorch are powerful machine learning frameworks."]
43
+ ]
44
  )
45
 
46
+ # Launch the Gradio app
47
  iface.launch()