Spaces:
Build error
Build error
Shabbir-Anjum
committed on
Commit
•
e7a21bd
1
Parent(s):
8908b04
Update app.py
Browse files
app.py
CHANGED
@@ -1,52 +1,46 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import
|
3 |
-
from datasets import load_dataset
|
4 |
import torch
|
5 |
import soundfile as sf
|
6 |
-
import
|
7 |
-
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
sf.write(output_path, audio, samplerate=samplerate)
|
49 |
-
st.audio(output_path)
|
50 |
-
|
51 |
-
if __name__ == "__main__":
|
52 |
-
main()
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
|
|
3 |
import torch
|
4 |
import soundfile as sf
|
5 |
+
from datasets import load_dataset
|
6 |
+
|
7 |
+
# Initialize the processor and model
|
8 |
+
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
9 |
+
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
|
10 |
+
|
11 |
+
# Load the vocoder
|
12 |
+
vocoder = torch.hub.load("s3prl/s3prl", "mb_melgan")
|
13 |
+
|
14 |
+
# Initialize session state
|
15 |
+
if 'text' not in st.session_state:
|
16 |
+
st.session_state['text'] = "Hello, my dog is cooler than you!"
|
17 |
+
|
18 |
+
# Function to update session state
|
19 |
+
def update_text():
|
20 |
+
st.session_state['text'] = st.text_area("Text", st.session_state['text'])
|
21 |
+
|
22 |
+
st.title("Text-to-Speech with SpeechT5")
|
23 |
+
st.write("Enter the text you want to convert to speech:")
|
24 |
+
|
25 |
+
# Use session state to store text
|
26 |
+
update_text()
|
27 |
+
|
28 |
+
if st.button("Generate Speech"):
|
29 |
+
st.write("Generating speech...")
|
30 |
+
|
31 |
+
# Process the input text
|
32 |
+
inputs = processor(text=st.session_state['text'], return_tensors="pt")
|
33 |
+
|
34 |
+
# Generate speech
|
35 |
+
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings=None)
|
36 |
+
|
37 |
+
# Use the vocoder to convert the generated speech to audio
|
38 |
+
with torch.no_grad():
|
39 |
+
audio = vocoder(speech)
|
40 |
+
|
41 |
+
# Save the audio to a file
|
42 |
+
sf.write("output.wav", audio.cpu().numpy(), samplerate=16000)
|
43 |
+
|
44 |
+
# Play the generated speech in an embedded audio player
|
45 |
+
st.audio("output.wav", format="audio/wav")
|
46 |
+
st.write("Speech generation complete. You can listen to the generated speech above.")
|
|
|
|
|
|
|
|
|
|