pm6six committed on
Commit
4938f65
·
verified ·
1 Parent(s): 8990976

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -88
app.py CHANGED
@@ -1,89 +1,42 @@
1
- import streamlit as st
2
- from gtts import gTTS
3
- import os
4
- import base64
5
- import PyPDF2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # Streamlit app UI
8
- st.title("Text-to-Audio App")
9
- st.text("This app converts your text input or PDF content into audio using TTS.")
10
-
11
- # User input
12
- text_input = st.text_area("Enter some text:")
13
-
14
- # PDF file upload
15
- uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
16
- if uploaded_file is not None:
17
- try:
18
- # Read PDF file
19
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
20
- extracted_text = ""
21
- for page in pdf_reader.pages:
22
- extracted_text += page.extract_text()
23
-
24
- if extracted_text.strip():
25
- text_input = extracted_text
26
- st.success("Text extracted from the uploaded PDF!")
27
- st.text_area("Extracted Text:", text_input, height=200)
28
- else:
29
- st.error("No extractable text found in the uploaded PDF.")
30
- except Exception as e:
31
- st.error(f"An error occurred while reading the PDF: {e}")
32
-
33
- if st.button("Generate Audio"):
34
- if not text_input.strip():
35
- st.error("Please enter some text or upload a PDF with extractable text!")
36
- else:
37
- try:
38
- # Generate speech using gTTS
39
- tts = gTTS(text=text_input, lang="en")
40
- audio_file = "output.wav"
41
- tts.save(audio_file)
42
-
43
- # Check if file exists
44
- if os.path.exists(audio_file):
45
- # Encode audio file to base64
46
- with open(audio_file, "rb") as f:
47
- audio_data = f.read()
48
- audio_base64 = base64.b64encode(audio_data).decode()
49
-
50
- # Embed custom HTML audio player with speed adjustment
51
- audio_html = f"""
52
- <audio id="audio" controls style="width: 100%; margin-top: 10px;">
53
- <source src="data:audio/wav;base64,{audio_base64}" type="audio/wav">
54
- Your browser does not support the audio element.
55
- </audio>
56
- <div style="margin-top: 10px;">
57
- <label for="speed" style="font-weight: bold;">Playback Speed:</label>
58
- <input type="range" id="speed" min="0.5" max="2.0" value="1.0" step="0.1" style="width: 50%; margin-left: 10px;">
59
- <span id="speed-value">1.0x</span>
60
- </div>
61
- <script>
62
- const audio = document.getElementById("audio");
63
- const speedSlider = document.getElementById("speed");
64
- const speedValue = document.getElementById("speed-value");
65
-
66
- // Update playback speed dynamically
67
- speedSlider.addEventListener("input", () => {{
68
- const speed = parseFloat(speedSlider.value);
69
- audio.playbackRate = speed;
70
- speedValue.textContent = speed.toFixed(1) + "x";
71
- }});
72
- </script>
73
- """
74
- st.components.v1.html(audio_html, height=200)
75
-
76
- st.success("Audio generated successfully!")
77
-
78
- # Provide download option
79
- with open(audio_file, "rb") as f:
80
- st.download_button(
81
- label="Download Audio",
82
- data=f.read(),
83
- file_name="output.wav",
84
- mime="audio/wav",
85
- )
86
- else:
87
- st.error("Audio file could not be generated.")
88
- except Exception as e:
89
- st.error(f"An error occurred: {e}")
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ from transformers import pipeline
4
+
5
+ # Load a voice cloning or TTS model
6
+ # Here we use a placeholder for a voice cloning model like Tortoise-TTS
7
+ # You can replace this with your preferred library
8
+ def voice_cloning(input_audio, song_text, musician_style):
9
+ # Load the input audio (your voice)
10
+ waveform, sample_rate = torchaudio.load(input_audio)
11
+
12
+ # Process the waveform to extract voice features (using Tortoise-TTS or similar)
13
+ # This is a placeholder - you'll need to use a real voice cloning pipeline here
14
+ cloned_voice = f"Processed your voice for song '{song_text}' in the style of {musician_style}"
15
+
16
+ # Synthesize the song text using your cloned voice
17
+ # Combine with the musical style of the selected musician
18
+ synthesized_song = f"Singing '{song_text}' with your voice in the style of {musician_style}."
19
+
20
+ return synthesized_song
21
+
22
+ # Create a Gradio interface
23
+ with gr.Blocks() as demo:
24
+ gr.Markdown("### Voice Cloning & Singing in a Musician's Style")
25
+
26
+ with gr.Row():
27
+ input_audio = gr.Audio(label="Upload Your Voice", type="filepath")
28
+ song_text = gr.Textbox(label="Enter Song Lyrics", placeholder="Enter the song lyrics here...")
29
+ musician_style = gr.Textbox(label="Enter Musician's Style", placeholder="e.g., Adele, Ed Sheeran, etc.")
30
+
31
+ output = gr.Textbox(label="Synthesized Song")
32
+
33
+ generate_button = gr.Button("Generate")
34
+ generate_button.click(
35
+ voice_cloning,
36
+ inputs=[input_audio, song_text, musician_style],
37
+ outputs=output
38
+ )
39
+
40
+ # Launch the app
41
+ demo.launch()
42