Spaces:

pm6six
/

testing

Running

App Files Community

pm6six committed 2 days ago

Commit

4938f65

verified ·

1 Parent(s): 8990976

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -88

app.py CHANGED Viewed

@@ -1,89 +1,42 @@
-import streamlit as st
-from gtts import gTTS
-import os
-import base64
-import PyPDF2
-# Streamlit app UI
-st.title("Text-to-Audio App")
-st.text("This app converts your text input or PDF content into audio using TTS.")
-# User input
-text_input = st.text_area("Enter some text:")
-# PDF file upload
-uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
-if uploaded_file is not None:
-    try:
-        # Read PDF file
-        pdf_reader = PyPDF2.PdfReader(uploaded_file)
-        extracted_text = ""
-        for page in pdf_reader.pages:
-            extracted_text += page.extract_text()
-        if extracted_text.strip():
-            text_input = extracted_text
-            st.success("Text extracted from the uploaded PDF!")
-            st.text_area("Extracted Text:", text_input, height=200)
-        else:
-            st.error("No extractable text found in the uploaded PDF.")
-    except Exception as e:
-        st.error(f"An error occurred while reading the PDF: {e}")
-if st.button("Generate Audio"):
-    if not text_input.strip():
-        st.error("Please enter some text or upload a PDF with extractable text!")
-    else:
-        try:
-            # Generate speech using gTTS
-            tts = gTTS(text=text_input, lang="en")
-            audio_file = "output.wav"
-            tts.save(audio_file)
-            # Check if file exists
-            if os.path.exists(audio_file):
-                # Encode audio file to base64
-                with open(audio_file, "rb") as f:
-                    audio_data = f.read()
-                audio_base64 = base64.b64encode(audio_data).decode()
-                # Embed custom HTML audio player with speed adjustment
-                audio_html = f"""
-                    <audio id="audio" controls style="width: 100%; margin-top: 10px;">
-                        <source src="data:audio/wav;base64,{audio_base64}" type="audio/wav">
-                        Your browser does not support the audio element.
-                    </audio>
-                    <div style="margin-top: 10px;">
-                        <label for="speed" style="font-weight: bold;">Playback Speed:</label>
-                        <input type="range" id="speed" min="0.5" max="2.0" value="1.0" step="0.1" style="width: 50%; margin-left: 10px;">
-                        <span id="speed-value">1.0x</span>
-                    </div>
-                    <script>
-                        const audio = document.getElementById("audio");
-                        const speedSlider = document.getElementById("speed");
-                        const speedValue = document.getElementById("speed-value");
-                        // Update playback speed dynamically
-                        speedSlider.addEventListener("input", () => {{
-                            const speed = parseFloat(speedSlider.value);
-                            audio.playbackRate = speed;
-                            speedValue.textContent = speed.toFixed(1) + "x";
-                        }});
-                    </script>
-                """
-                st.components.v1.html(audio_html, height=200)
-                st.success("Audio generated successfully!")
-                # Provide download option
-                with open(audio_file, "rb") as f:
-                    st.download_button(
-                        label="Download Audio",
-                        data=f.read(),
-                        file_name="output.wav",
-                        mime="audio/wav",
-                    )
-            else:
-                st.error("Audio file could not be generated.")
-        except Exception as e:
-            st.error(f"An error occurred: {e}")

+import gradio as gr
+import torchaudio
+from transformers import pipeline
+# Load a voice cloning or TTS model
+# Here we use a placeholder for a voice cloning model like Tortoise-TTS
+# You can replace this with your preferred library
+def voice_cloning(input_audio, song_text, musician_style):
+    # Load the input audio (your voice)
+    waveform, sample_rate = torchaudio.load(input_audio)
+    # Process the waveform to extract voice features (using Tortoise-TTS or similar)
+    # This is a placeholder - you'll need to use a real voice cloning pipeline here
+    cloned_voice = f"Processed your voice for song '{song_text}' in the style of {musician_style}"
+    # Synthesize the song text using your cloned voice
+    # Combine with the musical style of the selected musician
+    synthesized_song = f"Singing '{song_text}' with your voice in the style of {musician_style}."
+    return synthesized_song
+# Create a Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("### Voice Cloning & Singing in a Musician's Style")
+    with gr.Row():
+        input_audio = gr.Audio(label="Upload Your Voice", type="filepath")
+        song_text = gr.Textbox(label="Enter Song Lyrics", placeholder="Enter the song lyrics here...")
+        musician_style = gr.Textbox(label="Enter Musician's Style", placeholder="e.g., Adele, Ed Sheeran, etc.")
+    output = gr.Textbox(label="Synthesized Song")
+    generate_button = gr.Button("Generate")
+    generate_button.click(
+        voice_cloning,
+        inputs=[input_audio, song_text, musician_style],
+        outputs=output
+    )
+# Launch the app
+demo.launch()