insta-maker

Sleeping

App Files Files Community

hivecorp committed on Nov 3, 2024

Commit

3927c7f

verified ·

1 Parent(s): 8ca57cc

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -113

app.py CHANGED Viewed

@@ -2,131 +2,98 @@ import gradio as gr
 from pydub import AudioSegment
 import edge_tts
 import os
-import wave
 import asyncio
-import srt
-# Function to calculate audio duration
-def get_audio_length(audio_path):
-    with wave.open(audio_path, 'rb') as audio:
-        frames = audio.getnframes()
-        rate = audio.getframerate()
-        return frames / float(rate)
-# Generate precise SRT entries for a text batch
-def generate_accurate_srt(text, start_time, batch_index):
-    srt_entries = []
-    current_time = start_time
-    for line in text.splitlines():
-        end_time = current_time + get_audio_length_for_line(line)
-        srt_entries.append(
-            srt.Subtitle(
-                index=batch_index,
-                start=srt.timedelta(seconds=current_time),
-                end=srt.timedelta(seconds=end_time),
-                content=line
-            )
-        )
-        current_time = end_time
-        batch_index += 1
-    return srt_entries, current_time
-# Process batches and accumulate precise SRT entries
-async def batch_process_srt_and_audio(script_text, voice, batch_size=500, progress=gr.Progress()):
-    total_srt_entries = []
     combined_audio = AudioSegment.empty()
-    cumulative_time = 0.0  # Track total time for accurate SRT start times
-    batch_index = 1
-    # Split text into manageable batches
-    for i in range(0, len(script_text), batch_size):
-        batch_text = script_text[i:i+batch_size]
-        mp3_file = f"audio_batch_{i}.mp3"  # Save as MP3 first
-        wav_file = f"audio_batch_{i}.wav"  # Convert to WAV
-        # Generate audio for each batch and save as MP3
-        tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
-        await tts.save(mp3_file)
-        # Convert MP3 to WAV
-        batch_audio = AudioSegment.from_file(mp3_file, format="mp3")
-        batch_audio.export(wav_file, format="wav")
-        # Ensure WAV conversion succeeded and calculate duration
-        batch_duration = get_audio_length(wav_file)
-        srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
-        # Append entries and audio for the batch
-        total_srt_entries.extend(srt_entries)
         combined_audio += batch_audio
-        batch_index += len(srt_entries)
-        # Clean up temporary MP3 file
-        os.remove(mp3_file)
     # Export combined audio and SRT
     combined_audio.export("final_audio.wav", format="wav")
     with open("final_subtitles.srt", "w") as srt_file:
-        srt_file.write(srt.compose(total_srt_entries))
-    # Final validation check
-    validate_srt_against_audio("final_subtitles.srt", "final_audio.wav")
     return "final_subtitles.srt", "final_audio.wav"
-# Validate SRT timing with total audio length
-def validate_srt_against_audio(srt_file_path, audio_file_path):
-    audio_duration = get_audio_length(audio_file_path)
-    with open(srt_file_path, 'r') as file:
-        subtitles = list(srt.parse(file.read()))
-    for subtitle in subtitles:
-        if subtitle.end.total_seconds() > audio_duration:
-            subtitle.end = srt.timedelta(seconds=audio_duration)
-            break
-    with open(srt_file_path, 'w') as file:
-        file.write(srt.compose(subtitles))
-# Gradio function with error handling and markdown message
-async def process_script(script_text, language, voice):
-    try:
-        srt_path, audio_path = await batch_process_srt_and_audio(script_text, voice)
-        return srt_path, audio_path, audio_path, ""
-    except Exception as e:
-        print(f"Error: {e}")
-        return None, None, None, "An error occurred. Please check the script text and try again."
-# Dynamic voice selection based on language
-def update_voice_options(language):
-    voices = {
-        "en-US": ["en-US-AndrewNeural", "en-US-JennyNeural"],
-        "es-ES": ["es-ES-AlvaroNeural", "es-ES-ElviraNeural"]
-    }
-    return gr.update(choices=voices.get(language, []), value=voices.get(language, [])[0])
-# Gradio app setup
-with gr.Blocks() as app:
-    gr.Markdown("# Text to Speech with Accurate SRT and Audio Generation")
-    language = gr.Dropdown(choices=["en-US", "es-ES"], label="Select Language", value="en-US")
-    voice = gr.Dropdown(choices=["en-US-AndrewNeural", "en-US-JennyNeural"], label="Select Voice")
-    language.change(fn=update_voice_options, inputs=language, outputs=voice)
-    script_text = gr.Textbox(label="Enter Script Text", lines=10)
-    outputs = [
         gr.File(label="Download SRT File"),
         gr.File(label="Download Audio File"),
-        gr.Audio(label="Play Audio"),
-        gr.Markdown(label="Error Message")  # This will display any error messages
-    ]
-    submit_button = gr.Button("Generate Audio and SRT")
-    submit_button.click(process_script, inputs=[script_text, language, voice], outputs=outputs)
 app.launch()

 from pydub import AudioSegment
 import edge_tts
 import os
 import asyncio
# Function to get the length of an audio file in seconds
def get_audio_length(audio_file):
    """Return the playback duration of ``audio_file`` in seconds.

    The file is decoded with ``AudioSegment.from_file`` and the segment's
    ``duration_seconds`` attribute is returned unchanged.
    """
    return AudioSegment.from_file(audio_file).duration_seconds
# Function to format time for SRT
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative duration in seconds (int or float). Negative
            values are clamped to zero because SRT has no negative timestamps.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Convert to whole milliseconds once, rounding instead of truncating:
    # values such as 1.001 are stored slightly below their decimal value and
    # would otherwise lose a millisecond. Rounding first also lets a carry
    # (e.g. 59.9996 -> 60.000) propagate into the seconds/minutes fields.
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hrs, mins = divmod(total_mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
# Function to generate SRT with accurate timing per batch
async def generate_accurate_srt(batch_text, batch_num, start_offset):
    """Synthesize one text batch and build SRT cues spanning its audio.

    The batch is spoken with the ``en-US-AndrewNeural`` voice at ``-25%`` rate
    and saved to ``batch_{batch_num}_audio.wav``. The measured audio length is
    then shared out across the batch's words, and the words are grouped into
    cues of up to 10 words each.

    Args:
        batch_text: Text to synthesize for this batch.
        batch_num: Zero-based batch number; used in the audio file name and
            to offset the cue numbers (``batch_num * 100``).
        start_offset: Time in seconds at which this batch starts within the
            combined audio.

    Returns:
        A tuple ``(srt_content, audio_file, end_offset)`` where
        ``end_offset`` is ``start_offset`` plus the measured audio length.
    """
    audio_file = f"batch_{batch_num}_audio.wav"
    # Generate the audio using edge-tts
    # NOTE(review): edge-tts presumably writes MP3-encoded data regardless of
    # the ".wav" suffix; the file is later decoded by content -- confirm.
    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
    await tts.save(audio_file)
    # Get the actual length of the audio file
    actual_length = get_audio_length(audio_file)
    batch_end = start_offset + actual_length

    words = batch_text.split()
    if not words:
        # Nothing to caption; avoid dividing by zero below.
        return "", audio_file, batch_end

    # Time is allotted per word so that a short final cue (fewer than 10
    # words) receives a proportionally short duration instead of a full
    # 10-word slot, which would run past the end of the audio.
    seconds_per_word = actual_length / len(words)
    entries = []
    start_time = start_offset
    for i in range(0, len(words), 10):
        segment_words = words[i:i + 10]
        # Clamp against float accumulation so no cue ends after the audio.
        end_time = min(start_time + seconds_per_word * len(segment_words), batch_end)
        entries.append(
            f"{i // 10 + 1 + (batch_num * 100)}\n"
            f"{format_time(start_time)} --> {format_time(end_time)}\n"
            + " ".join(segment_words) + "\n\n"
        )
        start_time = end_time
    return "".join(entries), audio_file, batch_end
# Batch processing function with in-order processing and progress indicator
async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
    """Convert a script into one combined audio file and a matching SRT file.

    The script is split into batches of at most 500 characters, each batch is
    synthesized via ``generate_accurate_srt``, and the per-batch audio and
    subtitles are concatenated in script order.

    Batches are processed one after another on purpose: each batch's subtitle
    start time is the total duration of the audio produced before it, which is
    only known once the earlier batches have finished.

    Args:
        script_text: Full script text to synthesize.
        progress: Gradio progress tracker used to report per-batch progress.

    Returns:
        A tuple ``("final_subtitles.srt", "final_audio.wav")`` with the paths
        of the files written to the current working directory.
    """
    import textwrap

    # Split on whitespace boundaries so a word is never cut in half between
    # two batches; whitespace-only input yields no batches.
    batches = textwrap.wrap(script_text, 500)
    srt_parts = []
    combined_audio = AudioSegment.empty()
    start_offset = 0.0  # Track cumulative time offset for SRT timing

    for batch_num, batch_text in progress.tqdm(
        list(enumerate(batches)), desc="Processing batches..."
    ):
        srt_content, audio_file, _ = await generate_accurate_srt(
            batch_text, batch_num, start_offset
        )
        try:
            # Append the audio of each batch to the combined audio
            combined_audio += AudioSegment.from_file(audio_file)
        finally:
            # Clean up the individual batch audio file even if decoding fails
            os.remove(audio_file)
        srt_parts.append(srt_content)
        # Use the real combined duration as the next batch's start so the
        # subtitles stay aligned with the audio that is actually exported.
        start_offset = combined_audio.duration_seconds

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    # Explicit UTF-8 so non-ASCII script text is written identically on
    # every platform.
    with open("final_subtitles.srt", "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(srt_parts))
    return "final_subtitles.srt", "final_audio.wav"
# Gradio interface function
async def process_script(script_text):
    """Run the batch pipeline for ``script_text`` and return the UI outputs.

    Returns a 3-tuple: the SRT path, the audio path, and the audio path
    again (the same file feeds both the download and the player outputs).
    """
    generated = await batch_process_srt_and_audio(script_text)
    subtitle_file, audio_file = generated
    return subtitle_file, audio_file, audio_file
# Gradio interface setup: one text input, two file downloads and an audio
# player, all driven by process_script.
_script_input = gr.Textbox(label="Enter Script Text", lines=10)
_result_outputs = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Play Audio"),
]
app = gr.Interface(
    fn=process_script,
    inputs=_script_input,
    outputs=_result_outputs,
    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download.",
)
# Start the web server for the interface.
app.launch()