insta-maker

Sleeping

App Files Files Community

hivecorp committed on Nov 3, 2024

Commit

f4b5c65

verified ·

1 Parent(s): ea230c6

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -109

app.py CHANGED Viewed

@@ -1,115 +1,92 @@
 import gradio as gr
-import asyncio
 import edge_tts
-import tempfile
 import os
-import srt
-from datetime import timedelta
-from itertools import chain
-# Default TTS settings
-DEFAULT_VOICE = "en-US-AndrewNeural"
-DEFAULT_RATE = "-25%"
-# Function to split text into batches based on a specified word limit (300-320)
-def split_into_batches(text, batch_size=320):
-    words = text.split()
-    batches = []
-    current_batch = []
-    current_length = 0
-    for word in words:
-        current_batch.append(word)
-        current_length += 1
-        if current_length >= batch_size:
-            batches.append(" ".join(current_batch))
-            current_batch = []
-            current_length = 0
-    if current_batch:
-        batches.append(" ".join(current_batch))
-    return batches
-# Function to generate SRT entries and audio for each segment within a batch
-async def generate_srt_for_batch(batch_text, batch_index):
-    words = batch_text.split()
-    segments = []
-    segment_texts = []
-    start_time = timedelta(seconds=0)
-    # Loop through words to create segments of 5-8 words, considering punctuation
-    current_segment = []
-    for i, word in enumerate(words):
-        current_segment.append(word)
-        if len(current_segment) >= 5 or word.endswith((".", ",", "!", "?")):
-            segment_text = " ".join(current_segment)
-            end_time = start_time + timedelta(seconds=2)  # Example: 2 seconds per segment, adjust as needed
-            segments.append(srt.Subtitle(index=len(segments)+1, start=start_time, end=end_time, content=segment_text))
-            start_time = end_time
-            segment_texts.append(segment_text)
-            current_segment = []
-    # Handle remaining words in the last segment
-    if current_segment:
-        segment_text = " ".join(current_segment)
-        end_time = start_time + timedelta(seconds=2)
-        segments.append(srt.Subtitle(index=len(segments)+1, start=start_time, end=end_time, content=segment_text))
-        segment_texts.append(segment_text)
-    audio_files = []
-    for segment_text in segment_texts:
-        audio_path = await generate_audio(segment_text)
-        audio_files.append(audio_path)
-    return segments, audio_files
-# Function to generate audio using Edge TTS for a given text segment
-async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
-    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
-        await communicate.save(temp_audio.name)
-        return temp_audio.name
-# Function to process the script in batches and generate the final audio and SRT
-async def process_script(script):
-    batches = split_into_batches(script)
-    all_srt_entries = []
-    all_audio_files = []
-    # Process each batch independently, keeping track of SRT and audio segments
-    for batch_index, batch_text in enumerate(batches):
-        srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)
-        all_srt_entries.extend(srt_entries)
-        all_audio_files.extend(audio_files)
-    # Combine and synchronize all SRT entries
-    final_srt = srt.compose(all_srt_entries)
-    # Concatenate all audio files into a single output
-    combined_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-    os.system(f"ffmpeg -y -i \"concat:{'|'.join(all_audio_files)}\" -c copy {combined_audio_path}")
-    return combined_audio_path, final_srt
-# Function to handle Gradio interface output generation
-def generate_output(script):
-    final_audio_path, final_srt = asyncio.run(process_script(script))
-    # Save final SRT file
-    srt_file_path = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name
-    with open(srt_file_path, "w") as srt_file:
-        srt_file.write(final_srt)
-    return final_audio_path, srt_file_path
-# Gradio Interface
-with gr.Blocks() as app:
-    gr.Markdown("# Batch SRT and Audio Generator")
-    script_input = gr.Textbox(label="Enter Script", lines=10)
-    generate_button = gr.Button("Generate SRT and Audio")
-    audio_output = gr.Audio(label="Generated Audio", type="filepath")
-    srt_output = gr.File(label="Generated SRT File")
-    # Connect Gradio elements to output generation function
-    generate_button.click(generate_output, inputs=script_input, outputs=[audio_output, srt_output])
 app.launch()

 import gradio as gr
+from pydub import AudioSegment
 import edge_tts
 import os
+import asyncio
+# Function to get the length of an audio file in seconds
+def get_audio_length(audio_file):
+    audio = AudioSegment.from_file(audio_file)
+    return audio.duration_seconds
+# Function to format time for SRT
+def format_time(seconds):
+    millis = int((seconds % 1) * 1000)
+    seconds = int(seconds)
+    hrs = seconds // 3600
+    mins = (seconds % 3600) // 60
+    secs = seconds % 60
+    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
+# Function to generate SRT with accurate timing per batch
+async def generate_accurate_srt(batch_text, batch_num, start_offset):
+    audio_file = f"batch_{batch_num}_audio.wav"
+    # Generate the audio using edge-tts
+    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
+    await tts.save(audio_file)
+    # Get the actual length of the audio file
+    actual_length = get_audio_length(audio_file)
+    # Initialize SRT content
+    srt_content = ""
+    words = batch_text.split()
+    segment_duration = actual_length / len(words) * 10  # Adjusted for ~10 words per SRT segment
+    start_time = start_offset
+    # Build SRT content with accurate timing
+    for i in range(0, len(words), 10):
+        segment_words = words[i:i+10]
+        end_time = start_time + segment_duration
+        srt_content += f"{i // 10 + 1 + (batch_num * 100)}\n"
+        srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
+        srt_content += " ".join(segment_words) + "\n\n"
+        start_time = end_time
+    return srt_content, audio_file, start_time
+# Batch processing function for SRT and audio generation
+async def batch_process_srt_and_audio(script_text):
+    batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
+    all_srt_content = ""
+    combined_audio = AudioSegment.empty()
+    start_offset = 0.0  # Track cumulative time offset for SRT timing
+    for batch_num, batch_text in enumerate(batches):
+        srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset)
+        all_srt_content += srt_content
+        # Append the audio of each batch to the combined audio
+        batch_audio = AudioSegment.from_file(audio_file)
+        combined_audio += batch_audio
+        start_offset = end_offset  # Update the start offset for the next batch
+        # Clean up the individual batch audio file
+        os.remove(audio_file)
+    # Export combined audio and SRT
+    combined_audio.export("final_audio.wav", format="wav")
+    with open("final_subtitles.srt", "w") as srt_file:
+        srt_file.write(all_srt_content)
+    return "final_subtitles.srt", "final_audio.wav"
+# Gradio interface function
+async def process_script(script_text):
+    srt_path, audio_path = await batch_process_srt_and_audio(script_text)
+    return srt_path, audio_path, audio_path
+# Gradio interface setup
+app = gr.Interface(
+    fn=process_script,
+    inputs=gr.Textbox(label="Enter Script Text", lines=10),
+    outputs=[
+        gr.File(label="Download SRT File"),
+        gr.File(label="Download Audio File"),
+        gr.Audio(label="Play Audio")
+    ],
+    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download."
+)
 app.launch()