insta-maker

Sleeping

App Files Files Community

hivecorp committed on Nov 3, 2024

Commit

ea230c6

verified ·

1 Parent(s): c5c349b

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -90

app.py CHANGED Viewed

@@ -1,135 +1,115 @@
-import os
 import gradio as gr
-import srt
-import edge_tts
 import asyncio
 import tempfile
 from datetime import timedelta
-from pydub import AudioSegment
-# Define Edge TTS settings
 DEFAULT_VOICE = "en-US-AndrewNeural"
 DEFAULT_RATE = "-25%"
def split_into_batches(script, batch_size=320, min_batch_size=300):
    """Split *script* into word batches, ending on a sentence where possible.

    Words are accumulated until either the batch holds at least
    ``min_batch_size`` words and the current word ends a sentence (``.``,
    ``?`` or ``!``), or the hard cap of ``batch_size`` words is reached,
    whichever happens first.

    Args:
        script: Text to split; words are separated on any whitespace.
        batch_size: Maximum number of words in a batch.
        min_batch_size: Number of words a batch must hold before a sentence
            end is allowed to close it.

    Returns:
        A list of batch strings, each with its words joined by single spaces.
        An empty or whitespace-only script yields an empty list.
    """
    batches = []
    current_batch = []
    for word in script.split():
        current_batch.append(word)
        word_count = len(current_batch)
        ends_sentence = word.endswith((".", "?", "!"))
        # A sentence end only closes the batch once it is large enough;
        # closing on every sentence end would yield one-sentence batches.
        if word_count >= batch_size or (ends_sentence and word_count >= min_batch_size):
            batches.append(" ".join(current_batch))
            current_batch = []
    if current_batch:
        batches.append(" ".join(current_batch))
    return batches
def split_into_segments(batch, segment_size=7):
    """Split *batch* into short subtitle-sized segments.

    A segment is closed as soon as it holds ``segment_size`` words or the
    current word ends with ``.``, ``?`` or ``!``.

    Args:
        batch: Text to split; words are separated on any whitespace.
        segment_size: Maximum number of words in a segment.

    Returns:
        A list of segment strings, each with its words joined by single
        spaces. Trailing words that never hit a boundary form a final segment.
    """
    segments = []
    pending = []
    for word in batch.split():
        pending.append(word)
        if len(pending) >= segment_size or word.endswith((".", "?", "!")):
            segments.append(" ".join(pending))
            pending = []
    if pending:
        segments.append(" ".join(pending))
    return segments
async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
    """Synthesize *text* with Edge TTS and return the path of the saved audio.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment string (for example ``"-25%"``).

    Returns:
        Path of a temporary file containing the audio. The file is created
        with ``delete=False`` and is therefore not removed automatically.
    """
    # Pass everything by keyword.
    # NOTE(review): recent edge-tts releases appear to accept ``rate`` only as
    # a keyword argument - confirm against the pinned version.
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
    # Reserve the name, then close the handle before edge-tts writes to the
    # path: a still-open NamedTemporaryFile cannot be reopened on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name
    await communicate.save(audio_path)
    return audio_path
async def generate_srt_for_batch(batch_text, batch_index, start_offset=None):
    """Generate audio and matching subtitle entries for one batch of text.

    The batch is cut into segments with ``split_into_segments``; each segment
    is synthesized with ``generate_audio`` and its subtitle lasts exactly as
    long as the generated audio file (measured by ``get_audio_length``).

    Args:
        batch_text: Text of the batch.
        batch_index: Zero-based position of the batch; used to derive subtitle
            indexes as ``batch_index * 100 + segment_number``.
        start_offset: Optional ``timedelta`` at which the first subtitle of
            this batch starts. Defaults to zero, so the timeline of the batch
            is relative to its own beginning.

    Returns:
        A ``(srt_entries, segment_audio_files)`` tuple: the ``srt.Subtitle``
        objects and the audio file paths, both in segment order.
    """
    current_time = timedelta(seconds=0) if start_offset is None else start_offset
    srt_entries = []
    segment_audio_files = []
    for i, segment in enumerate(split_into_segments(batch_text)):
        audio_path = await generate_audio(segment)
        segment_audio_files.append(audio_path)

        # Each subtitle starts where the previous one ended and spans the
        # measured duration of its own audio clip.
        end_time = current_time + timedelta(seconds=get_audio_length(audio_path))
        srt_entries.append(
            srt.Subtitle(
                index=(batch_index * 100) + i + 1,
                start=current_time,
                end=end_time,
                content=segment,
            )
        )
        current_time = end_time
    return srt_entries, segment_audio_files
def get_audio_length(audio_path):
    """Return the duration, in seconds, of the audio file at *audio_path*."""
    return AudioSegment.from_file(audio_path).duration_seconds
async def process_script(script):
    """Turn *script* into one combined audio file and one SRT file.

    The script is split into batches, each batch is synthesized segment by
    segment, and the per-batch results are merged into a single timeline.

    Args:
        script: Full text to convert.

    Returns:
        A ``(final_audio_path, final_srt_path)`` tuple of temporary file
        paths. Neither file is removed automatically.
    """
    all_srt_entries = []
    all_audio_files = []
    timeline_offset = timedelta(seconds=0)

    # Batches are processed one after another, in script order.
    for batch_index, batch_text in enumerate(split_into_batches(script)):
        srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)

        # Every batch reports times relative to its own start, so shift its
        # entries behind the batches already placed on the timeline.
        batch_length = timedelta(seconds=0)
        for entry in srt_entries:
            batch_length = max(batch_length, entry.end)
            entry.start += timeline_offset
            entry.end += timeline_offset
        timeline_offset += batch_length

        all_srt_entries.extend(srt_entries)
        all_audio_files.extend(audio_files)

    # Concatenate the segment clips in order into one audio track.
    combined_audio = AudioSegment.empty()
    for audio_file in all_audio_files:
        combined_audio += AudioSegment.from_file(audio_file)

    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name and is deprecated because of the resulting race.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio_tmp:
        final_audio_path = audio_tmp.name
    combined_audio.export(final_audio_path, format="wav")

    # Write the subtitles as UTF-8 so non-ASCII script text does not depend
    # on the platform's default encoding.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".srt", encoding="utf-8"
    ) as srt_file:
        srt_file.write(srt.compose(all_srt_entries))
        final_srt_path = srt_file.name

    return final_audio_path, final_srt_path
def generate_output(script):
    """Gradio callback: run the async pipeline and return both output paths.

    Args:
        script: Text entered in the UI.

    Returns:
        The ``(audio_path, srt_path)`` pair produced by ``process_script``.
    """
    audio_file, subtitle_file = asyncio.run(process_script(script))
    return audio_file, subtitle_file
# UI layout: script text box, then audio and SRT outputs side by side, then
# the button that triggers generation.
with gr.Blocks() as app:
    gr.Markdown("### Text to Speech with Batch Processing and SRT Generation")
    text_input = gr.Textbox(
        placeholder="Enter your script here",
        lines=10,
        label="Script Input",
    )
    with gr.Row():
        audio_output = gr.Audio(label="Final Audio", type="filepath")
        srt_output = gr.File(label="Final SRT")
    process_button = gr.Button("Generate Audio and SRT")
    # Clicking the button feeds the script to generate_output and routes its
    # two return values to the audio player and the file download.
    process_button.click(
        fn=generate_output,
        inputs=text_input,
        outputs=[audio_output, srt_output],
    )

app.launch()

 import gradio as gr
 import asyncio
+import edge_tts
 import tempfile
+import os
+import srt
 from datetime import timedelta
+from itertools import chain
+# Default TTS settings
 DEFAULT_VOICE = "en-US-AndrewNeural"
 DEFAULT_RATE = "-25%"
def split_into_batches(text, batch_size=320):
    """Split *text* into consecutive batches of at most *batch_size* words.

    Batches are cut purely by word count; punctuation is not considered.

    Args:
        text: Text to split; words are separated on any whitespace.
        batch_size: Maximum number of words per batch. Values below 1 behave
            like 1 (one word per batch).

    Returns:
        A list of batch strings, each with its words joined by single spaces.
        An empty or whitespace-only text yields an empty list.
    """
    words = text.split()
    # Clamp so a non-positive size still advances through the word list.
    step = max(1, batch_size)
    return [
        " ".join(words[start:start + step])
        for start in range(0, len(words), step)
    ]
def _split_into_segments(words, max_words):
    """Group *words* into segments closed by a word limit or punctuation."""
    segment_texts = []
    current_segment = []
    for word in words:
        current_segment.append(word)
        if len(current_segment) >= max_words or word.endswith((".", ",", "!", "?")):
            segment_texts.append(" ".join(current_segment))
            current_segment = []
    # Words left over after the last boundary form the final segment.
    if current_segment:
        segment_texts.append(" ".join(current_segment))
    return segment_texts


async def generate_srt_for_batch(batch_text, batch_index, max_words=5, segment_seconds=2):
    """Generate subtitle entries and audio files for one batch of text.

    The batch is cut into segments of at most ``max_words`` words, closing a
    segment early when a word ends with ``.``, ``,``, ``!`` or ``?``. Every
    segment gets a subtitle of fixed length ``segment_seconds`` (the duration
    is an estimate, not measured from the audio) and one audio file produced
    by ``generate_audio``.

    Args:
        batch_text: Text of the batch.
        batch_index: Position of the batch. Currently unused; kept so the
            call signature stays compatible.
        max_words: Maximum number of words per segment.
        segment_seconds: Display time assigned to each subtitle, in seconds.

    Returns:
        A ``(segments, audio_files)`` tuple: the ``srt.Subtitle`` objects,
        indexed from 1 and timed from zero within this batch, and the audio
        file paths in the same order.
    """
    segment_texts = _split_into_segments(batch_text.split(), max_words)

    # Subtitles are laid back to back: entry k spans [k * step, (k + 1) * step).
    step = timedelta(seconds=segment_seconds)
    segments = [
        srt.Subtitle(
            index=position + 1,
            start=position * step,
            end=(position + 1) * step,
            content=segment_text,
        )
        for position, segment_text in enumerate(segment_texts)
    ]

    # Audio is generated one segment at a time, in subtitle order.
    audio_files = []
    for segment_text in segment_texts:
        audio_files.append(await generate_audio(segment_text))
    return segments, audio_files
async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
    """Synthesize *text* with Edge TTS and return the path of the saved audio.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment string (for example ``"-25%"``).

    Returns:
        Path of a temporary file containing the audio. The file is created
        with ``delete=False`` and is therefore not removed automatically.
    """
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
    # Reserve the name, then close the handle before edge-tts writes to the
    # path: a still-open NamedTemporaryFile cannot be reopened on Windows.
    # NOTE(review): edge-tts is understood to emit MP3-encoded data; the
    # ".wav" suffix is kept for compatibility with callers - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name
    await communicate.save(audio_path)
    return audio_path
async def process_script(script):
    """Turn *script* into one combined audio file and the composed SRT text.

    The script is split into batches, each batch is converted by
    ``generate_srt_for_batch``, the per-batch subtitles are placed one after
    another on a single timeline, and the audio clips are concatenated with
    ffmpeg.

    Args:
        script: Full text to convert.

    Returns:
        A ``(combined_audio_path, final_srt)`` tuple: the path of the
        concatenated audio file (not removed automatically) and the SRT
        document as a string.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not available.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local import: only this function needs it, and the module's import
    # block is left untouched.
    import subprocess

    all_srt_entries = []
    all_audio_files = []
    timeline_offset = timedelta(seconds=0)

    for batch_index, batch_text in enumerate(split_into_batches(script)):
        srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)

        # Every batch reports times starting at zero. Without shifting,
        # entries from different batches would overlap once composed.
        batch_length = timedelta(seconds=0)
        for entry in srt_entries:
            batch_length = max(batch_length, entry.end)
            entry.start += timeline_offset
            entry.end += timeline_offset
        timeline_offset += batch_length

        all_srt_entries.extend(srt_entries)
        all_audio_files.extend(audio_files)

    final_srt = srt.compose(all_srt_entries)

    # Reserve the output path and close the handle before ffmpeg writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as combined_audio:
        combined_audio_path = combined_audio.name

    # With no clips there is nothing to concatenate; the reserved (empty)
    # file is returned as-is.
    if all_audio_files:
        # Argument list instead of a shell string: no quoting issues, and
        # check=True surfaces ffmpeg failures instead of ignoring them.
        subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-i",
                "concat:" + "|".join(all_audio_files),
                "-c",
                "copy",
                combined_audio_path,
            ],
            check=True,
        )
    return combined_audio_path, final_srt
def generate_output(script):
    """Gradio callback: run the pipeline and return the two output file paths.

    Args:
        script: Text entered in the UI.

    Returns:
        A ``(final_audio_path, srt_file_path)`` tuple. The SRT text returned
        by ``process_script`` is written to a temporary ``.srt`` file that is
        not removed automatically.
    """
    final_audio_path, final_srt = asyncio.run(process_script(script))

    # Write through the temp-file handle itself so it is closed when the
    # block exits, and use UTF-8 so non-ASCII text does not depend on the
    # platform's default encoding.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".srt", encoding="utf-8"
    ) as srt_file:
        srt_file.write(final_srt)
        srt_file_path = srt_file.name
    return final_audio_path, srt_file_path
# UI layout, top to bottom: title, script text box, generate button, audio
# player and SRT download.
with gr.Blocks() as app:
    gr.Markdown("# Batch SRT and Audio Generator")
    script_input = gr.Textbox(label="Enter Script", lines=10)
    generate_button = gr.Button("Generate SRT and Audio")
    audio_output = gr.Audio(label="Generated Audio", type="filepath")
    srt_output = gr.File(label="Generated SRT File")
    # Clicking the button feeds the script to generate_output and routes its
    # two return values to the audio player and the file download.
    generate_button.click(
        fn=generate_output,
        inputs=script_input,
        outputs=[audio_output, srt_output],
    )

app.launch()