# insta-maker-2
#
# Sleeping
#
# File size: 3,720 Bytes

import asyncio
import os
import wave
from datetime import timedelta

import edge_tts
import gradio as gr
import srt

# Measure how long a WAV file plays, in seconds
def get_audio_length(audio_path):
    """Return the duration, in seconds, of the WAV file at ``audio_path``.

    The duration is the file's frame count divided by its frame rate, both
    read through the standard-library ``wave`` module.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        frame_count, frame_rate = wav_reader.getnframes(), wav_reader.getframerate()
    return frame_count / float(frame_rate)

# Function to generate SRT entries for a batch of text with estimated timing
def generate_accurate_srt(text, start_time, batch_index, seconds_per_word=0.3):
    """Build consecutive SRT entries, one per non-blank line of ``text``.

    Timing is an estimate: each line lasts ``seconds_per_word`` for every
    whitespace-separated word it contains. No audio is inspected here.

    Args:
        text: Script text; it is split into lines with ``str.splitlines``.
        start_time: Offset in seconds at which the first subtitle starts.
        batch_index: Index given to the first subtitle produced; following
            subtitles count up from it.
        seconds_per_word: Estimated speaking time per word, in seconds.

    Returns:
        A ``(entries, end_time)`` tuple: the list of ``srt.Subtitle``
        objects and the time in seconds at which the last one ends
        (equal to ``start_time`` when no subtitle was produced).
    """
    srt_entries = []
    current_time = start_time

    for line in text.splitlines():
        words = line.split()
        if not words:
            # A blank line has no speech; emitting it would only create an
            # empty, zero-length cue.
            continue

        end_time = current_time + len(words) * seconds_per_word
        srt_entries.append(
            srt.Subtitle(
                index=batch_index + len(srt_entries),
                start=timedelta(seconds=current_time),
                end=timedelta(seconds=end_time),
                content=line,
            )
        )
        current_time = end_time

    return srt_entries, current_time

# Split text into chunks of at most batch_size characters, breaking at whitespace
def _split_into_batches(text, batch_size):
    """Yield ``(offset, chunk)`` pairs that together cover ``text``.

    Every chunk is at most ``batch_size`` characters long. A chunk ends at
    the last whitespace character that still fits, so words are not cut in
    half; only a run longer than ``batch_size`` without any whitespace is
    split at a fixed width. ``offset`` is the chunk's start position in
    ``text``. The whitespace character used as a break point is dropped.
    """
    offset = 0
    total = len(text)
    while offset < total:
        end = offset + batch_size
        if end >= total:
            yield offset, text[offset:]
            return

        # Walk back from the first character that no longer fits; that
        # character may itself be the separator.
        cut = end
        while cut > offset and not text[cut].isspace():
            cut -= 1

        if cut == offset:
            # No usable whitespace in this window: fall back to a hard split.
            yield offset, text[offset:end]
            offset = end
        else:
            yield offset, text[offset:cut]
            offset = cut + 1


# Synthesize one chunk of text to an audio file with edge-tts
async def _synthesize_batch(batch_text, audio_file):
    """Await edge-tts synthesis of ``batch_text`` into ``audio_file``."""
    communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
    await communicate.save(audio_file)


# Stretch or shrink a batch's subtitles so they span the measured audio length
def _fit_entries_to_duration(entries, batch_start, estimated_end, measured_duration):
    """Rescale ``entries`` in place from the estimated span to ``measured_duration``.

    Times are scaled linearly around ``batch_start``. Nothing is changed
    when the estimated span is empty (no words in the batch).
    """
    estimated_span = estimated_end - batch_start
    if estimated_span <= 0:
        return

    scale = measured_duration / estimated_span
    for entry in entries:
        start_offset = entry.start.total_seconds() - batch_start
        end_offset = entry.end.total_seconds() - batch_start
        entry.start = timedelta(seconds=batch_start + start_offset * scale)
        entry.end = timedelta(seconds=batch_start + end_offset * scale)


# Process each batch of text, generate audio, and accumulate SRT entries
def batch_process_srt_and_audio(script_text, batch_size=500):
    """Synthesize ``script_text`` in batches and write ``output.srt``.

    For each batch an audio file named ``audio_batch_<offset>.wav`` is
    written in the current directory, where ``<offset>`` is the batch's
    start position in ``script_text``. Subtitles are first timed by word
    count; when the batch audio can be measured, the batch's subtitles are
    rescaled to that measured duration.

    Args:
        script_text: The full script to convert.
        batch_size: Maximum number of characters per batch.

    Returns:
        The path of the SRT file that was written (``"output.srt"``).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total_srt_entries = []
    cumulative_time = 0.0
    batch_index = 1

    for offset, batch_text in _split_into_batches(script_text, batch_size):
        if not batch_text.strip():
            # Nothing to speak; avoid a pointless TTS request.
            continue

        # Generate audio for the batch. Communicate.save is a coroutine, so
        # it has to be driven by an event loop to actually write the file.
        # asyncio.run requires that no loop is already running in this thread.
        audio_file = f"audio_batch_{offset}.wav"
        asyncio.run(_synthesize_batch(batch_text, audio_file))

        # Measure the generated audio when possible.
        # NOTE(review): edge-tts appears to emit MP3 data by default, which
        # the wave module cannot read - confirm for the installed version.
        # In that case the word-count estimate is kept unchanged.
        try:
            batch_duration = get_audio_length(audio_file)
        except (wave.Error, EOFError):
            batch_duration = None

        # Generate SRT entries for this batch and update cumulative time
        batch_start = cumulative_time
        srt_entries, estimated_end = generate_accurate_srt(batch_text, batch_start, batch_index)

        if batch_duration is None:
            cumulative_time = estimated_end
        else:
            _fit_entries_to_duration(srt_entries, batch_start, estimated_end, batch_duration)
            cumulative_time = batch_start + batch_duration

        total_srt_entries.extend(srt_entries)
        batch_index += len(srt_entries)

    # Write the SRT file
    srt_file = "output.srt"
    with open(srt_file, 'w') as file:
        file.write(srt.compose(total_srt_entries))

    return srt_file

# Final validation to ensure no SRT entry extends beyond total audio duration
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Rewrite the SRT file so that no subtitle runs past the end of the audio.

    Every subtitle whose end lies after the audio duration is clamped to
    that duration. Subtitles that start at or after the end of the audio
    are removed, because clamping them would leave an empty or inverted
    time range.

    Args:
        srt_file_path: Path of the SRT file to check; it is overwritten.
        audio_file_path: Path of the WAV file whose duration is the limit.

    Returns:
        ``srt_file_path``, unchanged.
    """
    audio_end = timedelta(seconds=get_audio_length(audio_file_path))

    with open(srt_file_path, 'r') as file:
        subtitles = list(srt.parse(file.read()))

    validated = []
    for subtitle in subtitles:
        if subtitle.start >= audio_end:
            # Would never be displayed while the audio is playing.
            continue
        if subtitle.end > audio_end:
            subtitle.end = audio_end
        validated.append(subtitle)

    # Write the validated SRT back to the file
    with open(srt_file_path, 'w') as file:
        file.write(srt.compose(validated))

    return srt_file_path

# Gradio callback: turn the submitted script into an SRT file (plus audio, if present)
def process_text_to_srt(script_text):
    """Create the SRT file for ``script_text`` and validate it when possible.

    Args:
        script_text: Text entered in the Gradio textbox; ``None`` is treated
            as empty text.

    Returns:
        A ``(srt_file, audio_file)`` tuple. ``audio_file`` is the path
        ``"combined_audio.wav"`` when that file exists, otherwise ``None``.
    """
    # Process the script in batches and create SRT
    srt_file = batch_process_srt_and_audio(script_text or "")

    # The combined audio file is not produced anywhere in this function; it
    # is expected to exist already. Without it there is nothing to validate
    # against, so return the unvalidated SRT instead of failing.
    final_audio_file = "combined_audio.wav"
    if not os.path.isfile(final_audio_file):
        return srt_file, None

    # Validate the final SRT file with the complete audio file
    validate_srt_against_audio(srt_file, final_audio_file)

    return srt_file, final_audio_file

# Gradio app setup
def main():
    """Build the Gradio interface around ``process_text_to_srt`` and launch it."""
    interface_options = dict(
        fn=process_text_to_srt,
        inputs="textbox",
        outputs=["file", "audio"],
        live=True,
        title="Text-to-SRT with Accurate Timing",
        description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration.",
    )
    app = gr.Interface(**interface_options)
    app.launch()

# Run the app only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()