File size: 3,720 Bytes
0a83152
077e0e7
0a83152
077e0e7
 
ced46ea
077e0e7
 
 
 
 
 
 
 
 
 
 
ced46ea
077e0e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a83152
077e0e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a83152
077e0e7
0a83152
077e0e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import asyncio
import os
import wave

import edge_tts
import gradio as gr
import srt

# Function to calculate audio duration for a given audio file
def get_audio_length(audio_path):
    """Return the playing time, in seconds, of the WAV file at ``audio_path``.

    The value is the number of audio frames divided by the frame rate, both
    read through the standard-library ``wave`` module.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        frame_count = wav_reader.getnframes()
        frames_per_second = wav_reader.getframerate()
    # The file is closed by now; only the two header values are needed.
    return frame_count / float(frames_per_second)

# Function to generate SRT entries for a batch of text using estimated (word-count based) timing
def generate_accurate_srt(text, start_time, batch_index, seconds_per_word=0.3):
    """Build SRT entries with estimated timing for each non-blank line of ``text``.

    Timing is an estimate: every line is given ``seconds_per_word`` seconds
    per whitespace-separated word, and lines are laid out back to back
    starting at ``start_time``. No audio is inspected here.

    Args:
        text: Text whose lines become individual subtitle entries.
        start_time: Offset, in seconds, at which the first entry starts.
        batch_index: Index assigned to the first entry; later entries count
            up from it.
        seconds_per_word: Estimated speaking time per word, in seconds.

    Returns:
        A ``(srt_entries, current_time)`` tuple: the list of ``srt.Subtitle``
        objects and the end time, in seconds, of the last entry (equal to
        ``start_time`` when no entry was produced).
    """
    srt_entries = []
    current_time = start_time

    for line in text.splitlines():
        # A line without words would become a cue with no text and zero
        # duration (start == end), and would still consume an index.
        if not line.strip():
            continue

        duration = len(line.split()) * seconds_per_word
        end_time = current_time + duration

        srt_entries.append(
            srt.Subtitle(
                index=batch_index,
                start=srt.timedelta(seconds=current_time),
                end=srt.timedelta(seconds=end_time),
                content=line,
            )
        )
        current_time = end_time
        batch_index += 1
    return srt_entries, current_time

# Process each batch of text, generate audio, and accumulate SRT entries
def batch_process_srt_and_audio(script_text, batch_size=500):
    """Synthesize ``script_text`` in batches and write the matching SRT file.

    The script is cut into consecutive slices of ``batch_size`` characters.
    For every slice that contains text, speech is synthesized with edge-tts
    into ``audio_batch_<offset>.wav`` (``<offset>`` being the slice's start
    position in the script), and subtitle entries are produced by
    ``generate_accurate_srt``. All entries are then written to
    ``output.srt`` in the current working directory.

    Args:
        script_text: Full text to synthesize and subtitle.
        batch_size: Maximum number of characters per batch.

    Returns:
        The path of the written SRT file (``"output.srt"``).
    """
    total_srt_entries = []
    cumulative_time = 0.0
    batch_index = 1

    for i in range(0, len(script_text), batch_size):
        batch_text = script_text[i:i + batch_size]

        # Nothing to speak or caption in a whitespace-only slice.
        if not batch_text.strip():
            continue

        # Generate audio for the batch.
        # NOTE(review): edge-tts streams MP3 data by default, so this file is
        # presumably not a RIFF/WAV container despite its name -- confirm
        # before reading it with the ``wave`` module.
        audio_file = f"audio_batch_{i}.wav"
        communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
        # ``Communicate.save`` is a coroutine function: calling it without
        # awaiting only creates a coroutine object and writes nothing.
        # ``asyncio.run`` drives it to completion; it must be called from a
        # thread that has no running event loop.
        asyncio.run(communicate.save(audio_file))

        # Subtitle timing comes from the per-word estimate, so the measured
        # duration of the batch audio is not needed here.
        srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)

        total_srt_entries.extend(srt_entries)
        batch_index += len(srt_entries)

    # Write the SRT file
    srt_file = "output.srt"
    with open(srt_file, 'w') as file:
        file.write(srt.compose(total_srt_entries))

    return srt_file

# Final validation to ensure no SRT entry extends beyond total audio duration
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Trim the SRT file so that no entry runs past the end of the audio.

    The SRT file is rewritten in place. Every entry whose end time lies
    beyond the audio duration is clamped to that duration. Entries that start
    at or after the end of the audio are dropped, because clamping them would
    leave an entry whose end is not after its start.

    Args:
        srt_file_path: Path of the SRT file to validate; it is overwritten.
        audio_file_path: Path of the WAV file whose duration is the limit.

    Returns:
        ``srt_file_path``, unchanged.
    """
    audio_duration = get_audio_length(audio_file_path)
    audio_end = srt.timedelta(seconds=audio_duration)

    with open(srt_file_path, 'r') as file:
        subtitles = list(srt.parse(file.read()))

    validated = []
    for subtitle in subtitles:
        if subtitle.start >= audio_end:
            # Starts after the audio is over; it cannot be shortened to fit.
            continue
        # Check every entry rather than stopping at the first overrun, so
        # later entries cannot extend beyond the audio either.
        if subtitle.end > audio_end:
            subtitle.end = audio_end
        validated.append(subtitle)

    # Write the validated SRT back to the file
    with open(srt_file_path, 'w') as file:
        file.write(srt.compose(validated))

    return srt_file_path

# Gradio Interface
def process_text_to_srt(script_text):
    """Turn ``script_text`` into an SRT file and pair it with the final audio.

    Runs the batch pipeline to produce the SRT file, then validates that file
    against the combined audio track.

    Returns:
        A ``(srt_path, audio_path)`` tuple.
    """
    # NOTE(review): nothing visible in this file creates the combined track;
    # it is assumed to already exist at this path -- confirm.
    combined_audio_path = "combined_audio.wav"

    subtitle_path = batch_process_srt_and_audio(script_text)
    validate_srt_against_audio(subtitle_path, combined_audio_path)

    return subtitle_path, combined_audio_path

# Gradio app setup
def main():
    """Build the Gradio interface around ``process_text_to_srt`` and launch it.

    The interface takes a textbox as input and exposes a file and an audio
    component as outputs, with ``live=True``.
    """
    interface_options = dict(
        fn=process_text_to_srt,
        inputs="textbox",
        outputs=["file", "audio"],
        live=True,
        title="Text-to-SRT with Accurate Timing",
        description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration.",
    )
    app = gr.Interface(**interface_options)
    app.launch()

# Run the app
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    main()