# Spaces:
# Sleeping
# Sleeping
import asyncio
import os
import wave
from datetime import timedelta

import edge_tts
import gradio as gr
import srt
# Function to calculate audio duration for a given audio file | |
def get_audio_length(audio_path):
    """Return the playback duration of a WAV file in seconds.

    Args:
        audio_path: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        The number of frames divided by the frame rate, as a float.

    Raises:
        ValueError: If the file header reports a non-positive frame rate,
            which would otherwise surface as a bare ``ZeroDivisionError``.
        wave.Error: If ``wave`` cannot parse the file as WAV data.
    """
    with wave.open(audio_path, "rb") as audio:
        frames = audio.getnframes()
        rate = audio.getframerate()

    # The duration is undefined without a usable frame rate; fail with a
    # message that names the offending file instead of dividing by zero.
    if rate <= 0:
        raise ValueError(
            f"{audio_path!r} reports a non-positive frame rate ({rate})"
        )
    return frames / float(rate)
# Function to generate SRT entries for a batch of text with accurate timing | |
def generate_accurate_srt(text, start_time, batch_index, seconds_per_word=0.3):
    """Build one subtitle cue per non-blank line of ``text``.

    Each cue's length is estimated from its word count; cues are laid out
    back to back starting at ``start_time``.

    Args:
        text: Text whose lines become individual cues.
        start_time: Offset in seconds at which the first cue starts.
        batch_index: Index assigned to the first cue; later cues count up
            from it.
        seconds_per_word: Estimated speaking time per whitespace-separated
            word. Defaults to the previously hard-coded 0.3 seconds.

    Returns:
        A ``(srt_entries, end_time)`` tuple: the list of ``srt.Subtitle``
        objects and the end time in seconds of the last cue (equal to
        ``start_time`` when no cue was produced).
    """
    srt_entries = []
    current_time = start_time
    next_index = batch_index

    for line in text.splitlines():
        words = line.split()
        if not words:
            # A blank line has nothing to display and an estimated length of
            # zero, so emitting it would only create an empty, zero-length
            # cue and consume an index.
            continue

        end_time = current_time + len(words) * seconds_per_word
        srt_entries.append(
            srt.Subtitle(
                index=next_index,
                start=timedelta(seconds=current_time),
                end=timedelta(seconds=end_time),
                content=line,
            )
        )
        current_time = end_time
        next_index += 1

    return srt_entries, current_time
# Process each batch of text, generate audio, and accumulate SRT entries | |
def _fit_entries_to_duration(entries, batch_start, estimated_end, actual_duration):
    """Rescale a batch's cue times in place so the batch spans ``actual_duration`` seconds."""
    estimated_duration = estimated_end - batch_start
    if estimated_duration <= 0:
        # No cues (or only zero-length ones): there is nothing to stretch.
        return

    scale = actual_duration / estimated_duration
    for entry in entries:
        start_offset = entry.start.total_seconds() - batch_start
        end_offset = entry.end.total_seconds() - batch_start
        entry.start = timedelta(seconds=batch_start + start_offset * scale)
        entry.end = timedelta(seconds=batch_start + end_offset * scale)


def batch_process_srt_and_audio(script_text, batch_size=500):
    """Synthesize ``script_text`` in batches and write matching subtitles.

    For every ``batch_size``-character slice of the script, an audio file
    ``audio_batch_<offset>.wav`` is generated with edge-tts, its duration is
    measured, and the slice's subtitle cues are fitted to that duration so
    each batch starts where the previous batch's audio ended. All cues are
    then written to ``output.srt``.

    Args:
        script_text: The full script to convert.
        batch_size: Maximum number of characters per synthesized batch.

    Returns:
        The path of the written SRT file (``"output.srt"``).
    """
    total_srt_entries = []
    cumulative_time = 0.0
    batch_index = 1

    # NOTE(review): slicing by character count can cut a word or a line in
    # two at a batch boundary; consider splitting on line breaks instead.
    for offset in range(0, len(script_text), batch_size):
        batch_text = script_text[offset:offset + batch_size]
        if not batch_text.strip():
            # Nothing to speak in this slice, so there is no audio to make.
            continue

        audio_file = f"audio_batch_{offset}.wav"
        communicate = edge_tts.Communicate(
            text=batch_text, voice="en-US-AndrewNeural", rate="-25%"
        )
        # Communicate.save is a coroutine: calling it without awaiting never
        # writes the file. asyncio.run drives it to completion; it requires
        # that no event loop is already running in the calling thread.
        asyncio.run(communicate.save(audio_file))

        # NOTE(review): edge-tts is documented to emit MP3 audio; confirm the
        # saved file is readable by the wave-based get_audio_length.
        batch_duration = get_audio_length(audio_file)

        batch_start = cumulative_time
        srt_entries, estimated_end = generate_accurate_srt(
            batch_text, batch_start, batch_index
        )
        # Replace the word-count guess with the measured audio length so the
        # cues of this batch cover exactly the audio that was produced.
        _fit_entries_to_duration(
            srt_entries, batch_start, estimated_end, batch_duration
        )
        total_srt_entries.extend(srt_entries)
        batch_index += len(srt_entries)
        cumulative_time = batch_start + batch_duration

    srt_file = "output.srt"
    with open(srt_file, "w") as file:
        file.write(srt.compose(total_srt_entries))
    return srt_file
# Final validation to ensure no SRT entry extends beyond total audio duration | |
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Clamp every cue in an SRT file to the length of an audio file.

    The SRT file is parsed, any cue start or end time later than the audio
    duration is pulled back to the audio duration, and the file is
    rewritten in place.

    Args:
        srt_file_path: Path of the SRT file to validate and overwrite.
        audio_file_path: Path of the audio file whose duration is the limit.

    Returns:
        ``srt_file_path``, for convenient chaining.
    """
    audio_end = timedelta(seconds=get_audio_length(audio_file_path))

    with open(srt_file_path, "r") as file:
        subtitles = list(srt.parse(file.read()))

    # Check every cue rather than stopping at the first overrun: cues that
    # follow an overrunning one can run past the end of the audio as well.
    for subtitle in subtitles:
        if subtitle.end > audio_end:
            subtitle.end = audio_end
        if subtitle.start > audio_end:
            # Keep start <= end for cues that lie entirely past the audio.
            # NOTE(review): srt.compose may discard cues whose start is not
            # before their end — confirm this is the desired outcome.
            subtitle.start = audio_end

    # Write the validated SRT back to the file
    with open(srt_file_path, "w") as file:
        file.write(srt.compose(subtitles))
    return srt_file_path
# Gradio Interface | |
def process_text_to_srt(script_text):
    """Gradio callback: turn a script into an SRT file checked against audio.

    Args:
        script_text: The text entered in the interface.

    Returns:
        A ``(srt_path, audio_path)`` tuple for the file and audio outputs.
    """
    # This function does not create the combined audio file; it is expected
    # to exist already under this name.
    combined_audio_path = "combined_audio.wav"

    # Batch-process the script, then clamp the cues to the combined audio.
    subtitle_path = batch_process_srt_and_audio(script_text)
    validate_srt_against_audio(subtitle_path, combined_audio_path)

    return subtitle_path, combined_audio_path
# Gradio app setup | |
def main():
    """Build the Gradio interface around ``process_text_to_srt`` and launch it."""
    interface_options = {
        "fn": process_text_to_srt,
        "inputs": "textbox",
        "outputs": ["file", "audio"],
        "live": True,
        "title": "Text-to-SRT with Accurate Timing",
        "description": "Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration.",
    }
    app = gr.Interface(**interface_options)
    app.launch()
# Run the app | |
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()