Spaces:
Sleeping
Sleeping
File size: 3,720 Bytes
0a83152 077e0e7 0a83152 077e0e7 ced46ea 077e0e7 ced46ea 077e0e7 0a83152 077e0e7 0a83152 077e0e7 0a83152 077e0e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import asyncio
import os
import wave

import edge_tts
import gradio as gr
import srt
# Function to calculate audio duration for a given audio file
def get_audio_length(audio_path):
    """Return the playback duration of a WAV file in seconds.

    Args:
        audio_path: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        The number of frames divided by the frame rate, as a float.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        frame_count = wav_reader.getnframes()
        frames_per_second = wav_reader.getframerate()
    # Duration is simply how many frames there are over how fast they play.
    return frame_count / float(frames_per_second)
# Function to generate SRT entries for a batch of text, with timing estimated from each line's word count
def generate_accurate_srt(text, start_time, batch_index):
    """Build one subtitle per line of ``text`` with back-to-back timings.

    Each line's duration is estimated as 0.3 seconds per whitespace-separated
    word; it is not measured from any audio.

    Args:
        text: The text whose lines become individual subtitles.
        start_time: Offset in seconds at which the first subtitle begins.
        batch_index: Index assigned to the first subtitle; later subtitles
            count up from it.

    Returns:
        A ``(subtitles, end_time)`` tuple, where ``end_time`` is the offset in
        seconds at which the last subtitle ends (``start_time`` if ``text``
        has no lines).
    """
    subtitles = []
    cursor = start_time
    for number, caption in enumerate(text.splitlines(), start=batch_index):
        # Assuming approx. 0.3 seconds per word
        finish = cursor + len(caption.split()) * 0.3
        entry = srt.Subtitle(
            index=number,
            start=srt.timedelta(seconds=cursor),
            end=srt.timedelta(seconds=finish),
            content=caption,
        )
        subtitles.append(entry)
        # The next subtitle starts exactly where this one stops.
        cursor = finish
    return subtitles, cursor
# Process each batch of text, generate audio, and accumulate SRT entries
def batch_process_srt_and_audio(script_text, batch_size=500):
    """Synthesize ``script_text`` in batches and write the matching SRT file.

    The script is cut into consecutive slices of ``batch_size`` characters.
    Each non-blank slice is rendered to ``audio_batch_<offset>.wav`` with
    edge-tts, and its lines are turned into subtitles by
    ``generate_accurate_srt``, continuing from the time at which the previous
    batch ended. All subtitles are then written to ``output.srt``.

    Note that slicing is by character count, so a batch boundary can fall in
    the middle of a line; such a line yields one subtitle per batch it spans.

    Args:
        script_text: The full script to convert.
        batch_size: Maximum number of characters per batch.

    Returns:
        The path of the SRT file that was written (``"output.srt"``).

    Raises:
        RuntimeError: If called from a thread that already runs an asyncio
            event loop (``asyncio.run`` cannot be nested).
    """
    total_srt_entries = []
    cumulative_time = 0.0
    batch_index = 1
    for offset in range(0, len(script_text), batch_size):
        batch_text = script_text[offset:offset + batch_size]
        if not batch_text.strip():
            # Nothing to speak: such a batch contributes no words, hence no
            # time and no subtitle content, so skip the TTS round-trip.
            continue
        # Generate audio for the batch
        audio_file = f"audio_batch_{offset}.wav"
        communicate = edge_tts.Communicate(text=batch_text, voice="en-US-AndrewNeural", rate="-25%")
        # Communicate.save() is a coroutine; without driving it to completion
        # the call only creates a coroutine object and no file is written.
        asyncio.run(communicate.save(audio_file))
        # NOTE(review): the batch audio is not measured here. The measured
        # duration was never used, and edge-tts writes MP3 data by default
        # (whatever the file extension), which the wave module cannot read
        # -- confirm against the installed edge-tts version.
        # Generate SRT entries for this batch and update cumulative time
        srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
        total_srt_entries.extend(srt_entries)
        batch_index += len(srt_entries)
    # Write the SRT file
    srt_file = "output.srt"
    with open(srt_file, 'w') as file:
        file.write(srt.compose(total_srt_entries))
    return srt_file
# Final validation to ensure no SRT entry extends beyond total audio duration
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Trim an SRT file in place so no subtitle runs past the end of the audio.

    Every subtitle whose end time exceeds the audio duration is clamped to
    that duration, and subtitles that would only start once the audio is
    already over are dropped, since they could never be displayed.

    Args:
        srt_file_path: Path of the SRT file to validate; it is overwritten.
        audio_file_path: Path of the WAV file whose duration is the limit.

    Returns:
        ``srt_file_path``, for convenient chaining.
    """
    audio_duration = get_audio_length(audio_file_path)
    with open(srt_file_path, 'r') as file:
        subtitles = list(srt.parse(file.read()))
    validated = []
    for subtitle in subtitles:
        if subtitle.start.total_seconds() >= audio_duration:
            # Starts at or after the end of the audio: nothing left to show.
            continue
        if subtitle.end.total_seconds() > audio_duration:
            # Keep checking later entries too; stopping at the first overrun
            # would leave every following subtitle past the end of the audio.
            subtitle.end = srt.timedelta(seconds=audio_duration)
        validated.append(subtitle)
    # Write the validated SRT back to the file
    with open(srt_file_path, 'w') as file:
        file.write(srt.compose(validated))
    return srt_file_path
# Gradio Interface
def process_text_to_srt(script_text):
    """Gradio handler: turn ``script_text`` into an SRT file plus audio path.

    Args:
        script_text: The text entered by the user.

    Returns:
        A ``(srt_path, audio_path)`` tuple for the file and audio outputs.
    """
    # Assumes you have a combined final audio file
    combined_audio_path = "combined_audio.wav"
    # Build the subtitles batch by batch, then trim them against the
    # complete audio file.
    subtitle_path = batch_process_srt_and_audio(script_text)
    validate_srt_against_audio(subtitle_path, combined_audio_path)
    return subtitle_path, combined_audio_path
# Gradio app setup
def main():
    """Build the Gradio interface around ``process_text_to_srt`` and launch it."""
    interface = gr.Interface(
        fn=process_text_to_srt,
        inputs="textbox",
        outputs=["file", "audio"],
        live=True,
        title="Text-to-SRT with Accurate Timing",
        description="Enter text to convert it into audio with synchronized SRT subtitles. The SRT timings are validated against the total audio duration.",
    )
    interface.launch()


# Run the app
if __name__ == "__main__":
    main()
|