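# NOTE: the original capture began with page-header residue ("Spaces: Sleeping");
# it is not part of the program and is kept here only as a comment.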
import asyncio
import os
import tempfile
import textwrap
import wave

import edge_tts
import gradio as gr
import srt
from pydub import AudioSegment
# Measure how long a WAV file plays
def get_audio_length(audio_path):
    """Return the playback duration, in seconds, of the WAV file at *audio_path*.

    The duration is the frame count divided by the frame rate, both read
    from the file through the standard-library ``wave`` module.
    """
    with wave.open(audio_path, 'rb') as wav_reader:
        frame_count = wav_reader.getnframes()
        sample_rate = wav_reader.getframerate()
    return frame_count / float(sample_rate)
# Rough speaking rate (characters per second) used only when the caller does
# not supply a measured duration for the batch. It is an estimate, not a
# measurement.
_FALLBACK_CHARS_PER_SECOND = 15.0


# Generate SRT entries for a text batch
def generate_accurate_srt(text, start_time, batch_index, batch_duration=None):
    """Build one subtitle per non-blank line of *text*.

    The batch's duration is shared out between the lines in proportion to
    their character counts, so the last subtitle ends exactly at
    ``start_time + batch_duration``. This is an approximation of where each
    line is spoken; only the batch boundaries are exact.

    Args:
        text: The batch text; each non-blank line becomes one subtitle.
        start_time: Offset in seconds at which this batch starts.
        batch_index: Index assigned to the first subtitle of the batch.
        batch_duration: Measured length of the batch audio in seconds. When
            omitted, a rough estimate based on the character count is used.

    Returns:
        A ``(srt_entries, end_time)`` tuple, where ``end_time`` is the offset
        in seconds at which the batch ends.
    """
    lines = [line for line in text.splitlines() if line.strip()]
    total_chars = sum(len(line) for line in lines)
    if batch_duration is None:
        batch_duration = total_chars / _FALLBACK_CHARS_PER_SECOND

    srt_entries = []
    current_time = start_time
    consumed_chars = 0
    for offset, line in enumerate(lines):
        consumed_chars += len(line)
        # Derive each end time from the cumulative share so rounding error
        # does not build up from one line to the next.
        end_time = start_time + batch_duration * consumed_chars / total_chars
        srt_entries.append(
            srt.Subtitle(
                index=batch_index + offset,
                start=srt.timedelta(seconds=current_time),
                end=srt.timedelta(seconds=end_time),
                content=line,
            )
        )
        current_time = end_time
    # The audio advances by the full batch duration even if the batch
    # produced no subtitles.
    return srt_entries, start_time + batch_duration


def _split_into_batches(script_text, batch_size):
    """Group whole lines into batches of at most *batch_size* characters.

    Blank lines are dropped. A single line longer than *batch_size* is
    wrapped at whitespace so that words are not cut in half.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    batches = []
    pending = []
    pending_len = 0
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if len(line) > batch_size:
            pieces = textwrap.wrap(line, width=batch_size, break_on_hyphens=False)
        else:
            pieces = [line]
        for piece in pieces:
            # The extra character is the newline joining this piece to the batch.
            needed = len(piece) + (1 if pending else 0)
            if pending and pending_len + needed > batch_size:
                batches.append("\n".join(pending))
                pending, pending_len = [], 0
                needed = len(piece)
            pending.append(piece)
            pending_len += needed
    if pending:
        batches.append("\n".join(pending))
    return batches


# Process batches and accumulate SRT entries
async def batch_process_srt_and_audio(script_text, voice, batch_size=500, progress=gr.Progress()):
    """Synthesize *script_text* in batches and write the combined audio and SRT.

    Each batch is synthesized with edge-tts, its real duration is measured,
    and that duration is used to time the batch's subtitles so they stay in
    step with the concatenated audio.

    Args:
        script_text: The full script to speak.
        voice: edge-tts voice name passed to ``edge_tts.Communicate``.
        batch_size: Maximum number of characters sent to the TTS service per
            request.
        progress: Gradio progress tracker. Currently unused; kept so the
            signature stays compatible with existing callers.

    Returns:
        A ``(srt_path, audio_path)`` tuple:
        ``("final_subtitles.srt", "final_audio.wav")``.

    Raises:
        ValueError: If *script_text* contains no text to synthesize or
            *batch_size* is not positive.
    """
    batches = _split_into_batches(script_text or "", batch_size)
    if not batches:
        raise ValueError("script_text contains no text to synthesize")

    total_srt_entries = []
    combined_audio = AudioSegment.empty()
    cumulative_time = 0.0  # Track total time for accurate SRT start times
    batch_index = 1

    # Intermediate MP3 files live in a private directory that is removed on
    # exit, even if synthesis fails part-way through.
    with tempfile.TemporaryDirectory() as work_dir:
        for batch_number, batch_text in enumerate(batches):
            mp3_file = os.path.join(work_dir, f"audio_batch_{batch_number}.mp3")

            # Generate audio for the batch and save as MP3
            tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
            await tts.save(mp3_file)

            # Load the audio into memory and measure its real duration
            batch_audio = AudioSegment.from_file(mp3_file, format="mp3")
            batch_duration = batch_audio.duration_seconds

            srt_entries, cumulative_time = generate_accurate_srt(
                batch_text, cumulative_time, batch_index, batch_duration
            )

            # Append entries and audio for the batch
            total_srt_entries.extend(srt_entries)
            combined_audio += batch_audio
            batch_index += len(srt_entries)

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    with open("final_subtitles.srt", "w") as srt_file:
        srt_file.write(srt.compose(total_srt_entries))

    # Final validation check
    validate_srt_against_audio("final_subtitles.srt", "final_audio.wav")
    return "final_subtitles.srt", "final_audio.wav"
# Validate SRT timing with total audio length
def validate_srt_against_audio(srt_file_path, audio_file_path):
    """Rewrite the SRT file so that no subtitle extends past the audio.

    Every subtitle whose end lies beyond the audio duration is clamped to the
    audio end, not just the first one found. Subtitles that would start at or
    after the end of the audio can never be shown and are dropped.

    Args:
        srt_file_path: Path of the SRT file to check; it is rewritten in place.
        audio_file_path: Path of the WAV file the subtitles belong to.
    """
    audio_duration = get_audio_length(audio_file_path)
    audio_end = srt.timedelta(seconds=audio_duration)

    with open(srt_file_path, 'r') as file:
        subtitles = list(srt.parse(file.read()))

    kept = []
    for subtitle in subtitles:
        if subtitle.start >= audio_end:
            # Starts after the audio has finished; nothing to display.
            continue
        if subtitle.end > audio_end:
            subtitle.end = audio_end
        kept.append(subtitle)

    with open(srt_file_path, 'w') as file:
        file.write(srt.compose(kept))
# Gradio click handler: run the pipeline and turn failures into a message
async def process_script(script_text, language, voice):
    """Generate subtitles and audio for *script_text* and return Gradio outputs.

    Returns a 4-tuple ``(srt_path, audio_path, audio_path, message)``. On
    success the message is empty; on any exception the error is printed and
    the three paths are ``None`` with a generic message for the user.
    *language* is accepted for the UI wiring but is not used here.
    """
    failure_message = "An error occurred. Please check the script text and try again."
    try:
        subtitle_file, audio_file = await batch_process_srt_and_audio(script_text, voice)
    except Exception as exc:
        print(f"Error: {exc}")
        return None, None, None, failure_message
    else:
        # The audio path is returned twice: once for download, once for playback.
        return subtitle_file, audio_file, audio_file, ""
# Dynamic voice selection based on language
def update_voice_options(language):
    """Return a Gradio update listing the voices available for *language*.

    The first voice of the language is pre-selected. For a language that is
    not in the table, the update carries an empty choice list and no selected
    value instead of raising ``IndexError``.
    """
    voices = {
        "en-US": ["en-US-AndrewNeural", "en-US-JennyNeural"],
        "es-ES": ["es-ES-AlvaroNeural", "es-ES-ElviraNeural"]
    }
    available = voices.get(language, [])
    default_voice = available[0] if available else None
    return gr.update(choices=available, value=default_voice)
# Gradio app setup
with gr.Blocks() as app:
    gr.Markdown("# Text to Speech with Accurate SRT and Audio Generation")

    language = gr.Dropdown(choices=["en-US", "es-ES"], label="Select Language", value="en-US")
    # Pre-select a voice matching the default language, as the language
    # dropdown does, so a submit without touching this dropdown still sends a
    # valid voice name.
    voice = gr.Dropdown(
        choices=["en-US-AndrewNeural", "en-US-JennyNeural"],
        label="Select Voice",
        value="en-US-AndrewNeural",
    )
    # Changing the language refreshes the list of voices.
    language.change(fn=update_voice_options, inputs=language, outputs=voice)

    script_text = gr.Textbox(label="Enter Script Text", lines=10)

    # Order matches the 4-tuple returned by process_script.
    outputs = [
        gr.File(label="Download SRT File"),
        gr.File(label="Download Audio File"),
        gr.Audio(label="Play Audio"),
        gr.Markdown(label="Error Message")  # This will display any error messages
    ]

    submit_button = gr.Button("Generate Audio and SRT")
    submit_button.click(process_script, inputs=[script_text, language, voice], outputs=outputs)

app.launch()