Spaces:
Sleeping
Sleeping
File size: 3,661 Bytes
077e0e7 f4b5c65 ea230c6 f4b5c65 3927c7f 077e0e7 3927c7f f4b5c65 3927c7f c95cd5b 3927c7f f4b5c65 3927c7f f4b5c65 3927c7f f4b5c65 3927c7f f4b5c65 3927c7f 077e0e7 8428946 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import asyncio
import os
import textwrap

import edge_tts
import gradio as gr
from pydub import AudioSegment
# Function to get the length of an audio file in seconds
def get_audio_length(audio_file):
    """Return the duration, in seconds, of the audio stored at ``audio_file``."""
    # Load the file with pydub and read the duration straight off the segment.
    return AudioSegment.from_file(audio_file).duration_seconds
# Function to format time for SRT
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Time offset in seconds (int or float). Negative values are
            clamped to zero because an SRT timestamp cannot express them.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Round ONCE to whole milliseconds, then stay in integer arithmetic.
    # Truncating ``(seconds % 1) * 1000`` drops a millisecond whenever the
    # float sits just below the intended value (1.001 would print ",000"),
    # and it can never carry a rounded-up fraction into the seconds field.
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hrs, mins = divmod(total_mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
# Function to generate SRT with accurate timing per batch
async def generate_accurate_srt(batch_text, batch_num, start_offset):
    """Synthesize one text batch to speech and build SRT cues for it.

    The batch is spoken with the ``en-US-AndrewNeural`` voice at ``-25%``
    rate and saved to ``batch_<batch_num>_audio.wav`` in the working
    directory. The measured audio duration is then spread evenly over the
    words of the batch, and the words are grouped into cues of up to ten.

    Args:
        batch_text: Text to synthesize and subtitle.
        batch_num: Zero-based batch index; used in the audio file name and to
            offset cue numbers by ``batch_num * 100``.
        start_offset: Time in seconds at which this batch's first cue starts.

    Returns:
        A tuple ``(srt_content, audio_file, end_time)``: the SRT text for
        this batch, the path of the saved audio, and the time in seconds at
        which the batch's last cue ends (``start_offset`` plus the measured
        audio length).
    """
    words_per_cue = 10
    # NOTE(review): the file is named .wav, but edge-tts may write a
    # different container (MP3?) — confirm pydub/ffmpeg sniffs the real format.
    audio_file = f"batch_{batch_num}_audio.wav"

    # Generate the audio using edge-tts
    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
    await tts.save(audio_file)

    # Get the actual length of the audio file
    actual_length = get_audio_length(audio_file)

    words = batch_text.split()
    if not words:
        # Nothing to subtitle; avoid dividing by zero but still advance the
        # clock by whatever audio was produced.
        return "", audio_file, start_offset + actual_length

    # Time is allotted per word so a short final cue only gets its share.
    # Giving every cue a full ten-word slot would push the last cue past the
    # end of the audio and make the returned end time overshoot.
    seconds_per_word = actual_length / len(words)

    cues = []
    start_time = start_offset
    for cue_index, first_word in enumerate(range(0, len(words), words_per_cue)):
        segment_words = words[first_word:first_word + words_per_cue]
        end_time = start_time + seconds_per_word * len(segment_words)
        cue_number = cue_index + 1 + (batch_num * 100)
        cue_text = " ".join(segment_words)
        cues.append(
            f"{cue_number}\n"
            f"{format_time(start_time)} --> {format_time(end_time)}\n"
            f"{cue_text}\n\n"
        )
        start_time = end_time

    return "".join(cues), audio_file, start_time
# Batch processing function: batches are synthesized in order, with a progress indicator
async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
    """Turn a script into one combined audio file and one SRT subtitle file.

    The script is split on whitespace into batches of at most 500
    characters. Each batch is synthesized and subtitled by
    ``generate_accurate_srt``, its audio is appended to the running result,
    and its temporary audio file is deleted. The outputs are written to
    ``final_audio.wav`` and ``final_subtitles.srt`` in the working directory.

    Args:
        script_text: Full script text to narrate.
        progress: Gradio progress tracker used to report per-batch progress.

    Returns:
        A tuple ``(srt_path, audio_path)`` with the two output file names.
    """
    # Break at whitespace so no word is cut in half at a batch boundary;
    # whitespace inside a batch is kept as written.
    batches = textwrap.wrap(
        script_text,
        width=500,
        replace_whitespace=False,
        break_on_hyphens=False,
    )

    srt_parts = []
    combined_audio = AudioSegment.empty()
    start_offset = 0.0  # Track cumulative time offset for SRT timing

    # Batches run one after another, in script order: a batch's start offset
    # is the total length of all audio before it, which is only known once
    # that audio has been synthesized. Consuming results in completion order
    # would also scramble both the audio and the subtitles.
    for batch_num, batch_text in progress.tqdm(
        list(enumerate(batches)), desc="Processing batches..."
    ):
        srt_content, audio_file, _ = await generate_accurate_srt(
            batch_text, batch_num, start_offset
        )
        try:
            # Append the audio of each batch to the combined audio
            combined_audio += AudioSegment.from_file(audio_file)
        finally:
            # Clean up the individual batch audio file
            os.remove(audio_file)
        srt_parts.append(srt_content)
        # Take the next offset from the audio actually stitched so far, so
        # subtitle timing cannot drift away from the combined track.
        start_offset = combined_audio.duration_seconds

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    # Explicit UTF-8 so non-ASCII script text does not depend on the
    # platform's default encoding.
    with open("final_subtitles.srt", "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(srt_parts))

    return "final_subtitles.srt", "final_audio.wav"
# Gradio interface function
async def process_script(script_text, progress=gr.Progress()):
    """Gradio event handler: narrate ``script_text`` and return the outputs.

    Args:
        script_text: Script text entered by the user.
        progress: Gradio progress tracker. It is declared on this handler
            because, per Gradio's documented usage, the live tracker is
            supplied through a ``gr.Progress`` default on the event function
            itself; it is then forwarded to the batch processor.

    Returns:
        A tuple ``(srt_path, audio_path, audio_path)``. The audio path
        appears twice because the interface shows it both as a downloadable
        file and in an audio player.
    """
    srt_path, audio_path = await batch_process_srt_and_audio(script_text, progress)
    return srt_path, audio_path, audio_path
# Gradio interface setup
# Input: a multi-line textbox for the script.
_script_box = gr.Textbox(label="Enter Script Text", lines=10)

# Outputs, in the order process_script returns them: SRT file, audio file,
# and an inline player for the same audio.
_result_widgets = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Play Audio"),
]

app = gr.Interface(
    fn=process_script,
    inputs=_script_box,
    outputs=_result_widgets,
    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download.",
)

# Start the Gradio server as soon as the module is executed.
app.launch()
|