hivecorp committed on
Commit
3927c7f
·
verified ·
1 Parent(s): 8ca57cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -113
app.py CHANGED
@@ -2,131 +2,98 @@ import gradio as gr
2
  from pydub import AudioSegment
3
  import edge_tts
4
  import os
5
- import wave
6
  import asyncio
7
- import srt
8
-
9
- # Function to calculate audio duration
10
- def get_audio_length(audio_path):
11
- with wave.open(audio_path, 'rb') as audio:
12
- frames = audio.getnframes()
13
- rate = audio.getframerate()
14
- return frames / float(rate)
15
-
16
- # Generate precise SRT entries for a text batch
17
- def generate_accurate_srt(text, start_time, batch_index):
18
- srt_entries = []
19
- current_time = start_time
 
 
 
 
 
20
 
21
- for line in text.splitlines():
22
- end_time = current_time + get_audio_length_for_line(line)
23
-
24
- srt_entries.append(
25
- srt.Subtitle(
26
- index=batch_index,
27
- start=srt.timedelta(seconds=current_time),
28
- end=srt.timedelta(seconds=end_time),
29
- content=line
30
- )
31
- )
32
- current_time = end_time
33
- batch_index += 1
34
- return srt_entries, current_time
35
-
36
- # Process batches and accumulate precise SRT entries
37
- async def batch_process_srt_and_audio(script_text, voice, batch_size=500, progress=gr.Progress()):
38
- total_srt_entries = []
 
 
 
 
 
 
 
 
 
 
39
  combined_audio = AudioSegment.empty()
40
- cumulative_time = 0.0 # Track total time for accurate SRT start times
41
- batch_index = 1
 
 
 
 
 
42
 
43
- # Split text into manageable batches
44
- for i in range(0, len(script_text), batch_size):
45
- batch_text = script_text[i:i+batch_size]
46
- mp3_file = f"audio_batch_{i}.mp3" # Save as MP3 first
47
- wav_file = f"audio_batch_{i}.wav" # Convert to WAV
48
-
49
- # Generate audio for each batch and save as MP3
50
- tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
51
- await tts.save(mp3_file)
52
-
53
- # Convert MP3 to WAV
54
- batch_audio = AudioSegment.from_file(mp3_file, format="mp3")
55
- batch_audio.export(wav_file, format="wav")
56
-
57
- # Ensure WAV conversion succeeded and calculate duration
58
- batch_duration = get_audio_length(wav_file)
59
- srt_entries, cumulative_time = generate_accurate_srt(batch_text, cumulative_time, batch_index)
60
-
61
- # Append entries and audio for the batch
62
- total_srt_entries.extend(srt_entries)
63
  combined_audio += batch_audio
64
- batch_index += len(srt_entries)
65
-
66
- # Clean up temporary MP3 file
67
- os.remove(mp3_file)
68
-
69
  # Export combined audio and SRT
70
  combined_audio.export("final_audio.wav", format="wav")
71
  with open("final_subtitles.srt", "w") as srt_file:
72
- srt_file.write(srt.compose(total_srt_entries))
73
-
74
- # Final validation check
75
- validate_srt_against_audio("final_subtitles.srt", "final_audio.wav")
76
 
77
  return "final_subtitles.srt", "final_audio.wav"
78
 
79
- # Validate SRT timing with total audio length
80
- def validate_srt_against_audio(srt_file_path, audio_file_path):
81
- audio_duration = get_audio_length(audio_file_path)
82
-
83
- with open(srt_file_path, 'r') as file:
84
- subtitles = list(srt.parse(file.read()))
85
-
86
- for subtitle in subtitles:
87
- if subtitle.end.total_seconds() > audio_duration:
88
- subtitle.end = srt.timedelta(seconds=audio_duration)
89
- break
90
-
91
- with open(srt_file_path, 'w') as file:
92
- file.write(srt.compose(subtitles))
93
-
94
- # Gradio function with error handling and markdown message
95
- async def process_script(script_text, language, voice):
96
- try:
97
- srt_path, audio_path = await batch_process_srt_and_audio(script_text, voice)
98
- return srt_path, audio_path, audio_path, ""
99
- except Exception as e:
100
- print(f"Error: {e}")
101
- return None, None, None, "An error occurred. Please check the script text and try again."
102
-
103
- # Dynamic voice selection based on language
104
- def update_voice_options(language):
105
- voices = {
106
- "en-US": ["en-US-AndrewNeural", "en-US-JennyNeural"],
107
- "es-ES": ["es-ES-AlvaroNeural", "es-ES-ElviraNeural"]
108
- }
109
- return gr.update(choices=voices.get(language, []), value=voices.get(language, [])[0])
110
-
111
- # Gradio app setup
112
- with gr.Blocks() as app:
113
- gr.Markdown("# Text to Speech with Accurate SRT and Audio Generation")
114
-
115
- language = gr.Dropdown(choices=["en-US", "es-ES"], label="Select Language", value="en-US")
116
- voice = gr.Dropdown(choices=["en-US-AndrewNeural", "en-US-JennyNeural"], label="Select Voice")
117
-
118
- language.change(fn=update_voice_options, inputs=language, outputs=voice)
119
-
120
- script_text = gr.Textbox(label="Enter Script Text", lines=10)
121
-
122
- outputs = [
123
  gr.File(label="Download SRT File"),
124
  gr.File(label="Download Audio File"),
125
- gr.Audio(label="Play Audio"),
126
- gr.Markdown(label="Error Message") # This will display any error messages
127
- ]
128
-
129
- submit_button = gr.Button("Generate Audio and SRT")
130
- submit_button.click(process_script, inputs=[script_text, language, voice], outputs=outputs)
131
 
132
  app.launch()
 
2
  from pydub import AudioSegment
3
  import edge_tts
4
  import os
 
5
  import asyncio
6
+
7
+ # Function to get the length of an audio file in seconds
8
def get_audio_length(audio_file):
    """Return the duration, in seconds, of the audio stored at ``audio_file``.

    The file is decoded with pydub's ``AudioSegment.from_file`` and the
    segment's ``duration_seconds`` is returned.
    """
    return AudioSegment.from_file(audio_file).duration_seconds
11
+
12
+ # Function to format time for SRT
13
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the audio, as an int or float.
            Negative values are clamped to zero because SRT has no notion
            of negative timestamps.

    Returns:
        The timestamp string, e.g. ``"00:01:02,345"``.
    """
    # Round to whole milliseconds *before* splitting into fields. Truncating
    # the fractional part separately turns values such as 2.3 into ",299"
    # (float representation error) and cannot carry 59.9996 over into the
    # next minute.
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hrs, mins = divmod(total_mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
+
21
+ # Function to generate SRT with accurate timing per batch
22
def _build_srt_segments(words, batch_num, start_offset, total_duration,
                        words_per_segment=10):
    """Lay ``words`` out as SRT entries that span exactly ``total_duration``.

    Returns a ``(srt_text, end_offset)`` tuple, where ``end_offset`` is
    ``start_offset + total_duration`` up to float rounding.
    """
    seconds_per_word = total_duration / len(words)
    entries = []
    cursor = start_offset
    for seg_index, first in enumerate(range(0, len(words), words_per_segment)):
        chunk = words[first:first + words_per_segment]
        # Each segment lasts in proportion to the words it really contains,
        # so a short final segment does not run past the end of the audio.
        segment_end = start_offset + seconds_per_word * (first + len(chunk))
        entries.append(
            # Index scheme kept from the original: per-batch counter plus
            # a batch_num * 100 base.
            f"{seg_index + 1 + (batch_num * 100)}\n"
            f"{format_time(cursor)} --> {format_time(segment_end)}\n"
            f"{' '.join(chunk)}\n\n"
        )
        cursor = segment_end
    return "".join(entries), cursor


async def generate_accurate_srt(batch_text, batch_num, start_offset,
                                voice="en-US-AndrewNeural", rate="-25%"):
    """Synthesize one text batch and build SRT entries timed to its audio.

    Args:
        batch_text: Text to speak; must contain at least one word.
        batch_num: Zero-based batch index, used for the temp file name and
            as the base of the subtitle indices.
        start_offset: Time, in seconds, at which this batch starts within
            the combined audio.
        voice: edge-tts voice name.
        rate: edge-tts speaking-rate adjustment string.

    Returns:
        A ``(srt_content, audio_file, end_offset)`` tuple: the SRT text for
        this batch, the path of the generated audio file (the caller is
        responsible for deleting it), and the offset at which the batch ends.

    Raises:
        ValueError: If ``batch_text`` is empty or whitespace-only.
    """
    words = batch_text.split()
    if not words:
        # Fail before the network round-trip instead of dividing by zero
        # when computing the per-word duration.
        raise ValueError("batch_text contains no words to synthesize")

    # edge-tts emits MP3-encoded audio, so name the temp file accordingly
    # rather than giving MP3 data a misleading .wav extension.
    audio_file = f"batch_{batch_num}_audio.mp3"

    # Generate the audio using edge-tts.
    tts = edge_tts.Communicate(batch_text, voice, rate=rate)
    await tts.save(audio_file)

    # Spread the subtitles over the measured length of the generated audio.
    actual_length = get_audio_length(audio_file)
    srt_content, end_offset = _build_srt_segments(
        words, batch_num, start_offset, actual_length
    )
    return srt_content, audio_file, end_offset
48
+
49
+ # Batch processing function with concurrent processing and progress indicator
50
async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
    """Convert a script to one audio file plus a matching SRT subtitle file.

    The script is split into batches of at most roughly 500 characters on
    whitespace boundaries. Each batch is synthesized in order, its audio is
    appended to the combined track, and its subtitles are offset by the
    audio accumulated so far.

    Args:
        script_text: The full script to narrate.
        progress: Gradio progress tracker used to report per-batch progress.

    Returns:
        A ``(srt_path, audio_path)`` tuple naming the files written to the
        current working directory.
    """
    # Local import so this block does not depend on the file-level imports.
    import textwrap

    # Break on whitespace rather than at fixed character positions so no
    # word is cut in half between two batches. A single word longer than
    # the limit is kept whole.
    batches = textwrap.wrap(
        script_text, width=500, break_long_words=False, break_on_hyphens=False
    )

    srt_parts = []
    combined_audio = AudioSegment.empty()
    start_offset = 0.0  # Cumulative audio time preceding the current batch.

    # Batches run one after another on purpose: each batch's subtitle
    # offset depends on the real duration of every batch before it, and
    # both audio and subtitles must be concatenated in script order.
    # NOTE(review): progress.tqdm is the iteration helper I know of on
    # gr.Progress - confirm against the installed Gradio version.
    for batch_num, batch_text in enumerate(
        progress.tqdm(batches, desc="Processing batches...")
    ):
        srt_content, audio_file, _ = await generate_accurate_srt(
            batch_text, batch_num, start_offset
        )
        try:
            batch_audio = AudioSegment.from_file(audio_file)
        finally:
            # Always remove the per-batch temp file, even if decoding fails.
            if os.path.exists(audio_file):
                os.remove(audio_file)

        srt_parts.append(srt_content)
        combined_audio += batch_audio
        # Advance by the decoded audio length so the next batch's subtitles
        # line up with where its audio actually starts in the combined track.
        start_offset += batch_audio.duration_seconds

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    with open("final_subtitles.srt", "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(srt_parts))

    return "final_subtitles.srt", "final_audio.wav"
81
 
82
+ # Gradio interface function
83
async def process_script(script_text):
    """Gradio callback: generate subtitles and audio for ``script_text``.

    Returns a three-item tuple ``(srt_path, audio_path, audio_path)``; the
    audio path appears twice because it is returned for two separate
    outputs.
    """
    subtitles_file, audio_file = await batch_process_srt_and_audio(script_text)
    return subtitles_file, audio_file, audio_file
86
+
87
+ # Gradio interface setup
88
# Gradio interface: one script textbox in; SRT download, audio download and
# an inline audio player out.
_script_input = gr.Textbox(label="Enter Script Text", lines=10)
_result_outputs = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Play Audio"),
]

app = gr.Interface(
    fn=process_script,
    inputs=_script_input,
    outputs=_result_outputs,
    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download.",
)

app.launch()