hivecorp committed on
Commit
f4b5c65
·
verified ·
1 Parent(s): ea230c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -109
app.py CHANGED
@@ -1,115 +1,92 @@
1
  import gradio as gr
2
- import asyncio
3
  import edge_tts
4
- import tempfile
5
  import os
6
- import srt
7
- from datetime import timedelta
8
- from itertools import chain
9
-
10
- # Default TTS settings
11
- DEFAULT_VOICE = "en-US-AndrewNeural"
12
- DEFAULT_RATE = "-25%"
13
-
14
- # Function to split text into batches based on a specified word limit (300-320)
15
- def split_into_batches(text, batch_size=320):
16
- words = text.split()
17
- batches = []
18
- current_batch = []
19
- current_length = 0
20
-
21
- for word in words:
22
- current_batch.append(word)
23
- current_length += 1
24
- if current_length >= batch_size:
25
- batches.append(" ".join(current_batch))
26
- current_batch = []
27
- current_length = 0
28
- if current_batch:
29
- batches.append(" ".join(current_batch))
30
- return batches
31
-
32
- # Function to generate SRT entries and audio for each segment within a batch
33
- async def generate_srt_for_batch(batch_text, batch_index):
34
- words = batch_text.split()
35
- segments = []
36
- segment_texts = []
37
- start_time = timedelta(seconds=0)
38
-
39
- # Loop through words to create segments of 5-8 words, considering punctuation
40
- current_segment = []
41
- for i, word in enumerate(words):
42
- current_segment.append(word)
43
- if len(current_segment) >= 5 or word.endswith((".", ",", "!", "?")):
44
- segment_text = " ".join(current_segment)
45
- end_time = start_time + timedelta(seconds=2) # Example: 2 seconds per segment, adjust as needed
46
- segments.append(srt.Subtitle(index=len(segments)+1, start=start_time, end=end_time, content=segment_text))
47
- start_time = end_time
48
- segment_texts.append(segment_text)
49
- current_segment = []
50
-
51
- # Handle remaining words in the last segment
52
- if current_segment:
53
- segment_text = " ".join(current_segment)
54
- end_time = start_time + timedelta(seconds=2)
55
- segments.append(srt.Subtitle(index=len(segments)+1, start=start_time, end=end_time, content=segment_text))
56
- segment_texts.append(segment_text)
57
-
58
- audio_files = []
59
- for segment_text in segment_texts:
60
- audio_path = await generate_audio(segment_text)
61
- audio_files.append(audio_path)
62
-
63
- return segments, audio_files
64
-
65
- # Function to generate audio using Edge TTS for a given text segment
66
- async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
67
- communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
68
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
69
- await communicate.save(temp_audio.name)
70
- return temp_audio.name
71
-
72
- # Function to process the script in batches and generate the final audio and SRT
73
- async def process_script(script):
74
- batches = split_into_batches(script)
75
- all_srt_entries = []
76
- all_audio_files = []
77
-
78
- # Process each batch independently, keeping track of SRT and audio segments
79
- for batch_index, batch_text in enumerate(batches):
80
- srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)
81
- all_srt_entries.extend(srt_entries)
82
- all_audio_files.extend(audio_files)
83
-
84
- # Combine and synchronize all SRT entries
85
- final_srt = srt.compose(all_srt_entries)
86
-
87
- # Concatenate all audio files into a single output
88
- combined_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
89
- os.system(f"ffmpeg -y -i \"concat:{'|'.join(all_audio_files)}\" -c copy {combined_audio_path}")
90
-
91
- return combined_audio_path, final_srt
92
-
93
- # Function to handle Gradio interface output generation
94
- def generate_output(script):
95
- final_audio_path, final_srt = asyncio.run(process_script(script))
96
 
97
- # Save final SRT file
98
- srt_file_path = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name
99
- with open(srt_file_path, "w") as srt_file:
100
- srt_file.write(final_srt)
101
-
102
- return final_audio_path, srt_file_path
103
-
104
- # Gradio Interface
105
- with gr.Blocks() as app:
106
- gr.Markdown("# Batch SRT and Audio Generator")
107
- script_input = gr.Textbox(label="Enter Script", lines=10)
108
- generate_button = gr.Button("Generate SRT and Audio")
109
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
110
- srt_output = gr.File(label="Generated SRT File")
111
-
112
- # Connect Gradio elements to output generation function
113
- generate_button.click(generate_output, inputs=script_input, outputs=[audio_output, srt_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  app.launch()
 
1
  import gradio as gr
2
+ from pydub import AudioSegment
3
  import edge_tts
 
4
  import os
5
+ import asyncio
6
+
7
+ # Function to get the length of an audio file in seconds
8
+ def get_audio_length(audio_file):
9
+ audio = AudioSegment.from_file(audio_file)
10
+ return audio.duration_seconds
11
+
12
+ # Function to format time for SRT
13
+ def format_time(seconds):
14
+ millis = int((seconds % 1) * 1000)
15
+ seconds = int(seconds)
16
+ hrs = seconds // 3600
17
+ mins = (seconds % 3600) // 60
18
+ secs = seconds % 60
19
+ return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
+
21
+ # Function to generate SRT with accurate timing per batch
22
+ async def generate_accurate_srt(batch_text, batch_num, start_offset):
23
+ audio_file = f"batch_{batch_num}_audio.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ # Generate the audio using edge-tts
26
+ tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
27
+ await tts.save(audio_file)
28
+
29
+ # Get the actual length of the audio file
30
+ actual_length = get_audio_length(audio_file)
31
+
32
+ # Initialize SRT content
33
+ srt_content = ""
34
+ words = batch_text.split()
35
+ segment_duration = actual_length / len(words) * 10 # Adjusted for ~10 words per SRT segment
36
+ start_time = start_offset
37
+
38
+ # Build SRT content with accurate timing
39
+ for i in range(0, len(words), 10):
40
+ segment_words = words[i:i+10]
41
+ end_time = start_time + segment_duration
42
+ srt_content += f"{i // 10 + 1 + (batch_num * 100)}\n"
43
+ srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
44
+ srt_content += " ".join(segment_words) + "\n\n"
45
+ start_time = end_time
46
+
47
+ return srt_content, audio_file, start_time
48
+
49
+ # Batch processing function for SRT and audio generation
50
+ async def batch_process_srt_and_audio(script_text):
51
+ batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
52
+ all_srt_content = ""
53
+ combined_audio = AudioSegment.empty()
54
+ start_offset = 0.0 # Track cumulative time offset for SRT timing
55
+
56
+ for batch_num, batch_text in enumerate(batches):
57
+ srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset)
58
+ all_srt_content += srt_content
59
+
60
+ # Append the audio of each batch to the combined audio
61
+ batch_audio = AudioSegment.from_file(audio_file)
62
+ combined_audio += batch_audio
63
+ start_offset = end_offset # Update the start offset for the next batch
64
+
65
+ # Clean up the individual batch audio file
66
+ os.remove(audio_file)
67
+
68
+ # Export combined audio and SRT
69
+ combined_audio.export("final_audio.wav", format="wav")
70
+ with open("final_subtitles.srt", "w") as srt_file:
71
+ srt_file.write(all_srt_content)
72
+
73
+ return "final_subtitles.srt", "final_audio.wav"
74
+
75
+ # Gradio interface function
76
+ async def process_script(script_text):
77
+ srt_path, audio_path = await batch_process_srt_and_audio(script_text)
78
+ return srt_path, audio_path, audio_path
79
+
80
+ # Gradio interface setup
81
+ app = gr.Interface(
82
+ fn=process_script,
83
+ inputs=gr.Textbox(label="Enter Script Text", lines=10),
84
+ outputs=[
85
+ gr.File(label="Download SRT File"),
86
+ gr.File(label="Download Audio File"),
87
+ gr.Audio(label="Play Audio")
88
+ ],
89
+ description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download."
90
+ )
91
 
92
  app.launch()