hivecorp committed on
Commit
ea230c6
·
verified ·
1 Parent(s): c5c349b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -90
app.py CHANGED
@@ -1,135 +1,115 @@
1
- import os
2
  import gradio as gr
3
- import srt
4
- import edge_tts
5
  import asyncio
 
6
  import tempfile
 
 
7
  from datetime import timedelta
8
- from pydub import AudioSegment
9
 
10
- # Define Edge TTS settings
11
  DEFAULT_VOICE = "en-US-AndrewNeural"
12
  DEFAULT_RATE = "-25%"
13
 
14
def split_into_batches(script, batch_size=320, min_batch_size=300):
    """Split *script* into batches of roughly 300-320 words.

    A batch is closed either when it reaches ``batch_size`` words (hard
    limit) or, once it holds at least ``min_batch_size`` words, at the first
    word that ends a sentence (".", "?" or "!"). Previously every sentence
    end closed the batch, which produced one batch per sentence instead of
    the intended 300-320 word batches.

    Args:
        script: Full text to split; words are separated on whitespace.
        batch_size: Maximum number of words in a batch.
        min_batch_size: Word count from which a sentence end may close the
            batch. Clamped to ``batch_size`` so a small ``batch_size`` still
            behaves as a plain hard limit.

    Returns:
        List of batch strings, each with its words joined by single spaces.
        An empty or whitespace-only script yields an empty list.
    """
    sentence_enders = (".", "?", "!")
    soft_limit = min(min_batch_size, batch_size)

    batches = []
    current_batch = []
    for word in script.split():
        current_batch.append(word)
        word_count = len(current_batch)
        at_sentence_end = word.endswith(sentence_enders)
        if word_count >= batch_size or (word_count >= soft_limit and at_sentence_end):
            batches.append(" ".join(current_batch))
            current_batch = []

    # Whatever is left over forms the final, shorter batch.
    if current_batch:
        batches.append(" ".join(current_batch))

    return batches
34
 
35
def split_into_segments(batch, segment_size=7):
    """Cut *batch* into short subtitle-sized segments.

    A segment is closed as soon as it holds ``segment_size`` words or its
    latest word ends with ".", "?" or "!". Leftover words form a final
    segment.

    Args:
        batch: Text to split; words are separated on whitespace.
        segment_size: Word count at which a segment is closed.

    Returns:
        List of segment strings, words joined by single spaces.
    """
    sentence_enders = (".", "?", "!")
    pieces = []
    pending = []

    for token in batch.split():
        pending.append(token)
        is_full = len(pending) >= segment_size
        if is_full or token.endswith(sentence_enders):
            pieces.append(" ".join(pending))
            pending = []

    if pending:
        pieces.append(" ".join(pending))

    return pieces
51
-
52
async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
    """Synthesize *text* with Edge TTS and return the path of the audio file.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment string (for example "-25%").

    Returns:
        Path of a newly created temporary file holding the synthesized
        audio. The file is not deleted automatically; the caller owns it.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    # Reserve a unique path and close the handle right away so that edge-tts
    # is the only writer to the file.
    # NOTE(review): the ".wav" suffix is kept for compatibility with
    # downstream code; edge-tts appears to emit MP3 data by default - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    # Keyword arguments: passing ``rate`` positionally breaks on edge-tts
    # releases where the prosody options are keyword-only.
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
    try:
        await communicate.save(audio_path)
    except BaseException:
        # Do not leave an orphaned temp file behind on failure/cancellation.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path
58
 
59
async def generate_srt_for_batch(batch_text, batch_index):
    """Build subtitles and per-segment audio for one batch of text.

    The batch is split with ``split_into_segments``; each segment is
    synthesized with ``generate_audio`` and timed with
    ``get_audio_length``. Subtitle times are consecutive and start at zero
    for every batch.

    Args:
        batch_text: Text of the batch.
        batch_index: Zero-based batch number; subtitle indices are
            ``batch_index * 100 + position`` with position starting at 1.

    Returns:
        Tuple ``(subtitles, audio_paths)`` with one entry per segment.
    """
    index_base = batch_index * 100
    subtitles = []
    audio_paths = []
    cursor = timedelta(seconds=0)

    for position, text in enumerate(split_into_segments(batch_text), start=1):
        # Synthesize first: the clip's measured length drives the timing.
        clip_path = await generate_audio(text)
        audio_paths.append(clip_path)

        finish = cursor + timedelta(seconds=get_audio_length(clip_path))
        subtitles.append(
            srt.Subtitle(
                index=index_base + position,
                start=cursor,
                end=finish,
                content=text,
            )
        )
        cursor = finish

    return subtitles, audio_paths
87
-
88
def get_audio_length(audio_path):
    """Return the duration, in seconds, of the audio file at *audio_path*.

    The file is loaded with ``AudioSegment.from_file`` and its
    ``duration_seconds`` attribute is returned.
    """
    return AudioSegment.from_file(audio_path).duration_seconds
92
-
93
async def process_script(script):
    """Turn *script* into one combined audio file and one SRT file.

    Batches are processed sequentially. Every batch reports subtitle times
    that start at zero, so each batch's entries are shifted by the end time
    of the previous batch; otherwise entries from different batches overlap
    once they are merged into one SRT file.

    Args:
        script: Full text to convert.

    Returns:
        Tuple ``(final_audio_path, final_srt_path)`` of temporary files. The
        caller owns both files.
    """
    batches = split_into_batches(script)
    all_srt_entries = []
    all_audio_files = []
    offset = timedelta(seconds=0)

    try:
        for batch_index, batch_text in enumerate(batches):
            srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)
            all_audio_files.extend(audio_files)

            # Move this batch onto the global timeline.
            for entry in srt_entries:
                entry.start += offset
                entry.end += offset
            if srt_entries:
                offset = max(entry.end for entry in srt_entries)
            all_srt_entries.extend(srt_entries)

        # Concatenate all segment clips into one track, in order.
        combined_audio = AudioSegment.empty()
        for audio_file in all_audio_files:
            combined_audio += AudioSegment.from_file(audio_file)
    finally:
        # The per-segment clips are intermediate files; remove them.
        for audio_file in all_audio_files:
            try:
                os.remove(audio_file)
            except OSError:
                pass

    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name and is open to a race with other processes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio_tmp:
        final_audio_path = audio_tmp.name
    combined_audio.export(final_audio_path, format="wav")

    # Explicit UTF-8 so non-ASCII scripts do not depend on the locale.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".srt", encoding="utf-8"
    ) as srt_file:
        srt_file.write(srt.compose(all_srt_entries))
        final_srt_path = srt_file.name

    return final_audio_path, final_srt_path
118
 
119
def generate_output(script):
    """Synchronous entry point used by the Gradio button.

    Runs the ``process_script`` coroutine to completion with
    ``asyncio.run`` and returns the two paths it yields as
    ``(audio_path, srt_path)``.
    """
    result = asyncio.run(process_script(script))
    audio_file, subtitle_file = result
    return audio_file, subtitle_file
123
-
124
# Gradio UI: a script textbox, audio and SRT outputs, and one button that
# runs generate_output.
# NOTE(review): block nesting was reconstructed from a whitespace-stripped
# view - confirm which components belong inside the Row.
with gr.Blocks() as app:
    gr.Markdown("### Text to Speech with Batch Processing and SRT Generation")
    text_input = gr.Textbox(
        label="Script Input",
        lines=10,
        placeholder="Enter your script here",
    )

    # Audio player and SRT download share one row.
    with gr.Row():
        audio_output = gr.Audio(type="filepath", label="Final Audio")
        srt_output = gr.File(label="Final SRT")

    process_button = gr.Button("Generate Audio and SRT")
    process_button.click(
        generate_output,
        inputs=text_input,
        outputs=[audio_output, srt_output],
    )

app.launch()
 
 
1
  import gradio as gr
 
 
2
  import asyncio
3
+ import edge_tts
4
  import tempfile
5
+ import os
6
+ import srt
7
  from datetime import timedelta
8
+ from itertools import chain
9
 
10
+ # Default TTS settings
11
  DEFAULT_VOICE = "en-US-AndrewNeural"
12
  DEFAULT_RATE = "-25%"
13
 
14
def split_into_batches(text, batch_size=320):
    """Split *text* into consecutive batches of at most ``batch_size`` words.

    Args:
        text: Text to split; words are separated on whitespace.
        batch_size: Word count at which a batch is closed.

    Returns:
        List of batch strings, words joined by single spaces. The last batch
        may be shorter; empty text yields an empty list.
    """
    batches = []
    pending = []

    for token in text.split():
        pending.append(token)
        # Close the batch once it has grown to the limit.
        if len(pending) >= batch_size:
            batches.append(" ".join(pending))
            pending = []

    if pending:
        batches.append(" ".join(pending))

    return batches
31
 
32
async def generate_srt_for_batch(batch_text, batch_index, segment_size=5, segment_seconds=2):
    """Build subtitles and per-segment audio for one batch of text.

    The batch is cut into segments that end when they reach
    ``segment_size`` words or when a word ends with ".", ",", "!" or "?".
    Each segment gets a fixed-length subtitle slot of ``segment_seconds``;
    slots are consecutive and start at zero for every batch. The durations
    are placeholders and are not measured from the generated audio.

    Args:
        batch_text: Text of the batch.
        batch_index: Zero-based batch number. Currently unused; kept so
            existing callers keep working.
        segment_size: Word count at which a segment is closed.
        segment_seconds: Length of every subtitle slot, in seconds.

    Returns:
        Tuple ``(segments, audio_files)``: the ``srt.Subtitle`` entries
        (indexed from 1 within the batch) and the audio file path produced
        by ``generate_audio`` for each segment, in the same order.
    """
    break_marks = (".", ",", "!", "?")

    # Pass 1: cut the text into segments.
    segment_texts = []
    current_segment = []
    for word in batch_text.split():
        current_segment.append(word)
        if len(current_segment) >= segment_size or word.endswith(break_marks):
            segment_texts.append(" ".join(current_segment))
            current_segment = []
    if current_segment:
        # Trailing words that never hit a break condition.
        segment_texts.append(" ".join(current_segment))

    # Pass 2: one fixed-length, back-to-back subtitle slot per segment.
    slot = timedelta(seconds=segment_seconds)
    segments = [
        srt.Subtitle(
            index=position + 1,
            start=position * slot,
            end=(position + 1) * slot,
            content=segment_text,
        )
        for position, segment_text in enumerate(segment_texts)
    ]

    # Pass 3: synthesize sequentially, preserving segment order.
    audio_files = []
    for segment_text in segment_texts:
        audio_files.append(await generate_audio(segment_text))

    return segments, audio_files
64
+
65
async def generate_audio(text, voice=DEFAULT_VOICE, rate=DEFAULT_RATE):
    """Synthesize *text* with Edge TTS and return the path of the audio file.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment string (for example "-25%").

    Returns:
        Path of a newly created temporary file holding the synthesized
        audio. The file is not deleted automatically; the caller owns it.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    # Reserve a unique path and close the handle before edge-tts writes to
    # it by name; keeping it open blocks the second open on some platforms.
    # NOTE(review): the ".wav" suffix is kept for compatibility with
    # downstream code; edge-tts appears to emit MP3 data by default - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate)
    try:
        await communicate.save(audio_path)
    except BaseException:
        # Do not leave an orphaned temp file behind on failure/cancellation.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path
71
 
72
async def process_script(script):
    """Turn *script* into one combined audio file and the SRT text.

    Batches are processed sequentially. Every batch reports subtitle times
    that start at zero, so each batch's entries are shifted by the end time
    of the previous batch before composing; otherwise entries from
    different batches overlap in the merged SRT.

    Args:
        script: Full text to convert.

    Returns:
        Tuple ``(combined_audio_path, final_srt)``: path of the concatenated
        audio file (a temporary file owned by the caller) and the composed
        SRT document as a string.

    Raises:
        ValueError: If *script* contains no words.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Local import so this function does not depend on the module's import
    # block being changed.
    import subprocess

    batches = split_into_batches(script)
    if not batches:
        # With nothing to synthesize, ffmpeg would be given an empty input.
        raise ValueError("script contains no text to synthesize")

    all_srt_entries = []
    all_audio_files = []
    offset = timedelta(seconds=0)

    try:
        for batch_index, batch_text in enumerate(batches):
            srt_entries, audio_files = await generate_srt_for_batch(batch_text, batch_index)
            all_audio_files.extend(audio_files)

            # Move this batch onto the global timeline.
            for entry in srt_entries:
                entry.start += offset
                entry.end += offset
            if srt_entries:
                offset = max(entry.end for entry in srt_entries)
            all_srt_entries.extend(srt_entries)

        final_srt = srt.compose(all_srt_entries)

        # Reserve the output path and close the handle before ffmpeg writes.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as combined:
            combined_audio_path = combined.name

        # Argument list instead of a shell string: no quoting pitfalls, and
        # check=True surfaces a failed concatenation instead of hiding it.
        command = [
            "ffmpeg",
            "-y",
            "-i",
            "concat:" + "|".join(all_audio_files),
            "-c",
            "copy",
            combined_audio_path,
        ]
        try:
            subprocess.run(command, check=True)
        except BaseException:
            # Do not leave an unusable output file behind.
            try:
                os.remove(combined_audio_path)
            except OSError:
                pass
            raise
    finally:
        # The per-segment clips are intermediate files; remove them.
        for audio_file in all_audio_files:
            try:
                os.remove(audio_file)
            except OSError:
                pass

    return combined_audio_path, final_srt
92
 
93
def generate_output(script):
    """Synchronous entry point used by the Gradio button.

    Runs ``process_script`` to completion, then writes the SRT text it
    returns to a temporary ``.srt`` file.

    Args:
        script: Text entered by the user.

    Returns:
        Tuple ``(final_audio_path, srt_file_path)`` of file paths for the
        audio and file output components.
    """
    final_audio_path, final_srt = asyncio.run(process_script(script))

    # Write through the temp-file handle itself (no leaked second handle) and
    # force UTF-8 so non-ASCII scripts do not depend on the locale encoding.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".srt", encoding="utf-8"
    ) as srt_file:
        srt_file.write(final_srt)
        srt_file_path = srt_file.name

    return final_audio_path, srt_file_path
103
+
104
# Gradio UI: a script textbox, a trigger button, and the audio and SRT
# outputs filled in by generate_output.
with gr.Blocks() as app:
    gr.Markdown("# Batch SRT and Audio Generator")
    script_input = gr.Textbox(lines=10, label="Enter Script")
    generate_button = gr.Button("Generate SRT and Audio")
    audio_output = gr.Audio(type="filepath", label="Generated Audio")
    srt_output = gr.File(label="Generated SRT File")

    # Button click: script text in, (audio path, SRT path) out.
    generate_button.click(
        fn=generate_output,
        inputs=script_input,
        outputs=[audio_output, srt_output],
    )

app.launch()