hivecorp committed on
Commit
3b10a63
·
verified ·
1 Parent(s): c812734

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -36
app.py CHANGED
@@ -9,6 +9,17 @@ from concurrent.futures import ThreadPoolExecutor
9
  from typing import List, Tuple
10
  import math
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  def get_audio_length(audio_file):
13
  audio = AudioSegment.from_file(audio_file)
14
  return len(audio) / 1000
@@ -88,71 +99,70 @@ def smart_text_split(text, words_per_line, lines_per_segment):
88
 
89
  return segments
90
 
91
- async def process_segment(segment: str, idx: int, voice: str, rate: str, pitch: str) -> Tuple[str, AudioSegment, int]:
92
- """Process a single segment concurrently"""
93
  audio_file = f"temp_segment_{idx}_{uuid.uuid4()}.wav"
94
  try:
95
  tts = edge_tts.Communicate(segment, voice, rate=rate, pitch=pitch)
96
  await tts.save(audio_file)
97
 
98
  segment_audio = AudioSegment.from_file(audio_file)
99
- # Add small silence at the end of each segment
100
- segment_audio = segment_audio + AudioSegment.silent(duration=250)
101
  segment_duration = len(segment_audio)
102
 
103
- return "", segment_audio, segment_duration
 
 
 
 
 
 
 
 
 
 
104
  finally:
105
  if os.path.exists(audio_file):
106
  os.remove(audio_file)
107
 
108
- async def process_chunk_parallel(chunks: List[str], start_idx: int, voice: str, rate: str, pitch: str) -> Tuple[str, AudioSegment]:
109
- """Process a chunk of segments in parallel"""
110
- tasks = [
111
- process_segment(segment, i + start_idx, voice, rate, pitch)
112
- for i, segment in enumerate(chunks, 1)
113
- ]
114
-
115
- results = await asyncio.gather(*tasks)
116
-
117
  combined_audio = AudioSegment.empty()
118
  srt_content = ""
119
- current_time = 0
120
 
121
- for idx, (_, audio_part, duration) in enumerate(results, start_idx):
122
- # Calculate start and end times
123
- start_time = current_time
124
- end_time = start_time + duration
125
-
126
- # Format SRT entry
127
- srt_content += f"{idx}\n"
128
- srt_content += f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
129
- srt_content += chunks[idx - start_idx] + "\n\n"
130
-
131
  combined_audio += audio_part
132
- # Add the duration plus a small gap
133
- current_time = end_time + 100 # 100ms gap between segments
134
 
135
  return srt_content, combined_audio
136
 
137
  async def generate_accurate_srt(text, voice, rate, pitch, words_per_line, lines_per_segment):
138
  segments = smart_text_split(text, words_per_line, lines_per_segment)
 
139
 
140
- # Process smaller chunks for better timing control
141
- chunk_size = 5 # Reduced from 10 to 5 for better timing control
142
  chunks = [segments[i:i + chunk_size] for i in range(0, len(segments), chunk_size)]
143
 
144
  final_srt = ""
145
  final_audio = AudioSegment.empty()
146
-
147
- # Process chunks in sequence for better timing accuracy
148
  current_index = 1
149
- for chunk in chunks:
150
- srt_content, audio_content = await process_chunk_parallel(
151
- chunk, current_index, voice, rate, pitch
152
- )
 
 
 
 
 
 
 
 
 
153
  final_srt += srt_content
154
  final_audio += audio_content
155
- current_index += len(chunk)
156
 
157
  # Export final files
158
  unique_id = uuid.uuid4()
 
9
  from typing import List, Tuple
10
  import math
11
 
12
class TimingManager:
    """Hand out consecutive, non-overlapping time windows for subtitle entries.

    The manager keeps a running clock. Every call to :meth:`get_timing`
    reserves a window of the requested duration starting at the clock, then
    moves the clock past that window plus a fixed gap. All values share one
    unit (milliseconds, matching the default gap).

    Args:
        segment_gap: Pause inserted after every reserved window before the
            next one may start. Defaults to 100 ms.
        start_time: Initial clock value, i.e. where the first window begins.
            Defaults to 0.
    """

    def __init__(self, segment_gap=100, start_time=0):
        # Start of the next window that will be handed out.
        self.current_time = start_time
        # Gap (ms) left between the end of one window and the start of the next.
        self.segment_gap = segment_gap

    def get_timing(self, duration):
        """Reserve the next window of ``duration`` and return ``(start, end)``.

        The internal clock is advanced to ``end + segment_gap`` so the
        following call starts after the gap.
        """
        start_time = self.current_time
        end_time = start_time + duration
        self.current_time = end_time + self.segment_gap
        return start_time, end_time
22
+
23
  def get_audio_length(audio_file):
24
  audio = AudioSegment.from_file(audio_file)
25
  return len(audio) / 1000
 
99
 
100
  return segments
101
 
102
async def process_segment(segment: str, idx: int, voice: str, rate: str, pitch: str, timing_mgr: TimingManager) -> Tuple[str, AudioSegment]:
    """Synthesize one text segment and build its SRT entry.

    The text is rendered to a uniquely named temporary file with
    ``edge_tts``, loaded back as an ``AudioSegment``, and timed through
    ``timing_mgr``. The temporary file is always removed afterwards.

    Args:
        segment: Text to speak; also used verbatim as the subtitle text.
        idx: Subtitle number written as the first line of the SRT entry.
        voice, rate, pitch: Passed straight through to ``edge_tts.Communicate``.
        timing_mgr: Shared clock that assigns this segment's start/end time.

    Returns:
        ``(srt_entry, audio)`` where ``audio`` is the spoken segment followed
        by ``timing_mgr.segment_gap`` milliseconds of silence.
    """
    audio_file = f"temp_segment_{idx}_{uuid.uuid4()}.wav"
    try:
        tts = edge_tts.Communicate(segment, voice, rate=rate, pitch=pitch)
        await tts.save(audio_file)

        segment_audio = AudioSegment.from_file(audio_file)
        segment_duration = len(segment_audio)

        # NOTE(review): the window is reserved only after the await above, so
        # if several calls sharing one timing_mgr run concurrently, windows
        # are handed out in completion order rather than idx order.
        start_time, end_time = timing_mgr.get_timing(segment_duration)

        # Format SRT entry
        srt_content = (
            f"{idx}\n"
            f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
            f"{segment}\n\n"
        )

        # The timing manager advances its clock by duration + segment_gap, so
        # the audio must carry the same gap; otherwise the concatenated audio
        # runs ahead of the subtitles by segment_gap per segment.
        gap = timing_mgr.segment_gap
        if gap > 0:
            segment_audio = segment_audio + AudioSegment.silent(duration=gap)

        return srt_content, segment_audio
    finally:
        # Clean up the temporary file whether or not synthesis succeeded.
        if os.path.exists(audio_file):
            os.remove(audio_file)
126
 
127
async def process_chunk_parallel(chunks: List[str], start_idx: int, voice: str, rate: str, pitch: str, timing_mgr: TimingManager) -> Tuple[str, AudioSegment]:
    """Render every segment of one chunk and merge the results.

    Despite the name, segments inside a chunk are awaited one after another,
    so ``timing_mgr`` is advanced in segment order within the chunk.

    Args:
        chunks: Segment texts belonging to this chunk.
        start_idx: Subtitle number given to the first segment; later segments
            count up from it.
        voice, rate, pitch: Forwarded to ``process_segment``.
        timing_mgr: Shared clock forwarded to ``process_segment``.

    Returns:
        ``(srt_text, audio)``: the SRT entries concatenated in order and the
        segment audio appended in the same order.
    """
    srt_entries = []
    merged_audio = AudioSegment.empty()

    for number, text in enumerate(chunks, start_idx):
        entry, clip = await process_segment(text, number, voice, rate, pitch, timing_mgr)
        srt_entries.append(entry)
        merged_audio += clip

    return "".join(srt_entries), merged_audio
139
 
140
  async def generate_accurate_srt(text, voice, rate, pitch, words_per_line, lines_per_segment):
141
  segments = smart_text_split(text, words_per_line, lines_per_segment)
142
+ timing_mgr = TimingManager()
143
 
144
+ # Process in smaller chunks
145
+ chunk_size = 5
146
  chunks = [segments[i:i + chunk_size] for i in range(0, len(segments), chunk_size)]
147
 
148
  final_srt = ""
149
  final_audio = AudioSegment.empty()
 
 
150
  current_index = 1
151
+
152
+ # Process chunks in parallel but maintain sequential timing
153
+ chunk_tasks = []
154
+ for i, chunk in enumerate(chunks):
155
+ start_idx = current_index + (i * chunk_size)
156
+ task = process_chunk_parallel(chunk, start_idx, voice, rate, pitch, timing_mgr)
157
+ chunk_tasks.append(task)
158
+
159
+ # Gather results in order
160
+ chunk_results = await asyncio.gather(*chunk_tasks)
161
+
162
+ # Combine results
163
+ for srt_content, audio_content in chunk_results:
164
  final_srt += srt_content
165
  final_audio += audio_content
 
166
 
167
  # Export final files
168
  unique_id = uuid.uuid4()