hivecorp committed
Commit 266d5cd · verified · 1 Parent(s): 4902504

Update app.py

Files changed (1)
  1. app.py +35 -25
app.py CHANGED
@@ -39,6 +39,7 @@ class Segment:
     end_time: int = 0
     duration: int = 0
     audio: Optional[AudioSegment] = None
+    lines: List[str] = None # Add lines field for display purposes only
 
 class TextProcessor:
     def __init__(self, words_per_line: int, lines_per_segment: int):
@@ -182,13 +183,18 @@ class TextProcessor:
         return lines
 
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
-    """Process a single segment and calculate its timing"""
+    """Process a complete segment as a single TTS unit"""
     audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
     try:
-        tts = edge_tts.Communicate(segment.text, voice, rate=rate, pitch=pitch)
+        # Process the entire segment text as one unit, replacing newlines with spaces
+        segment_text = ' '.join(segment.text.split('\n'))
+        tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
         await tts.save(audio_file)
 
         segment.audio = AudioSegment.from_file(audio_file)
+        # Add small silence at start and end for natural spacing
+        silence = AudioSegment.silent(duration=50)
+        segment.audio = silence + segment.audio + silence
         segment.duration = len(segment.audio)
 
         return segment
@@ -197,46 +203,50 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
         os.remove(audio_file)
 
 async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int) -> Tuple[str, str]:
-    # Initialize text processor and split text
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)
 
-    # Process all segments in parallel
-    tasks = [
-        process_segment_with_timing(segment, voice, rate, pitch)
-        for segment in segments
-    ]
-    processed_segments = await asyncio.gather(*tasks)
-
-    # Calculate timing for each segment
+    # Process segments sequentially for better timing control
+    processed_segments = []
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
 
-    for segment in processed_segments:
-        # Set segment timing
-        segment.start_time = current_time
-        segment.end_time = current_time + segment.duration
+    for segment in segments:
+        # Process segment
+        processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
+
+        # Calculate precise timing
+        processed_segment.start_time = current_time
+        processed_segment.end_time = current_time + processed_segment.duration
 
-        # Add to SRT content
+        # Add to SRT with precise timing
         srt_content += (
-            f"{segment.id}\n"
-            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
-            f"{segment.text}\n\n"
+            f"{processed_segment.id}\n"
+            f"{format_time_ms(processed_segment.start_time)} --> {format_time_ms(processed_segment.end_time)}\n"
+            f"{processed_segment.text}\n\n"
         )
 
-        # Add to final audio
-        final_audio += segment.audio
+        # Add to final audio with precise positioning
+        final_audio = final_audio.append(processed_segment.audio, crossfade=0)
 
-        # Update timing
-        current_time = segment.end_time + 100 # 100ms gap between segments
+        # Update timing with precise gap
+        current_time = processed_segment.end_time
+        processed_segments.append(processed_segment)
 
-    # Export files
+    # Export with high precision
     unique_id = uuid.uuid4()
     audio_path = f"final_audio_{unique_id}.mp3"
     srt_path = f"final_subtitles_{unique_id}.srt"
 
-    final_audio.export(audio_path, format="mp3", bitrate="320k")
+    # Export with high quality settings for precise timing
+    final_audio.export(
+        audio_path,
+        format="mp3",
+        bitrate="320k",
+        parameters=["-ar", "48000", "-ac", "2"]
+    )
+
     with open(srt_path, "w", encoding='utf-8') as f:
         f.write(srt_content)
 
252