hivecorp committed on
Commit
3851ab4
·
verified ·
1 Parent(s): 2ff3127

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -9
app.py CHANGED
@@ -13,6 +13,8 @@ import hashlib
13
  import json
14
  from pathlib import Path
15
  from tqdm.asyncio import tqdm
 
 
16
 
17
  class TimingManager:
18
  def __init__(self):
@@ -45,6 +47,20 @@ class Segment:
45
  audio: Optional[AudioSegment] = None
46
  lines: List[str] = None # Add lines field for display purposes only
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  class TextProcessor:
49
  def __init__(self, words_per_line: int, lines_per_segment: int):
50
  self.words_per_line = words_per_line
@@ -108,6 +124,9 @@ class TextProcessor:
108
  return breaks
109
 
110
  def split_into_segments(self, text: str) -> List[Segment]:
 
 
 
111
  # Normalize text and add proper spacing around punctuation
112
  text = re.sub(r'\s+', ' ', text.strip())
113
  text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
@@ -210,6 +229,8 @@ class SSMLBuilder:
210
  self.content = []
211
 
212
  def add_text(self, text: str):
 
 
213
  self.content.append(text)
214
  return self
215
 
@@ -274,16 +295,19 @@ class SpeechEnhancer:
274
 
275
  async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
276
  """Process segment with enhanced speech features"""
277
- cache_key = cache.get_cache_key(segment.text, voice, rate, pitch)
278
- cached_audio = cache.get_cached_audio(cache_key)
279
-
280
- if cached_audio:
281
- segment.audio = cached_audio
282
- segment.duration = len(cached_audio)
283
- return segment
284
-
285
  try:
286
- enhanced_text = SpeechEnhancer.add_speech_marks(segment.text)
 
 
 
 
 
 
 
 
 
 
 
287
  tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
288
 
289
  audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
@@ -303,6 +327,9 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
303
  os.remove(audio_file)
304
 
305
  async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
 
 
 
306
  processor = TextProcessor(words_per_line, lines_per_segment)
307
  segments = processor.split_into_segments(text)
308
 
 
13
  import json
14
  from pathlib import Path
15
  from tqdm.asyncio import tqdm
16
+ from html import unescape
17
+ import html
18
 
19
  class TimingManager:
20
  def __init__(self):
 
47
  audio: Optional[AudioSegment] = None
48
  lines: List[str] = None # Add lines field for display purposes only
49
 
50
+ class TextCleaner:
51
+ @staticmethod
52
+ def clean_text(text: str) -> str:
53
+ """Clean text from HTML and normalize for TTS"""
54
+ # Remove HTML tags
55
+ text = re.sub(r'<[^>]+>', '', text)
56
+ # Convert HTML entities
57
+ text = unescape(text)
58
+ # Normalize whitespace
59
+ text = ' '.join(text.split())
60
+ # Fix common punctuation issues
61
+ text = re.sub(r'\s+([.,!?;:])', r'\1', text)
62
+ return text
63
+
64
  class TextProcessor:
65
  def __init__(self, words_per_line: int, lines_per_segment: int):
66
  self.words_per_line = words_per_line
 
124
  return breaks
125
 
126
  def split_into_segments(self, text: str) -> List[Segment]:
127
+ # Clean text before processing
128
+ text = TextCleaner.clean_text(text)
129
+
130
  # Normalize text and add proper spacing around punctuation
131
  text = re.sub(r'\s+', ' ', text.strip())
132
  text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
 
229
  self.content = []
230
 
231
  def add_text(self, text: str):
232
+ # Escape special characters for SSML
233
+ text = html.escape(text, quote=True)
234
  self.content.append(text)
235
  return self
236
 
 
295
 
296
  async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
297
  """Process segment with enhanced speech features"""
 
 
 
 
 
 
 
 
298
  try:
299
+ # Clean text before processing
300
+ clean_text = TextCleaner.clean_text(segment.text)
301
+ cache_key = cache.get_cache_key(clean_text, voice, rate, pitch)
302
+ cached_audio = cache.get_cached_audio(cache_key)
303
+
304
+ if cached_audio:
305
+ segment.audio = cached_audio
306
+ segment.duration = len(cached_audio)
307
+ return segment
308
+
309
+ # Create SSML with cleaned text
310
+ enhanced_text = SpeechEnhancer.add_speech_marks(clean_text)
311
  tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
312
 
313
  audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
 
327
  os.remove(audio_file)
328
 
329
  async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
330
+ # Clean input text first
331
+ text = TextCleaner.clean_text(text)
332
+
333
  processor = TextProcessor(words_per_line, lines_per_segment)
334
  segments = processor.split_into_segments(text)
335