insta-maker-2

Sleeping

App Files Files Community

hivecorp committed on Mar 19

Commit

3851ab4

verified ·

1 Parent(s): 2ff3127

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -9

app.py CHANGED Viewed

@@ -13,6 +13,8 @@ import hashlib
 import json
 from pathlib import Path
 from tqdm.asyncio import tqdm
 class TimingManager:
     def __init__(self):
@@ -45,6 +47,20 @@ class Segment:
     audio: Optional[AudioSegment] = None
     lines: List[str] = None  # Add lines field for display purposes only
 class TextProcessor:
     def __init__(self, words_per_line: int, lines_per_segment: int):
         self.words_per_line = words_per_line
@@ -108,6 +124,9 @@ class TextProcessor:
         return breaks
     def split_into_segments(self, text: str) -> List[Segment]:
         # Normalize text and add proper spacing around punctuation
         text = re.sub(r'\s+', ' ', text.strip())
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
@@ -210,6 +229,8 @@ class SSMLBuilder:
         self.content = []
     def add_text(self, text: str):
         self.content.append(text)
         return self
@@ -274,16 +295,19 @@ class SpeechEnhancer:
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
     """Process segment with enhanced speech features"""
-    cache_key = cache.get_cache_key(segment.text, voice, rate, pitch)
-    cached_audio = cache.get_cached_audio(cache_key)
-    if cached_audio:
-        segment.audio = cached_audio
-        segment.duration = len(cached_audio)
-        return segment
     try:
-        enhanced_text = SpeechEnhancer.add_speech_marks(segment.text)
         tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
         audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
@@ -303,6 +327,9 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
             os.remove(audio_file)
 async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)

 import json
 from pathlib import Path
 from tqdm.asyncio import tqdm
+from html import unescape
+import html
 class TimingManager:
     def __init__(self):
     audio: Optional[AudioSegment] = None
     lines: List[str] = None  # Add lines field for display purposes only
+class TextCleaner:
+    """Strip HTML markup from raw input text and normalize it for TTS."""
+
+    # Matches HTML comments and anything that opens like a real tag (an
+    # optional '/', '!' or '?' followed by a letter). Plain-text comparisons
+    # such as "x < 5 and y > 3" do not match and are left untouched.
+    _TAG_RE = re.compile(r'<!--.*?-->|<[/!?]?[A-Za-z][^>]*>', re.DOTALL)
+    # Whitespace that ended up directly in front of punctuation.
+    _SPACE_BEFORE_PUNCT_RE = re.compile(r'\s+([.,!?;:])')
+
+    @staticmethod
+    def clean_text(text: str) -> str:
+        """Clean text from HTML and normalize for TTS.
+
+        Args:
+            text: Raw input, possibly containing HTML tags and entities.
+                Empty or ``None`` input is accepted.
+
+        Returns:
+            The text with tags removed, entities decoded, runs of
+            whitespace collapsed to single spaces, and no whitespace left
+            before ``. , ! ? ; :``. Returns ``''`` for empty input.
+        """
+        if not text:
+            return ''
+        # Replace each tag with a space rather than nothing, so words that
+        # were only separated by markup ("one<br>two") do not get fused.
+        # The surplus spaces are collapsed by the normalization below.
+        text = TextCleaner._TAG_RE.sub(' ', text)
+        # Decode entities only after stripping tags, so an escaped "&lt;b&gt;"
+        # survives as the literal text "<b>" instead of being removed.
+        text = unescape(text)
+        # split() with no argument also treats non-breaking spaces (from
+        # &nbsp;) as separators.
+        text = ' '.join(text.split())
+        # Fix common punctuation issues: "word ." -> "word."
+        return TextCleaner._SPACE_BEFORE_PUNCT_RE.sub(r'\1', text)
 class TextProcessor:
     def __init__(self, words_per_line: int, lines_per_segment: int):
         self.words_per_line = words_per_line
         return breaks
     def split_into_segments(self, text: str) -> List[Segment]:
+        # Clean text before processing
+        text = TextCleaner.clean_text(text)
         # Normalize text and add proper spacing around punctuation
         text = re.sub(r'\s+', ' ', text.strip())
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
         self.content = []
     def add_text(self, text: str):
+        """Escape `text` and append it to the builder's content.
+
+        `html.escape(..., quote=True)` replaces `&`, `<`, `>` and both
+        quote characters with entities before the text is stored in
+        `self.content`.
+
+        Returns:
+            `self`, allowing calls to be chained.
+        """
+        # Escape special characters for SSML
+        text = html.escape(text, quote=True)
         self.content.append(text)
         return self
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
     """Process segment with enhanced speech features"""
     try:
+        # Clean text before processing
+        clean_text = TextCleaner.clean_text(segment.text)
+        cache_key = cache.get_cache_key(clean_text, voice, rate, pitch)
+        cached_audio = cache.get_cached_audio(cache_key)
+        if cached_audio:
+            segment.audio = cached_audio
+            segment.duration = len(cached_audio)
+            return segment
+        # Create SSML with cleaned text
+        enhanced_text = SpeechEnhancer.add_speech_marks(clean_text)
         tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
         audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
             os.remove(audio_file)
 async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
+    # Clean input text first
+    text = TextCleaner.clean_text(text)
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)