Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ import hashlib
|
|
13 |
import json
|
14 |
from pathlib import Path
|
15 |
from tqdm.asyncio import tqdm
|
|
|
|
|
16 |
|
17 |
class TimingManager:
|
18 |
def __init__(self):
|
@@ -45,6 +47,20 @@ class Segment:
|
|
45 |
audio: Optional[AudioSegment] = None
|
46 |
lines: List[str] = None # Add lines field for display purposes only
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
class TextProcessor:
|
49 |
def __init__(self, words_per_line: int, lines_per_segment: int):
|
50 |
self.words_per_line = words_per_line
|
@@ -108,6 +124,9 @@ class TextProcessor:
|
|
108 |
return breaks
|
109 |
|
110 |
def split_into_segments(self, text: str) -> List[Segment]:
|
|
|
|
|
|
|
111 |
# Normalize text and add proper spacing around punctuation
|
112 |
text = re.sub(r'\s+', ' ', text.strip())
|
113 |
text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
|
@@ -210,6 +229,8 @@ class SSMLBuilder:
|
|
210 |
self.content = []
|
211 |
|
212 |
def add_text(self, text: str):
|
|
|
|
|
213 |
self.content.append(text)
|
214 |
return self
|
215 |
|
@@ -274,16 +295,19 @@ class SpeechEnhancer:
|
|
274 |
|
275 |
async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
|
276 |
"""Process segment with enhanced speech features"""
|
277 |
-
cache_key = cache.get_cache_key(segment.text, voice, rate, pitch)
|
278 |
-
cached_audio = cache.get_cached_audio(cache_key)
|
279 |
-
|
280 |
-
if cached_audio:
|
281 |
-
segment.audio = cached_audio
|
282 |
-
segment.duration = len(cached_audio)
|
283 |
-
return segment
|
284 |
-
|
285 |
try:
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
|
288 |
|
289 |
audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
|
@@ -303,6 +327,9 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
|
|
303 |
os.remove(audio_file)
|
304 |
|
305 |
async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
|
|
|
|
|
|
|
306 |
processor = TextProcessor(words_per_line, lines_per_segment)
|
307 |
segments = processor.split_into_segments(text)
|
308 |
|
|
|
13 |
import json
|
14 |
from pathlib import Path
|
15 |
from tqdm.asyncio import tqdm
|
16 |
+
from html import unescape
|
17 |
+
import html
|
18 |
|
19 |
class TimingManager:
|
20 |
def __init__(self):
|
|
|
47 |
audio: Optional[AudioSegment] = None
|
48 |
lines: List[str] = None # Add lines field for display purposes only
|
49 |
|
50 |
+
class TextCleaner:
    """Strip HTML markup from text and normalize it before TTS processing."""

    # Compiled once at class creation; clean_text runs for every segment.
    _TAG_RE = re.compile(r'<[^>]+>')
    _SPACE_BEFORE_PUNCT_RE = re.compile(r'\s+([.,!?;:])')

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean text from HTML and normalize for TTS.

        Steps, in order:
          1. Replace every ``<...>`` tag with a single space.
          2. Decode HTML entities (``&amp;`` -> ``&``, ``&nbsp;`` -> NBSP, ...).
          3. Collapse every run of whitespace to one space and trim the ends.
          4. Remove whitespace that precedes ``. , ! ? ; :``.

        Args:
            text: Raw input, possibly containing HTML tags and entities.

        Returns:
            The cleaned text. ``None`` or an empty string yields ``""``.

        Note:
            Entities are decoded *after* tags are stripped, so an escaped
            tag such as ``&lt;b&gt;`` comes out as the literal text ``<b>``.
            Each call decodes one level of entities, so running the result
            through this function again can change doubly-escaped input.
        """
        if not text:
            return ""

        # Substitute a space rather than nothing: adjacent elements such as
        # "<p>One</p><p>Two</p>" or "Hello<br>World" must not be glued into
        # one word. The extra spaces are collapsed in the steps below.
        text = TextCleaner._TAG_RE.sub(' ', text)

        # Convert HTML entities
        text = unescape(text)

        # Normalize whitespace (str.split() also splits on the non-breaking
        # space produced by "&nbsp;").
        text = ' '.join(text.split())

        # Fix common punctuation issues: drop whitespace before punctuation,
        # including the space left behind by a removed tag ("</b> ," -> ",").
        text = TextCleaner._SPACE_BEFORE_PUNCT_RE.sub(r'\1', text)
        return text
|
63 |
+
|
64 |
class TextProcessor:
|
65 |
def __init__(self, words_per_line: int, lines_per_segment: int):
|
66 |
self.words_per_line = words_per_line
|
|
|
124 |
return breaks
|
125 |
|
126 |
def split_into_segments(self, text: str) -> List[Segment]:
|
127 |
+
# Clean text before processing
|
128 |
+
text = TextCleaner.clean_text(text)
|
129 |
+
|
130 |
# Normalize text and add proper spacing around punctuation
|
131 |
text = re.sub(r'\s+', ' ', text.strip())
|
132 |
text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
|
|
|
229 |
self.content = []
|
230 |
|
231 |
def add_text(self, text: str):
|
232 |
+
# Escape special characters for SSML
|
233 |
+
text = html.escape(text, quote=True)
|
234 |
self.content.append(text)
|
235 |
return self
|
236 |
|
|
|
295 |
|
296 |
async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
|
297 |
"""Process segment with enhanced speech features"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
try:
|
299 |
+
# Clean text before processing
|
300 |
+
clean_text = TextCleaner.clean_text(segment.text)
|
301 |
+
cache_key = cache.get_cache_key(clean_text, voice, rate, pitch)
|
302 |
+
cached_audio = cache.get_cached_audio(cache_key)
|
303 |
+
|
304 |
+
if cached_audio:
|
305 |
+
segment.audio = cached_audio
|
306 |
+
segment.duration = len(cached_audio)
|
307 |
+
return segment
|
308 |
+
|
309 |
+
# Create SSML with cleaned text
|
310 |
+
enhanced_text = SpeechEnhancer.add_speech_marks(clean_text)
|
311 |
tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
|
312 |
|
313 |
audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
|
|
|
327 |
os.remove(audio_file)
|
328 |
|
329 |
async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
|
330 |
+
# Clean input text first
|
331 |
+
text = TextCleaner.clean_text(text)
|
332 |
+
|
333 |
processor = TextProcessor(words_per_line, lines_per_segment)
|
334 |
segments = processor.split_into_segments(text)
|
335 |
|