Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@ import hashlib
|
|
13 |
import json
|
14 |
from pathlib import Path
|
15 |
from tqdm.asyncio import tqdm
|
16 |
-
import ssml.builder as ssml
|
17 |
|
18 |
class TimingManager:
|
19 |
def __init__(self):
|
@@ -206,17 +205,62 @@ class AudioCache:
|
|
206 |
cache_file = self.cache_dir / f"{cache_key}.wav"
|
207 |
audio.export(str(cache_file), format="wav")
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
class SpeechEnhancer:
|
210 |
@staticmethod
|
211 |
def add_speech_marks(text: str) -> str:
|
212 |
"""Add SSML marks for better speech control"""
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
@staticmethod
|
222 |
def enhance_timing(segment: Segment) -> Segment:
|
|
|
13 |
import json
|
14 |
from pathlib import Path
|
15 |
from tqdm.asyncio import tqdm
|
|
|
16 |
|
17 |
class TimingManager:
|
18 |
def __init__(self):
|
|
|
205 |
cache_file = self.cache_dir / f"{cache_key}.wav"
|
206 |
audio.export(str(cache_file), format="wav")
|
207 |
|
208 |
+
class SSMLBuilder:
    """Fluent builder that collects SSML fragments and renders a <speak> document.

    Fragments are kept in ``content`` in insertion order. Every ``add_*``
    method returns the builder itself so calls can be chained. Text and
    attribute values are inserted verbatim; no XML escaping happens here.
    """

    def __init__(self):
        # Ordered raw fragments (plain text or SSML markup).
        self.content = []

    def _push(self, fragment):
        """Record one fragment and return the builder for chaining."""
        self.content.append(fragment)
        return self

    def add_text(self, text: str):
        """Append ``text`` unchanged."""
        return self._push(text)

    def add_break(self, strength: str = "medium"):
        """Append a self-closing <break> element with the given strength."""
        return self._push('<break strength="{}"/>'.format(strength))

    def add_prosody(self, text: str, rate: str = "medium", pitch: str = "medium"):
        """Append ``text`` wrapped in a <prosody> element with rate and pitch."""
        return self._push(
            '<prosody rate="{}" pitch="{}">{}</prosody>'.format(rate, pitch, text)
        )

    def add_sentence(self, text: str):
        """Append ``text`` wrapped in an <s> element."""
        return self._push('<s>{}</s>'.format(text))

    def __str__(self):
        """Render the XML declaration, the <speak> root and all fragments."""
        body = "".join(self.content)
        return "".join((
            '<?xml version="1.0"?>',
            '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">',
            body,
            '</speak>',
        ))
|
237 |
+
|
238 |
class SpeechEnhancer:
|
239 |
@staticmethod
|
240 |
def add_speech_marks(text: str) -> str:
    """Add SSML marks for better speech control.

    The text is split on ``'. '`` into sentences (the separator itself is
    consumed). Each non-empty sentence is wrapped in an ``<s>`` element, a
    strong break is placed between consecutive sentences, and a medium
    break is placed directly after every comma that is followed by
    whitespace.

    Args:
        text: Plain text to convert. XML special characters are escaped.

    Returns:
        The complete SSML document produced by ``SSMLBuilder``.
    """
    # Imported locally so the method does not rely on file-level imports.
    import re
    from xml.sax.saxutils import escape

    builder = SSMLBuilder()

    # Drop empty chunks first so breaks are only placed between sentences
    # that are really emitted (no dangling break after a trailing '. ').
    sentences = [
        chunk for chunk in (part.strip() for part in text.split('. ')) if chunk
    ]

    for index, sentence in enumerate(sentences):
        if index:
            builder.add_break("strong")

        # Escape before adding markup: raw '&' or '<' in the input would
        # otherwise make the document malformed.
        safe = escape(sentence)

        # Put the pause where the comma is, inside the sentence. Commas not
        # followed by whitespace (e.g. "1,000") are left alone so numbers
        # are not split by a pause.
        marked = re.sub(r',(?=\s)', ',<break strength="medium"/>', safe)

        builder.add_sentence(marked)

    return str(builder)
|
264 |
|
265 |
@staticmethod
|
266 |
def enhance_timing(segment: Segment) -> Segment:
|