hivecorp committed on
Commit
4c25f4a
·
verified ·
1 Parent(s): 9ecdae1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -8
app.py CHANGED
@@ -13,7 +13,6 @@ import hashlib
13
  import json
14
  from pathlib import Path
15
  from tqdm.asyncio import tqdm
16
- import ssml.builder as ssml
17
 
18
  class TimingManager:
19
  def __init__(self):
@@ -206,17 +205,62 @@ class AudioCache:
206
  cache_file = self.cache_dir / f"{cache_key}.wav"
207
  audio.export(str(cache_file), format="wav")
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  class SpeechEnhancer:
210
  @staticmethod
211
  def add_speech_marks(text: str) -> str:
212
  """Add SSML marks for better speech control"""
213
- speech = ssml.Speech()
214
- # Add prosody and breaks for natural speech
215
- speech.prosody(rate="medium", pitch="medium", volume="medium")
216
- for sentence in text.split('. '):
217
- speech.p(sentence.strip())
218
- speech.break_("medium")
219
- return str(speech)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  @staticmethod
222
  def enhance_timing(segment: Segment) -> Segment:
 
13
  import json
14
  from pathlib import Path
15
  from tqdm.asyncio import tqdm
 
16
 
17
  class TimingManager:
18
  def __init__(self):
 
205
  cache_file = self.cache_dir / f"{cache_key}.wav"
206
  audio.export(str(cache_file), format="wav")
207
 
208
class SSMLBuilder:
    """Assemble an SSML document from a sequence of fragments.

    Each ``add_*`` method appends one serialised fragment to ``content``
    and returns the builder itself, so calls can be chained.
    ``str(builder)`` joins the fragments inside a ``<speak>`` root element.

    Caller-supplied text and attribute values are XML-escaped before they
    are embedded, so input such as ``"Tom & Jerry"`` or ``"a < b"`` cannot
    produce malformed markup.
    """

    def __init__(self):
        # Serialised fragments, in the order they were added.
        self.content = []

    @staticmethod
    def _escape(value, attribute: bool = False) -> str:
        """Return *value* escaped for use as XML text or attribute content."""
        # Function-scope import keeps the class self-contained without
        # touching the module's import block.
        from xml.sax.saxutils import escape

        # ``escape`` always handles &, < and >; a double quote only needs
        # escaping inside a double-quoted attribute value.
        extra = {'"': "&quot;"} if attribute else {}
        return escape(str(value), extra)

    def add_text(self, text: str):
        """Append *text* as plain character data (escaped)."""
        self.content.append(self._escape(text))
        return self

    def add_break(self, strength: str = "medium"):
        """Append an empty ``<break>`` element with the given strength."""
        strength_attr = self._escape(strength, attribute=True)
        self.content.append(f'<break strength="{strength_attr}"/>')
        return self

    def add_prosody(self, text: str, rate: str = "medium", pitch: str = "medium"):
        """Append *text* wrapped in ``<prosody>`` with the given rate and pitch."""
        rate_attr = self._escape(rate, attribute=True)
        pitch_attr = self._escape(pitch, attribute=True)
        self.content.append(
            f'<prosody rate="{rate_attr}" pitch="{pitch_attr}">'
            f'{self._escape(text)}</prosody>'
        )
        return self

    def add_sentence(self, text: str):
        """Append *text* wrapped in a sentence (``<s>``) element."""
        self.content.append(f'<s>{self._escape(text)}</s>')
        return self

    def __str__(self):
        """Return the complete document: XML declaration plus ``<speak>`` root."""
        return (
            '<?xml version="1.0"?>'
            '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">'
            + "".join(self.content)
            + '</speak>'
        )
237
+
238
  class SpeechEnhancer:
239
  @staticmethod
240
  def add_speech_marks(text: str) -> str:
241
  """Add SSML marks for better speech control"""
242
+ ssml = SSMLBuilder()
243
+
244
+ # Split text and add appropriate SSML tags
245
+ sentences = text.split('. ')
246
+ for i, sentence in enumerate(sentences):
247
+ sentence = sentence.strip()
248
+ if not sentence:
249
+ continue
250
+
251
+ ssml.add_sentence(sentence)
252
+
253
+ # Add appropriate breaks between sentences
254
+ if i < len(sentences) - 1:
255
+ ssml.add_break("strong")
256
+
257
+ # Add breaks at commas
258
+ if ',' in sentence:
259
+ parts = sentence.split(',')
260
+ for part in parts[:-1]:
261
+ ssml.add_break("medium")
262
+
263
+ return str(ssml)
264
 
265
  @staticmethod
266
  def enhance_timing(segment: Segment) -> Segment: