hivecorp committed on
Commit
e5d758b
·
verified ·
1 Parent(s): 85ad137

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -46
app.py CHANGED
@@ -20,23 +20,39 @@ def format_time_ms(milliseconds):
20
  return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
21
 
22
  def smart_text_split(text, words_per_line, lines_per_segment):
23
- # First split by major punctuation with more granular control
 
 
 
 
24
  sentences = []
25
  current = ""
26
 
27
- # Split text into meaningful chunks using various punctuation marks
28
- for char in text:
29
- current += char
30
- if char in '.!?': # Strong break
31
- sentences.append(current.strip())
32
- current = ""
33
- elif char in ',;:': # Soft break - only break if it makes a meaningful chunk
34
- if len(current.split()) >= words_per_line:
35
- sentences.append(current.strip())
36
- current = ""
37
-
38
- if current:
39
- sentences.append(current.strip())
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # Process sentences into lines and segments
42
  segments = []
@@ -45,38 +61,28 @@ def smart_text_split(text, words_per_line, lines_per_segment):
45
 
46
  for sentence in sentences:
47
  words = sentence.strip().split()
48
- i = 0
49
 
50
- while i < len(words):
51
- # Look ahead to find the best breaking point
52
- look_ahead = min(words_per_line, len(words) - i)
53
- best_break = look_ahead
54
 
55
- # Find natural breaking points
56
- for j in range(look_ahead - 1, 0, -1):
57
- if any(words[i + j - 1].endswith(p) for p in ',;:.!?') or \
58
- any(words[i + j].startswith(p) for p in '([{'):
59
- best_break = j
60
  break
61
 
62
- # Add words to current line
63
- current_line.extend(words[i:i + best_break])
64
 
65
- if len(current_line) >= words_per_line or i + best_break >= len(words):
66
- # Add line to current segment
67
- current_segment.append(' '.join(current_line))
68
- current_line = []
69
-
70
- # Check if segment is complete
71
- if len(current_segment) >= lines_per_segment:
72
- segments.append('\n'.join(current_segment))
73
- current_segment = []
74
 
75
- i += best_break
 
 
76
 
77
  # Handle remaining content
78
- if current_line:
79
- current_segment.append(' '.join(current_line))
80
  if current_segment:
81
  segments.append('\n'.join(current_segment))
82
 
@@ -160,9 +166,15 @@ async def generate_accurate_srt(text, voice, rate, pitch, words_per_line, lines_
160
  return srt_path, audio_path
161
 
162
  async def process_text(text, pitch, rate, voice, words_per_line, lines_per_segment):
163
- pitch_str = f"{pitch}Hz" if pitch != 0 else "0Hz"
164
- # Fix: Always include + sign for zero and positive rates
165
- rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
 
 
 
 
 
 
166
 
167
  srt_path, audio_path = await generate_accurate_srt(
168
  text,
@@ -223,11 +235,11 @@ app = gr.Interface(
223
  fn=process_text,
224
  inputs=[
225
  gr.Textbox(label="Enter Text", lines=10),
226
- gr.Slider(label="Pitch Adjustment (Hz)", minimum=-20, maximum=20, value=0, step=1),
227
- gr.Slider(label="Rate Adjustment (%)", minimum=-50, maximum=50, value=0, step=1),
228
  gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Jenny Female"),
229
- gr.Slider(label="Words per Line", minimum=1, maximum=15, value=8, step=1),
230
- gr.Slider(label="Lines per Segment", minimum=1, maximum=5, value=2, step=1)
231
  ],
232
  outputs=[
233
  gr.File(label="Download SRT"),
@@ -235,7 +247,7 @@ app = gr.Interface(
235
  gr.Audio(label="Preview Audio")
236
  ],
237
  title="Advanced TTS with Configurable SRT Generation",
238
- description="Generate perfectly synchronized audio and subtitles with custom segmentation control."
239
  )
240
 
241
  app.launch()
 
20
  return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
21
 
22
  def smart_text_split(text, words_per_line, lines_per_segment):
23
+ # Define natural break patterns
24
+ end_sentence = r'[.!?]+'
25
+ mid_sentence = r'[,;:]+'
26
+
27
+ # First split by major punctuation
28
  sentences = []
29
  current = ""
30
 
31
+ # Clean the text and ensure proper spacing after punctuation
32
+ text = re.sub(r'([.!?,;:])\s*', r'\1 ', text).strip()
33
+
34
+ # Split into initial chunks by strong punctuation
35
+ chunks = re.split(f'({end_sentence})', text)
36
+ temp_sentences = []
37
+
38
+ for i in range(0, len(chunks)-1, 2):
39
+ if i+1 < len(chunks):
40
+ temp_sentences.append(chunks[i] + chunks[i+1])
41
+ else:
42
+ temp_sentences.append(chunks[i])
43
+
44
+ # Further process each sentence
45
+ for sentence in temp_sentences:
46
+ # Split by mid-sentence punctuation if sentence is too long
47
+ if len(sentence.split()) > words_per_line * 2:
48
+ sub_chunks = re.split(f'({mid_sentence})', sentence)
49
+ for i in range(0, len(sub_chunks)-1, 2):
50
+ if i+1 < len(sub_chunks):
51
+ sentences.append(sub_chunks[i] + sub_chunks[i+1])
52
+ else:
53
+ sentences.append(sub_chunks[i])
54
+ else:
55
+ sentences.append(sentence)
56
 
57
  # Process sentences into lines and segments
58
  segments = []
 
61
 
62
  for sentence in sentences:
63
  words = sentence.strip().split()
 
64
 
65
+ while words:
66
+ # Determine natural break point
67
+ break_point = min(words_per_line, len(words))
 
68
 
69
+ # Look for natural breaks
70
+ for i in range(break_point-1, 0, -1):
71
+ if any(words[i-1].endswith(p) for p in '.!?,;:') or \
72
+ any(words[i].startswith(p) for p in '([{'):
73
+ break_point = i
74
  break
75
 
76
+ current_line = words[:break_point]
77
+ words = words[break_point:]
78
 
79
+ current_segment.append(' '.join(current_line))
 
 
 
 
 
 
 
 
80
 
81
+ if len(current_segment) >= lines_per_segment:
82
+ segments.append('\n'.join(current_segment))
83
+ current_segment = []
84
 
85
  # Handle remaining content
 
 
86
  if current_segment:
87
  segments.append('\n'.join(current_segment))
88
 
 
166
  return srt_path, audio_path
167
 
168
  async def process_text(text, pitch, rate, voice, words_per_line, lines_per_segment):
169
+ # Set default pitch and rate strings that work well
170
+ pitch_str = "+0Hz" # neutral pitch
171
+ rate_str = "+0%" # neutral rate
172
+
173
+ # Only modify if user has changed values
174
+ if pitch != 0:
175
+ pitch_str = f"{pitch:+d}Hz"
176
+ if rate != 0:
177
+ rate_str = f"{rate:+d}%"
178
 
179
  srt_path, audio_path = await generate_accurate_srt(
180
  text,
 
235
  fn=process_text,
236
  inputs=[
237
  gr.Textbox(label="Enter Text", lines=10),
238
+ gr.Slider(label="Pitch Adjustment (Hz)", minimum=-10, maximum=10, value=0, step=1),
239
+ gr.Slider(label="Rate Adjustment (%)", minimum=-25, maximum=25, value=0, step=1),
240
  gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Jenny Female"),
241
+ gr.Slider(label="Words per Line", minimum=3, maximum=12, value=6, step=1),
242
+ gr.Slider(label="Lines per Segment", minimum=1, maximum=4, value=2, step=1)
243
  ],
244
  outputs=[
245
  gr.File(label="Download SRT"),
 
247
  gr.Audio(label="Preview Audio")
248
  ],
249
  title="Advanced TTS with Configurable SRT Generation",
250
+ description="Generate perfectly synchronized audio and subtitles with natural speech patterns."
251
  )
252
 
253
  app.launch()