hivecorp committed on
Commit
4b97382
·
verified ·
1 Parent(s): f1e232e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -51
app.py CHANGED
@@ -20,26 +20,33 @@ def format_time(seconds):
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
- # Function to split text into segments by punctuation or limit to 7-8 words
24
  def split_text_into_segments(text):
25
  segments = []
26
- raw_segments = re.split(r'([.!?])', text)
27
- for i in range(0, len(raw_segments) - 1, 2):
28
- sentence = raw_segments[i].strip() + raw_segments[i + 1]
29
  words = sentence.split()
30
-
 
31
  if len(words) > 8:
32
- for j in range(0, len(words), 8):
33
- segments.append(" ".join(words[j:j + 8]))
 
 
 
 
 
 
 
34
  else:
35
  segments.append(sentence.strip())
36
-
37
- if len(raw_segments) % 2 == 1:
38
- remaining_text = raw_segments[-1].strip()
39
- words = remaining_text.split()
40
- for j in range(0, len(words), 8):
41
- segments.append(" ".join(words[j:j + 8]))
42
-
43
  return segments
44
 
45
  # Function to generate SRT with accurate timing per batch
@@ -127,43 +134,8 @@ async def process_script(script_text, pitch, rate, voice):
127
  voice_options = {
128
  "Andrew Male": "en-US-AndrewNeural",
129
  "Jenny Female": "en-US-JennyNeural",
130
- "Guy Male": "en-US-GuyNeural",
131
- "Ana Female": "en-US-AnaNeural",
132
- "Aria Female": "en-US-AriaNeural",
133
- "Brian Male": "en-US-BrianNeural",
134
- "Christopher Male": "en-US-ChristopherNeural",
135
- "Eric Male": "en-US-EricNeural",
136
- "Michelle Male": "en-US-MichelleNeural",
137
- "Roger Male": "en-US-RogerNeural",
138
- "Natasha Female": "en-AU-NatashaNeural",
139
- "William Male": "en-AU-WilliamNeural",
140
- "Clara Female": "en-CA-ClaraNeural",
141
- "Liam Female ": "en-CA-LiamNeural",
142
- "Libby Female": "en-GB-LibbyNeural",
143
- "Maisie": "en-GB-MaisieNeural",
144
- "Ryan": "en-GB-RyanNeural",
145
- "Sonia": "en-GB-SoniaNeural",
146
- "Thomas": "en-GB-ThomasNeural",
147
- "Sam": "en-HK-SamNeural",
148
- "Yan": "en-HK-YanNeural",
149
- "Connor": "en-IE-ConnorNeural",
150
- "Emily": "en-IE-EmilyNeural",
151
- "Neerja": "en-IN-NeerjaNeural",
152
- "Prabhat": "en-IN-PrabhatNeural",
153
- "Asilia": "en-KE-AsiliaNeural",
154
- "Chilemba": "en-KE-ChilembaNeural",
155
- "Abeo": "en-NG-AbeoNeural",
156
- "Ezinne": "en-NG-EzinneNeural",
157
- "Mitchell": "en-NZ-MitchellNeural",
158
- "James": "en-PH-JamesNeural",
159
- "Rosa": "en-PH-RosaNeural",
160
- "Luna": "en-SG-LunaNeural",
161
- "Wayne": "en-SG-WayneNeural",
162
- "Elimu": "en-TZ-ElimuNeural",
163
- "Imani": "en-TZ-ImaniNeural",
164
- "Leah": "en-ZA-LeahNeural",
165
- "Luke": "en-ZA-LukeNeural"
166
- } # All voice options
167
 
168
  app = gr.Interface(
169
  fn=process_script,
 
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
def split_text_into_segments(text, max_words=8):
    """Split text into short caption-sized segments without cutting words.

    The text is first divided into sentences at '.', '!' and '?', with each
    terminator kept attached to its sentence. Any sentence longer than
    ``max_words`` words is broken into consecutive chunks of at most
    ``max_words`` words. Text after the last terminator is treated the same
    way as a sentence.

    Args:
        text: The script text to split.
        max_words: Maximum number of words per segment (default 8).

    Returns:
        A list of non-empty segment strings in their original order.

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    def chunk(piece):
        # Whitespace-only pieces yield nothing, so no empty segment is
        # ever emitted (e.g. for the empty tail after a final full stop).
        words = piece.split()
        if not words:
            return []
        # Short pieces are kept verbatim, preserving their inner spacing.
        if len(words) <= max_words:
            return [piece]
        return [
            " ".join(words[start:start + max_words])
            for start in range(0, len(words), max_words)
        ]

    # With one capturing group, re.split returns
    # [body, terminator, body, terminator, ..., tail]: always an odd length,
    # and the tail is "" when the text ends with a terminator.
    parts = re.split(r'([.!?])', text)

    segments = []
    for body, terminator in zip(parts[0::2], parts[1::2]):
        segments.extend(chunk(body.strip() + terminator))

    # Trailing text without terminal punctuation gets the same word limit.
    segments.extend(chunk(parts[-1].strip()))
    return segments
51
 
52
  # Function to generate SRT with accurate timing per batch
 
134
  voice_options = {
135
  "Andrew Male": "en-US-AndrewNeural",
136
  "Jenny Female": "en-US-JennyNeural",
137
+ # Add other voices here...
138
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  app = gr.Interface(
141
  fn=process_script,