hivecorp committed on
Commit
310bb28
·
verified ·
1 Parent(s): 8ed1f45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -20,26 +20,25 @@ def format_time(seconds):
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
- # Function to split text into segments by punctuation or limit to 7-8 words
24
  def split_text_into_segments(text):
25
  segments = []
26
- raw_segments = re.split(r'([.!?])', text)
 
 
 
27
  for i in range(0, len(raw_segments) - 1, 2):
28
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
 
 
 
 
29
  words = sentence.split()
30
-
31
- if len(words) > 8:
32
- for j in range(0, len(words), 8):
33
- segments.append(" ".join(words[j:j + 8]))
34
- else:
35
- segments.append(sentence.strip())
36
-
37
- if len(raw_segments) % 2 == 1:
38
- remaining_text = raw_segments[-1].strip()
39
- words = remaining_text.split()
40
- for j in range(0, len(words), 8):
41
- segments.append(" ".join(words[j:j + 8]))
42
-
43
  return segments
44
 
45
  # Function to generate SRT with accurate timing per batch
@@ -63,6 +62,7 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
63
  for index, segment in enumerate(segments):
64
  end_time = start_time + segment_duration
65
 
 
66
  if end_time > start_offset + actual_length:
67
  end_time = start_offset + actual_length
68
 
@@ -92,6 +92,7 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
92
  os.remove(audio_file)
93
  progress((batch_num + 1) / len(batches))
94
 
 
95
  total_audio_length = combined_audio.duration_seconds
96
  validated_srt_content = ""
97
  for line in all_srt_content.strip().splitlines():
@@ -99,8 +100,11 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
99
  start_str, end_str = line.split(' --> ')
100
  start_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], start_str.replace(',', ':').split(':')))
101
  end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
 
 
102
  if end_time > total_audio_length:
103
  end_time = total_audio_length
 
104
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
105
  validated_srt_content += line + "\n"
106
 
@@ -179,8 +183,7 @@ app = gr.Interface(
179
  gr.Audio(label="Audio Playback")
180
  ],
181
  title="HIVEcorp Text-to-Speech with SRT Generation",
182
- description="Convert your script into audio and generate subtitles.",
183
- theme="compact",
184
  )
185
 
186
- app.launch()
 
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
+ # Updated function to split text into sentence segments at punctuation, then cap each segment at 8 whole words (words are never split)
24
  def split_text_into_segments(text):
25
  segments = []
26
+ raw_segments = re.split(r'([.!?])', text) # Split by punctuation with the delimiter preserved
27
+ combined_segments = []
28
+
29
+ # Combine text with punctuation back into full sentences
30
  for i in range(0, len(raw_segments) - 1, 2):
31
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
32
+ combined_segments.append(sentence.strip())
33
+
34
+ # Further split sentences into 7-8 word segments without splitting words
35
+ for sentence in combined_segments:
36
  words = sentence.split()
37
+ while words:
38
+ segment = " ".join(words[:8]) # Take up to 8 words
39
+ segments.append(segment)
40
+ words = words[8:] # Move to the next batch of words
41
+
 
 
 
 
 
 
 
 
42
  return segments
43
 
44
  # Function to generate SRT with accurate timing per batch
 
62
  for index, segment in enumerate(segments):
63
  end_time = start_time + segment_duration
64
 
65
+ # Ensure the end time does not exceed the total audio length
66
  if end_time > start_offset + actual_length:
67
  end_time = start_offset + actual_length
68
 
 
92
  os.remove(audio_file)
93
  progress((batch_num + 1) / len(batches))
94
 
95
+ # Adjust the total length of the audio for the final cut-off
96
  total_audio_length = combined_audio.duration_seconds
97
  validated_srt_content = ""
98
  for line in all_srt_content.strip().splitlines():
 
100
  start_str, end_str = line.split(' --> ')
101
  start_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], start_str.replace(',', ':').split(':')))
102
  end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
103
+
104
+ # Correct end time to ensure it does not exceed the total audio length
105
  if end_time > total_audio_length:
106
  end_time = total_audio_length
107
+
108
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
109
  validated_srt_content += line + "\n"
110
 
 
183
  gr.Audio(label="Audio Playback")
184
  ],
185
  title="HIVEcorp Text-to-Speech with SRT Generation",
186
+ description="Convert your script into speech and generate synchronized subtitles (SRT)."
 
187
  )
188
 
189
+ app.launch(share=True)