hivecorp committed on
Commit
95d954d
·
verified ·
1 Parent(s): 4b97382

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -20,32 +20,38 @@ def format_time(seconds):
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
- # Function to split text into segments without cutting words awkwardly
24
  def split_text_into_segments(text):
 
25
  segments = []
26
- sentences = re.split(r'([.!?])', text)
27
- for i in range(0, len(sentences) - 1, 2):
28
- sentence = sentences[i].strip() + sentences[i + 1]
 
 
 
29
  words = sentence.split()
30
 
31
- # Ensure full phrases by keeping each segment between 7 to 8 words
32
- if len(words) > 8:
33
- segment = ""
 
 
 
34
  for word in words:
35
- if len(segment.split()) < 8:
36
- segment += " " + word
37
  else:
38
- segments.append(segment.strip())
39
- segment = word
40
- if segment:
41
- segments.append(segment.strip())
42
- else:
43
- segments.append(sentence.strip())
44
-
45
- # Handle any leftover sentence fragment
46
- if len(sentences) % 2 == 1:
47
- remaining_text = sentences[-1].strip()
48
- segments.append(remaining_text)
49
 
50
  return segments
51
 
 
20
  secs = seconds % 60
21
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
 
23
# Function to split text on sentence punctuation, capping each segment at max_words words
def split_text_into_segments(text, max_words=8):
    """Split *text* into short segments at sentence-ending punctuation.

    The text is cut after each run of ``.``, ``!`` or ``?``; the punctuation
    stays attached to the sentence it ends. A sentence of at most
    ``max_words`` whitespace-separated words is kept as one segment; a longer
    one is broken into consecutive chunks of at most ``max_words`` words.
    Words are never split. Text left over after the last punctuation mark is
    handled the same way as a sentence.

    Args:
        text: The string to split.
        max_words: Maximum number of words per segment (default 8).

    Returns:
        A list of non-empty segment strings, in the order they occur in
        ``text``. Empty or whitespace-only input gives an empty list.

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    segments = []

    def add_piece(piece):
        # Append one sentence (or trailing fragment), chunking it if too long.
        words = piece.split()
        if not words:
            return
        if len(words) <= max_words:
            # Short pieces keep their inner spacing; only the ends are trimmed.
            segments.append(piece.strip())
            return
        # Slice the word list directly instead of re-splitting a growing string.
        for start in range(0, len(words), max_words):
            segments.append(" ".join(words[start:start + max_words]))

    # "[.!?]+" treats a run such as "..." or "?!" as ONE terminator, so no
    # punctuation-only segments are produced. Because of the capture group the
    # result alternates text, punctuation, text, ... and always ends with the
    # (possibly empty) text that follows the last terminator.
    parts = re.split(r'([.!?]+)', text)
    for body, punctuation in zip(parts[0::2], parts[1::2]):
        add_piece(body.strip() + punctuation)

    # Trailing text without closing punctuation obeys the same word limit.
    add_piece(parts[-1])

    return segments
57