insta-maker

Sleeping

hivecorp committed on Nov 13, 2024

Commit

95d954d

verified ·

1 Parent(s): 4b97382

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,32 +20,38 @@ def format_time(seconds):
     secs = seconds % 60
     return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
-# Function to split text into segments without cutting words awkwardly
 def split_text_into_segments(text):
     segments = []
-    sentences = re.split(r'([.!?])', text)
-    for i in range(0, len(sentences) - 1, 2):
-        sentence = sentences[i].strip() + sentences[i + 1]
         words = sentence.split()
-        # Ensure full phrases by keeping each segment between 7 to 8 words
-        if len(words) > 8:
-            segment = ""
             for word in words:
-                if len(segment.split()) < 8:
-                    segment += " " + word
                 else:
-                    segments.append(segment.strip())
-                    segment = word
-            if segment:
-                segments.append(segment.strip())
-        else:
-            segments.append(sentence.strip())
-    # Handle any leftover sentence fragment
-    if len(sentences) % 2 == 1:
-        remaining_text = sentences[-1].strip()
-        segments.append(remaining_text)
     return segments

     secs = seconds % 60
     return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
+# Function to split text based on punctuation, with handling for segments over 8 words
 def split_text_into_segments(text):
+    # Split based on punctuation marks (.!?)
     segments = []
+    raw_segments = re.split(r'([.!?])', text)
+    temp_sentence = ""
+    for i in range(0, len(raw_segments) - 1, 2):
+        # Combine sentence with punctuation
+        sentence = raw_segments[i].strip() + raw_segments[i + 1]
         words = sentence.split()
+        # If the sentence has 8 words or fewer, add as is
+        if len(words) <= 8:
+            segments.append(sentence.strip())
+        else:
+            # Split longer sentences into chunks of max 8 words without splitting words
+            chunk = ""
             for word in words:
+                if len(chunk.split()) < 8:
+                    chunk += " " + word
                 else:
+                    segments.append(chunk.strip())
+                    chunk = word
+            if chunk:
+                segments.append(chunk.strip())
+    # Handle any leftover sentence fragment not followed by punctuation
+    if len(raw_segments) % 2 == 1:
+        remaining_text = raw_segments[-1].strip()
+        if remaining_text:
+            segments.append(remaining_text)
     return segments