whisper-large-v3-1-1

Runtime error

danielwm994 committed on Oct 16, 2024

Commit

c7af682

verified ·

1 Parent(s): a9ecf96

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,9 +33,28 @@ def transcribe(inputs, task):
     text = result["text"]
     timestamps = result["chunks"]
-    timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
-    return text, timestamp_str
 def _return_yt_html_embed(yt_url):

     text = result["text"]
     timestamps = result["chunks"]
+    # Initialize an empty list to store processed text with more natural breaks
+    processed_text = []
+    timestamp_str = ""
+    for chunk in timestamps:
+        # For each chunk, ensure text ends at a period, question mark, or exclamation mark
+        chunk_text = chunk["text"]
+        # Adjust chunk_text to end with a more natural boundary if needed (e.g., sentence end)
+        if not chunk_text.endswith(('.', '!', '?')):
+            # You could modify this part to adjust as needed, for example, by adding a period
+            chunk_text += "..."
+        # Add the text with timestamps
+        processed_text.append(chunk_text)
+        timestamp_str += f"[{chunk['timestamp']}] {chunk_text}\n"
+    # Join all the processed text into a single string with logical sentence boundaries
+    full_text = " ".join(processed_text)
+    return full_text, timestamp_str
 def _return_yt_html_embed(yt_url):