danielwm994 committed on
Commit
c7af682
·
verified ·
1 Parent(s): a9ecf96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -33,9 +33,28 @@ def transcribe(inputs, task):
33
  text = result["text"]
34
  timestamps = result["chunks"]
35
 
36
- timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
37
-
38
- return text, timestamp_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  def _return_yt_html_embed(yt_url):
 
33
  text = result["text"]
34
  timestamps = result["chunks"]
35
 
36
+ # Initialize an empty list to store processed text with more natural breaks
37
+ processed_text = []
38
+ timestamp_str = ""
39
+
40
+ for chunk in timestamps:
41
+ # For each chunk, ensure text ends at a period, question mark, or exclamation mark
42
+ chunk_text = chunk["text"]
43
+
44
+ # Adjust chunk_text to end with a more natural boundary if needed (e.g., sentence end)
45
+ if not chunk_text.endswith(('.', '!', '?')):
46
+ # The chunk stops mid-sentence: append an ellipsis to mark the break (change the marker here if needed)
47
+ chunk_text += "..."
48
+
49
+ # Add the text with timestamps
50
+ processed_text.append(chunk_text)
51
+ timestamp_str += f"[{chunk['timestamp']}] {chunk_text}\n"
52
+
53
+ # Join all the processed text into a single string with logical sentence boundaries
54
+ full_text = " ".join(processed_text)
55
+
56
+ return full_text, timestamp_str
57
+
58
 
59
 
60
  def _return_yt_html_embed(yt_url):