SCBconsulting committed on
Commit
8521187
·
verified ·
1 Parent(s): 9d35238

Update utils/summarizer.py

Browse files
Files changed (1) hide show
  1. utils/summarizer.py +50 -22
utils/summarizer.py CHANGED
@@ -6,7 +6,13 @@ from typing import List
6
  # ========== Load Summarization Pipeline ==========
7
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
8
 
9
- # ========== Helper Functions ==========
 
 
 
 
 
 
10
 
11
  def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
12
  """
@@ -28,33 +34,55 @@ def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
28
 
29
  return chunks
30
 
31
- def clean_text(text: str) -> str:
32
- """
33
- 🧹 Remove excessive whitespace and line breaks.
34
- """
35
- return text.replace("\n", " ").replace(" ", " ").strip()
36
-
37
- # ========== Summarization Function ==========
38
 
39
- def summarize_text(text: str) -> str:
40
  """
41
- 📄 Generate a readable executive summary using bullet points.
 
 
 
 
42
  """
43
  if not text.strip():
44
  return "No input provided."
45
 
46
- cleaned = clean_text(text)
47
- chunks = split_text(cleaned)
 
 
 
 
 
 
 
 
 
 
48
 
49
- bullet_points = []
 
 
 
 
 
 
 
50
 
51
- for chunk in chunks:
52
- result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
53
- summary = result[0]["summary_text"].strip()
54
- lines = summary.split('. ')
55
- for line in lines:
56
- cleaned_line = line.strip().rstrip('.')
57
- if cleaned_line:
58
- bullet_points.append(f"• {cleaned_line}.")
59
 
60
- return "📄 Executive Summary:\n" + "\n".join(bullet_points)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # ========== Load Summarization Pipeline ==========
7
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
8
 
9
+ # ========== Text Helpers ==========
10
+
11
+ def clean_text(text: str) -> str:
12
+ """
13
+ 🧹 Remove excessive whitespace and line breaks.
14
+ """
15
+ return text.replace("\n", " ").replace(" ", " ").strip()
16
 
17
  def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
18
  """
 
34
 
35
  return chunks
36
 
37
+ # ========== Summarization Functions ==========
 
 
 
 
 
 
38
 
39
+ def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
40
  """
41
+ 📄 Generate an executive summary.
42
+
43
+ Params:
44
+ - as_paragraph: True → returns as 2–3 paragraph summary; False → bullet points
45
+ - fallback: True → if model fails, returns manual fallback
46
  """
47
  if not text.strip():
48
  return "No input provided."
49
 
50
+ try:
51
+ cleaned = clean_text(text)
52
+ chunks = split_text(cleaned)
53
+ summaries = []
54
+
55
+ for chunk in chunks:
56
+ result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
57
+ summary = result[0]["summary_text"].strip()
58
+ summaries.append(summary)
59
+
60
+ if as_paragraph:
61
+ return "📄 Executive Summary:\n\n" + "\n\n".join(summaries)
62
 
63
+ # Otherwise → return as bullet points
64
+ bullet_points = []
65
+ for summary in summaries:
66
+ lines = summary.split('. ')
67
+ for line in lines:
68
+ cleaned_line = line.strip().rstrip('.')
69
+ if cleaned_line:
70
+ bullet_points.append(f"• {cleaned_line}.")
71
 
72
+ return "📄 Executive Summary:\n" + "\n".join(bullet_points)
 
 
 
 
 
 
 
73
 
74
+ except Exception as e:
75
+ if fallback:
76
+ return fallback_summary(text)
77
+ return f"An error occurred: {str(e)}"
78
+
79
+ # ========== Fallback Summary (manual) ==========
80
+
81
+ def fallback_summary(text: str, max_lines: int = 5) -> str:
82
+ """
83
+ 🧭 Fallback: Return first few sentences as pseudo-summary.
84
+ """
85
+ lines = text.split(". ")
86
+ selected = lines[:max_lines]
87
+ points = [f"• {line.strip().rstrip('.')}" for line in selected if line.strip()]
88
+ return "📄 (Fallback Summary)\n" + "\n".join(points)