Spaces:
Sleeping
Sleeping
Update utils/summarizer.py
Browse files- utils/summarizer.py +50 -22
utils/summarizer.py
CHANGED
@@ -6,7 +6,13 @@ from typing import List
|
|
6 |
# ========== Load Summarization Pipeline ==========
|
7 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
8 |
|
9 |
-
# ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
|
12 |
"""
|
@@ -28,33 +34,55 @@ def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
|
|
28 |
|
29 |
return chunks
|
30 |
|
31 |
-
|
32 |
-
"""
|
33 |
-
🧹 Remove excessive whitespace and line breaks.
|
34 |
-
"""
|
35 |
-
return text.replace("\n", " ").replace(" ", " ").strip()
|
36 |
-
|
37 |
-
# ========== Summarization Function ==========
|
38 |
|
39 |
-
def summarize_text(text: str) -> str:
|
40 |
"""
|
41 |
-
π Generate
|
|
|
|
|
|
|
|
|
42 |
"""
|
43 |
if not text.strip():
|
44 |
return "No input provided."
|
45 |
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
|
53 |
-
summary = result[0]["summary_text"].strip()
|
54 |
-
lines = summary.split('. ')
|
55 |
-
for line in lines:
|
56 |
-
cleaned_line = line.strip().rstrip('.')
|
57 |
-
if cleaned_line:
|
58 |
-
bullet_points.append(f"β’ {cleaned_line}.")
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# ========== Load Summarization Pipeline ==========
|
7 |
# Module-level summarization pipeline built with the "facebook/bart-large-cnn"
# checkpoint; summarize_text() calls this object once per text chunk.
# NOTE(review): `pipeline` is presumably `transformers.pipeline` -- its import
# is outside the visible part of the file; confirm.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
8 |
|
9 |
+
# ========== Text Helpers ==========
|
10 |
+
|
11 |
+
def clean_text(text: str) -> str:
    """Collapse every run of whitespace in ``text`` into a single space.

    Newlines, tabs and repeated spaces are all treated as whitespace, and
    leading/trailing whitespace is removed.

    Args:
        text: Raw input text.

    Returns:
        The text with words separated by exactly one space; an empty string
        if ``text`` contains only whitespace.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty pieces, so runs of three or more spaces (which a single
    # replace("  ", " ") pass leaves behind) are collapsed as well.
    return " ".join(text.split())
|
16 |
|
17 |
def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
|
18 |
"""
|
|
|
34 |
|
35 |
return chunks
|
36 |
|
37 |
+
# ========== Summarization Functions ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
+
def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """Generate an executive summary of ``text``.

    The input is normalized with ``clean_text``, divided with ``split_text``,
    and every chunk is passed to the module-level ``summarizer``. The chunk
    summaries are then rendered either as paragraphs or as bullet points.

    Args:
        text: Raw text to summarize. ``None``, empty, or whitespace-only input
            yields ``"No input provided."``.
        as_paragraph: If True, return the chunk summaries as paragraphs
            separated by blank lines. If False (default), return one bullet
            point per sentence.
        fallback: If True (default), return ``fallback_summary(text)`` when
            summarization raises. If False, return an
            ``"An error occurred: ..."`` message instead.

    Returns:
        The formatted summary, the fallback summary, or an error message.
    """
    import logging

    # Treat None like empty input instead of raising AttributeError.
    if not text or not text.strip():
        return "No input provided."

    try:
        cleaned = clean_text(text)
        summaries = [
            summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0][
                "summary_text"
            ].strip()
            for chunk in split_text(cleaned)
        ]
    except Exception as e:
        # Deliberate best-effort boundary: any model failure degrades to the
        # manual fallback or an error string, but is logged rather than
        # disappearing silently.
        logging.getLogger(__name__).exception("Summarization failed")
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {e}"

    # NOTE(review): the leading emoji of this header was damaged by an
    # encoding round-trip in the reviewed copy; confirm the intended glyph.
    header = "📝 Executive Summary:"

    if as_paragraph:
        return header + "\n\n" + "\n\n".join(summaries)

    # Otherwise return one bullet point per sentence.
    bullet_points = []
    for summary in summaries:
        for sentence in summary.split(". "):
            sentence = sentence.strip().rstrip(".")
            if not sentence:
                continue
            # Do not append a period after a sentence that already ends
            # with "?" or "!".
            ending = "" if sentence.endswith(("?", "!")) else "."
            bullet_points.append(f"• {sentence}{ending}")

    return header + "\n" + "\n".join(bullet_points)
|
78 |
+
|
79 |
+
# ========== Fallback Summary (manual) ==========
|
80 |
+
|
81 |
+
def fallback_summary(text: str, max_lines: int = 5) -> str:
    """Return the first few sentences of ``text`` as a pseudo-summary.

    Used when the model-based summary is unavailable. Sentences are detected
    by splitting on ``". "`` after whitespace has been normalized.

    Args:
        text: Raw text to take sentences from.
        max_lines: Maximum number of bullet points to return.

    Returns:
        A header line followed by up to ``max_lines`` bullet points, one per
        line.
    """
    # Normalize newlines/tabs first so a sentence boundary such as ".\n" is
    # recognized by the ". " split and bullets never span multiple lines.
    normalized = " ".join(text.split())

    # Strip and drop empty fragments BEFORE applying the limit, so blank
    # pieces neither consume the max_lines budget nor become empty bullets.
    sentences = [
        fragment.strip().rstrip(".") for fragment in normalized.split(". ")
    ]
    points = [f"• {sentence}" for sentence in sentences if sentence][:max_lines]

    # NOTE(review): the leading emoji of this header was damaged by an
    # encoding round-trip in the reviewed copy; confirm the intended glyph.
    return "📝 (Fallback Summary)\n" + "\n".join(points)
|