SCBconsulting committed on
Commit
cb4344f
·
verified ·
1 Parent(s): a3b248d

Update utils/summarizer.py

Browse files
Files changed (1) hide show
  1. utils/summarizer.py +34 -3
utils/summarizer.py CHANGED
@@ -1,11 +1,42 @@
 
 
1
  from transformers import pipeline
2
 
 
3
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def summarize_text(text):
    """
    Summarize the beginning of ``text`` with the module-level pipeline.

    Only the first 1024 characters (after whitespace normalisation) are
    sent to the model; anything beyond that is ignored.

    Args:
        text: The document to summarize. ``None``, empty and
            whitespace-only values are treated as "no input".

    Returns:
        The generated summary string, or ``"No input provided."`` when
        there is nothing to summarize.
    """
    # ``not text`` also covers None, which would otherwise raise
    # AttributeError on ``.strip()``.
    if not text or not text.strip():
        return "No input provided."

    # Collapse every whitespace run (newlines, tabs, repeated spaces) into a
    # single space, then keep only the leading 1024 characters.
    text = " ".join(text.split())[:1024].rstrip()

    # ``truncation=True`` is a token-level safety net in case the character
    # cut-off still tokenizes to more than the model accepts.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]
 
 
 
 
 
 
1
+ # utils/summarizer.py
2
+
3
  from transformers import pipeline
4
 
5
+ # Load summarization pipeline
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
def _wrap_long_sentence(sentence, limit):
    """
    Split one over-long sentence into pieces of at most ``limit`` characters.

    Pieces break at spaces where possible; a single word longer than
    ``limit`` is sliced, since it has no safe boundary.
    """
    pieces = []
    current = ""
    for word in sentence.split(" "):
        # Slice off full-width pieces until the remainder fits.
        while len(word) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:limit])
            word = word[limit:]
        if not word:
            continue
        candidate = current + " " + word if current else word
        if len(candidate) <= limit:
            current = candidate
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def split_text(text, max_chunk_len=800):
    """
    Break a long document into smaller chunks for summarization.

    Whitespace runs (including newlines) are collapsed to single spaces,
    the text is split into sentences on ``". "``, and sentences are packed
    greedily into chunks. Original punctuation is preserved: no period is
    added that was not in the input.

    Args:
        text: The document to split. ``None`` or blank text yields ``[]``.
        max_chunk_len: Maximum length, in characters, of each returned
            chunk. Must be positive.

    Returns:
        A list of non-empty strings, each at most ``max_chunk_len``
        characters long. Sentences longer than the limit are broken at
        word boundaries (or sliced when a single word exceeds the limit).

    Raises:
        ValueError: If ``max_chunk_len`` is not positive.
    """
    if max_chunk_len <= 0:
        raise ValueError("max_chunk_len must be a positive integer")

    # Normalise whitespace so that sentences separated by newlines are
    # detected too, and so no chunk carries stray leading/trailing blanks.
    normalized = " ".join((text or "").split())
    if not normalized:
        return []

    # Splitting on ". " consumes the period of every piece except the last;
    # restore it only there so the final sentence is not given an extra one.
    parts = normalized.split(". ")
    sentences = [part + "." for part in parts[:-1]]
    sentences.append(parts[-1])

    chunks = []
    current = ""
    for sentence in sentences:
        if len(sentence) > max_chunk_len:
            # The sentence cannot fit in any chunk: flush what we have and
            # wrap it; its tail stays open so later sentences can join it.
            if current:
                chunks.append(current)
            pieces = _wrap_long_sentence(sentence, max_chunk_len)
            chunks.extend(pieces[:-1])
            current = pieces[-1]
            continue

        candidate = current + " " + sentence if current else sentence
        if len(candidate) <= max_chunk_len:
            current = candidate
        else:
            chunks.append(current)
            current = sentence

    if current:
        chunks.append(current)

    return chunks
27
+
def summarize_text(text):
    """
    Generate a summary by chunking the text and combining chunk summaries.

    The text is split with ``split_text``, each non-empty chunk is
    summarized independently with the module-level ``summarizer`` pipeline,
    and the partial summaries are joined with single spaces.

    Args:
        text: The document to summarize. ``None``, empty and
            whitespace-only values are treated as "no input".

    Returns:
        The combined summary string, or ``"No input provided."`` when
        there is nothing to summarize.
    """
    # ``not text`` also covers None, which would otherwise raise
    # AttributeError on ``.strip()``.
    if not text or not text.strip():
        return "No input provided."

    # Never hand an empty chunk to the model.
    chunks = [chunk for chunk in split_text(text) if chunk and chunk.strip()]

    summaries = []
    for chunk in chunks:
        # ``truncation=True`` is a token-level safety net for a chunk that
        # still tokenizes to more than the model accepts.
        result = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        # Trim each partial summary so the join does not yield double spaces.
        summaries.append(result[0]["summary_text"].strip())

    return " ".join(summaries)