Shreyas94 committed on
Commit
2e72d63
·
verified ·
1 Parent(s): 9f66221

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -38
app.py CHANGED
@@ -2,51 +2,68 @@ import gradio as gr
2
  from transformers import pipeline
3
  from bs4 import BeautifulSoup
4
  import requests
 
5
 
6
- def summarize_blog_post(url):
7
- # Load summarization pipeline
8
  summarizer = pipeline("summarization")
9
 
10
- # Get blog post content
11
- r = requests.get(url)
12
- soup = BeautifulSoup(r.text, 'html.parser')
13
- results = soup.find_all(['h1', 'p'])
14
- text = [result.text for result in results]
15
- ARTICLE = ' '.join(text)
16
-
17
- # Chunk text
18
- max_chunk = 500
19
- ARTICLE = ARTICLE.replace('.', '.<eos>')
20
- ARTICLE = ARTICLE.replace('?', '?<eos>')
21
- ARTICLE = ARTICLE.replace('!', '!<eos>')
22
-
23
- sentences = ARTICLE.split('<eos>')
24
- current_chunk = 0
25
- chunks = []
26
- for sentence in sentences:
27
- if len(chunks) == current_chunk + 1:
28
- if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
29
- chunks[current_chunk].extend(sentence.split(' '))
30
- else:
31
- current_chunk += 1
32
- chunks.append(sentence.split(' '))
33
- else:
34
- chunks.append(sentence.split(' '))
35
-
36
- for chunk_id in range(len(chunks)):
37
- chunks[chunk_id] = ' '.join(chunks[chunk_id])
38
-
39
- # Summarize text
40
- summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
41
- summary_text = " ".join([summary['summary_text'] for summary in summaries])
42
- return summary_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  iface = gr.Interface(
45
- fn=summarize_blog_post,
46
  inputs="text",
47
  outputs="text",
48
- title="Medium Blog Post Summarizer",
49
- description="Enter the URL of a Medium blog post to get a summarized version of the content."
50
  )
51
 
52
  iface.launch()
 
2
  from transformers import pipeline
3
  from bs4 import BeautifulSoup
4
  import requests
5
+ from googlesearch import search
6
 
7
+ def summarize_news(query):
8
+ # Initialize summarization pipeline
9
  summarizer = pipeline("summarization")
10
 
11
+ # Search for news articles
12
+ search_results = search(query, num_results=3)
13
+ articles = []
14
+
15
+ for url in search_results:
16
+ try:
17
+ # Fetch the content of the news article
18
+ r = requests.get(url)
19
+ soup = BeautifulSoup(r.text, 'html.parser')
20
+ results = soup.find_all(['h1', 'p'])
21
+ text = [result.text for result in results]
22
+ ARTICLE = ' '.join(text)
23
+
24
+ # Chunk the article text
25
+ max_chunk = 500
26
+ ARTICLE = ARTICLE.replace('.', '.<eos>')
27
+ ARTICLE = ARTICLE.replace('?', '?<eos>')
28
+ ARTICLE = ARTICLE.replace('!', '!<eos>')
29
+
30
+ sentences = ARTICLE.split('<eos>')
31
+ current_chunk = 0
32
+ chunks = []
33
+ for sentence in sentences:
34
+ if len(chunks) == current_chunk + 1:
35
+ if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
36
+ chunks[current_chunk].extend(sentence.split(' '))
37
+ else:
38
+ current_chunk += 1
39
+ chunks.append(sentence.split(' '))
40
+ else:
41
+ chunks.append(sentence.split(' '))
42
+
43
+ for chunk_id in range(len(chunks)):
44
+ chunks[chunk_id] = ' '.join(chunks[chunk_id])
45
+
46
+ # Summarize the chunks
47
+ summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
48
+ summary_text = " ".join([summary['summary_text'] for summary in summaries])
49
+ articles.append((url, summary_text))
50
+ except Exception as e:
51
+ continue
52
+
53
+ return articles
54
+
55
def format_output(articles):
    """Render ``(url, summary)`` pairs as a plain-text report.

    Parameters:
        articles (list[tuple[str, str]]): URL / summary pairs.

    Returns:
        str: One "URL: ...\nSummary: ...\n\n" section per article,
        concatenated; empty string for an empty input.
    """
    sections = [f"URL: {link}\nSummary: {digest}\n\n" for link, digest in articles]
    return "".join(sections)
60
 
61
def _summarize_and_format(query):
    # Glue for the UI: search + summarize, then render as plain text.
    return format_output(summarize_news(query))


# Gradio app: one text box in, summarized article report out.
iface = gr.Interface(
    fn=_summarize_and_format,
    inputs="text",
    outputs="text",
    title="News Summarizer",
    description="Enter a query to get summarized versions of the top news articles.",
)

iface.launch()