Shreyas94 committed on
Commit
c19b837
1 Parent(s): 2e72d63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -7
app.py CHANGED
@@ -3,17 +3,29 @@ from transformers import pipeline
3
  from bs4 import BeautifulSoup
4
  import requests
5
  from googlesearch import search
 
6
 
7
- def summarize_news(query):
8
- # Initialize summarization pipeline
9
- summarizer = pipeline("summarization")
 
 
 
 
 
 
 
10
 
11
  # Search for news articles
12
- search_results = search(query, num_results=3)
 
13
  articles = []
14
 
 
 
15
  for url in search_results:
16
  try:
 
17
  # Fetch the content of the news article
18
  r = requests.get(url)
19
  soup = BeautifulSoup(r.text, 'html.parser')
@@ -22,6 +34,7 @@ def summarize_news(query):
22
  ARTICLE = ' '.join(text)
23
 
24
  # Chunk the article text
 
25
  max_chunk = 500
26
  ARTICLE = ARTICLE.replace('.', '.<eos>')
27
  ARTICLE = ARTICLE.replace('?', '?<eos>')
@@ -43,13 +56,20 @@ def summarize_news(query):
43
  for chunk_id in range(len(chunks)):
44
  chunks[chunk_id] = ' '.join(chunks[chunk_id])
45
 
 
 
46
  # Summarize the chunks
 
47
  summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
48
  summary_text = " ".join([summary['summary_text'] for summary in summaries])
49
  articles.append((url, summary_text))
 
 
50
  except Exception as e:
 
51
  continue
52
 
 
53
  return articles
54
 
55
  def format_output(articles):
@@ -59,11 +79,13 @@ def format_output(articles):
59
  return formatted_text
60
 
61
  iface = gr.Interface(
62
- fn=lambda query: format_output(summarize_news(query)),
63
- inputs="text",
64
  outputs="text",
65
  title="News Summarizer",
66
  description="Enter a query to get summarized versions of the top news articles."
67
  )
68
 
69
- iface.launch()
 
 
 
3
  from bs4 import BeautifulSoup
4
  import requests
5
  from googlesearch import search
6
+ import logging
7
 
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.DEBUG)
10
+
11
+ def summarize_news(query, num_results=3):
12
+ logging.debug(f"Query received: {query}")
13
+ logging.debug(f"Number of results requested: {num_results}")
14
+
15
+ # Initialize summarization pipeline with a specific model
16
+ logging.debug("Initializing summarization pipeline...")
17
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
18
 
19
  # Search for news articles
20
+ logging.debug("Searching for news articles...")
21
+ search_results = search(query, num_results=num_results)
22
  articles = []
23
 
24
+ logging.debug(f"Search results: {search_results}")
25
+
26
  for url in search_results:
27
  try:
28
+ logging.debug(f"Fetching content from URL: {url}")
29
  # Fetch the content of the news article
30
  r = requests.get(url)
31
  soup = BeautifulSoup(r.text, 'html.parser')
 
34
  ARTICLE = ' '.join(text)
35
 
36
  # Chunk the article text
37
+ logging.debug("Chunking the article text...")
38
  max_chunk = 500
39
  ARTICLE = ARTICLE.replace('.', '.<eos>')
40
  ARTICLE = ARTICLE.replace('?', '?<eos>')
 
56
  for chunk_id in range(len(chunks)):
57
  chunks[chunk_id] = ' '.join(chunks[chunk_id])
58
 
59
+ logging.debug(f"Chunks created: {chunks}")
60
+
61
  # Summarize the chunks
62
+ logging.debug("Summarizing the chunks...")
63
  summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
64
  summary_text = " ".join([summary['summary_text'] for summary in summaries])
65
  articles.append((url, summary_text))
66
+
67
+ logging.debug(f"Summary for URL {url}: {summary_text}")
68
  except Exception as e:
69
+ logging.error(f"Error processing URL {url}: {e}")
70
  continue
71
 
72
+ logging.debug(f"Final summarized articles: {articles}")
73
  return articles
74
 
75
  def format_output(articles):
 
79
  return formatted_text
80
 
81
  iface = gr.Interface(
82
+ fn=lambda query, num_results: format_output(summarize_news(query, num_results)),
83
+ inputs=[gr.inputs.Textbox(label="Query"), gr.inputs.Slider(minimum=1, maximum=10, default=3, label="Number of Results")],
84
  outputs="text",
85
  title="News Summarizer",
86
  description="Enter a query to get summarized versions of the top news articles."
87
  )
88
 
89
+ if __name__ == "__main__":
90
+ logging.debug("Launching Gradio interface...")
91
+ iface.launch()