Shreyas94 committed on
Commit
3148927
1 Parent(s): 7b58076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -4,7 +4,8 @@ import requests
4
  import nltk
5
  from transformers import pipeline
6
  import gradio as gr
7
- from newsapi import NewsApiClient # Import NewsApiClient from newsapi library
 
8
 
9
  # Configure logging
10
  logging.basicConfig(level=logging.DEBUG)
@@ -31,7 +32,7 @@ def fetch_article_content(url):
31
  return ""
32
 
33
  # Function to summarize news articles based on a query
34
- def summarize_news(query, num_results=3):
35
  logging.debug(f"Query received: {query}")
36
  logging.debug(f"Number of results requested: {num_results}")
37
 
@@ -54,26 +55,24 @@ def summarize_news(query, num_results=3):
54
 
55
  # Summarize the aggregated content
56
  try:
57
- # Chunk the aggregated content into meaningful segments
58
  sentences = nltk.sent_tokenize(aggregated_content)
59
-
60
- # Summarize each sentence individually if it's meaningful
61
- summaries = []
62
- for sentence in sentences:
63
- if len(sentence) > 10: # Adjust minimum length as needed
64
- summary = summarizer(sentence, max_length=120, min_length=30, do_sample=False)
65
- summaries.append(summary[0]['summary_text'])
66
-
67
- # Join all summaries to form final output
68
- final_summary = " ".join(summaries)
69
-
70
  logging.debug(f"Final summarized text: {final_summary}")
71
  return final_summary
72
 
73
  except Exception as e:
74
  logging.error(f"Error during summarization: {e}")
75
  return "An error occurred during summarization."
76
-
77
  # Setting up Gradio interface
78
  iface = gr.Interface(
79
  fn=summarize_news,
 
4
  import nltk
5
  from transformers import pipeline
6
  import gradio as gr
7
+ from newsapi import NewsApiClient
8
+ import asyncio
9
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.DEBUG)
 
32
  return ""
33
 
34
  # Function to summarize news articles based on a query
35
+ async def summarize_news(query, num_results=3):
36
  logging.debug(f"Query received: {query}")
37
  logging.debug(f"Number of results requested: {num_results}")
38
 
 
55
 
56
  # Summarize the aggregated content
57
  try:
58
+ # Chunk the aggregated content into chunks
59
  sentences = nltk.sent_tokenize(aggregated_content)
60
+ chunk_size = 500 # Adjust chunk size as needed
61
+ chunks = [sentences[i:i + chunk_size] for i in range(0, len(sentences), chunk_size)]
62
+
63
+ # Summarize chunks concurrently
64
+ summaries = await asyncio.gather(*[summarizer(' '.join(chunk), max_length=120, min_length=30, do_sample=False) for chunk in chunks])
65
+
66
+ # Combine all summaries
67
+ final_summary = ' '.join([summary[0]['summary_text'] for summary in summaries])
68
+
 
 
69
  logging.debug(f"Final summarized text: {final_summary}")
70
  return final_summary
71
 
72
  except Exception as e:
73
  logging.error(f"Error during summarization: {e}")
74
  return "An error occurred during summarization."
75
+
76
  # Setting up Gradio interface
77
  iface = gr.Interface(
78
  fn=summarize_news,