World_News / app.py
Shreyas94's picture
Update app.py
720d7f6 verified
raw
history blame
3.08 kB
import logging
import os

import gradio as gr
import nltk
import requests
from bs4 import BeautifulSoup
from newsapi import NewsApiClient  # Import NewsApiClient from newsapi library
from transformers import pipeline
# Configure logging
logging.basicConfig(level=logging.DEBUG)
# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")
# Initialize the NLTK sentence tokenizer data. Recent NLTK releases load the
# "punkt_tab" resource in sent_tokenize while older ones use "punkt", so both
# are fetched to keep tokenization working across versions.
nltk.download('punkt')
nltk.download('punkt_tab')
# Initialize the News API client. The key is read from the NEWSAPI_KEY
# environment variable so a real secret never has to be committed to the
# source; the original placeholder remains the fallback value.
newsapi = NewsApiClient(api_key=os.environ.get('NEWSAPI_KEY', 'your_newsapi_key_here'))
# Function to fetch content from a given URL
def fetch_article_content(url):
    """Download a web page and return the text of its headings and paragraphs.

    Args:
        url: Address of the article to fetch.

    Returns:
        The text of every ``<h1>`` and ``<p>`` element joined by single
        spaces, or an empty string if the page could not be fetched or
        parsed. This function never raises.
    """
    try:
        # A timeout keeps one unresponsive site from hanging the whole request.
        r = requests.get(url, timeout=10)
        # Treat HTTP errors (404, 500, ...) as failures so that error pages
        # are not handed on as if they were article text.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        return ' '.join(node.text for node in soup.find_all(['h1', 'p']))
    except Exception as e:
        # Deliberately broad: fetching is best-effort, and one bad article
        # must not abort processing of the remaining ones.
        logging.error(f"Error fetching content from {url}: {e}")
        return ""
# Function to summarize news articles based on a query
def summarize_news(query, num_results=3):
    """Search for news matching ``query`` and return one combined summary.

    Args:
        query: Search terms passed to the News API.
        num_results: How many articles to request. UI widgets may supply
            this as a float, so it is coerced to ``int`` before use.

    Returns:
        The summaries of all content chunks joined by spaces (an empty
        string when no article text was retrieved), or the message
        "An error occurred during summarization." if summarization fails.
    """
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Search for news articles
    logging.debug("Searching for news articles...")
    contents = []
    try:
        # The News API client rejects a non-int page size, so coerce here
        # (inside the try so that a bad value is logged, not raised).
        page_size = int(num_results)
        news_results = newsapi.get_everything(q=query, language='en', page_size=page_size)
        logging.debug(f"Search results: {news_results}")
        for article in news_results.get('articles') or []:
            url = article.get('url')
            if not url:
                # Nothing to fetch for an entry without a link.
                continue
            logging.debug(f"Fetching content from URL: {url}")
            contents.append(fetch_article_content(url))
    except Exception as e:
        # Best-effort: fall through and summarize whatever was collected.
        logging.error(f"Error fetching news articles: {e}")
    aggregated_content = " ".join(contents)

    # Summarize the aggregated content
    try:
        # Drop fragments too short to carry meaning (adjust as needed).
        sentences = [
            sentence
            for sentence in nltk.sent_tokenize(aggregated_content)
            if len(sentence) > 10
        ]
        # Summarize multi-sentence chunks rather than single sentences: the
        # model needs context to condense anything, and one call per chunk
        # is far cheaper than one call per sentence.
        summaries = []
        for chunk in _group_sentences(sentences):
            result = summarizer(
                chunk,
                max_length=120,
                min_length=30,
                do_sample=False,
                truncation=True,  # guard against a chunk exceeding the model input limit
            )
            summaries.append(result[0]['summary_text'])
        # Join all summaries to form final output
        final_summary = " ".join(summaries)
        logging.debug(f"Final summarized text: {final_summary}")
        return final_summary
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        return "An error occurred during summarization."


def _group_sentences(sentences, max_chars=2000):
    """Pack consecutive sentences into chunks of at most ``max_chars`` characters.

    A single sentence longer than ``max_chars`` becomes a chunk on its own.
    """
    chunks = []
    current = []
    current_len = 0
    for sentence in sentences:
        # +1 accounts for the space that joins sentences within a chunk.
        if current and current_len + len(sentence) + 1 > max_chars:
            chunks.append(" ".join(current))
            current = []
            current_len = 0
        current.append(sentence)
        current_len += len(sentence) + 1
    if current:
        chunks.append(" ".join(current))
    return chunks
# Setting up Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        # step=1 makes the slider emit whole numbers. Without an explicit
        # step, Gradio derives a default increment from the range, which can
        # be fractional and is meaningless as an article count.
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles.",
)

if __name__ == "__main__":
    # Start the web UI only when run as a script, not when imported.
    iface.launch()