# World_News / app.py

import logging
import os

import requests
import nltk
from bs4 import BeautifulSoup
from transformers import pipeline
import gradio as gr
from newsapi import NewsApiClient

# Configure logging
logging.basicConfig(level=logging.DEBUG)
# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")
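# Note: with no model argument, pipeline() falls back to the library's default
# summarization checkpoint (sshleifer/distilbart-cnn-12-6 at the time of
# writing), which accepts roughly 1024 input tokens per call; that limit is
# why the aggregated text is chunked before summarization below.
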
# Download the NLTK sentence tokenizer data
nltk.download('punkt')
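# Note: newer NLTK releases (3.9+) may also require the 'punkt_tab' resource
# for sent_tokenize; uncomment the following line if tokenization fails:
# nltk.download('punkt_tab')
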
# Initialize the News API client. Read the key from the environment rather
# than hard-coding it, so the secret is not published with the source.
newsapi = NewsApiClient(api_key=os.environ['NEWSAPI_KEY'])
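# get_everything returns a dict roughly of the form
# {'status': ..., 'totalResults': ..., 'articles': [{'url': ..., 'title': ..., ...}]};
# only the per-article 'url' field is used below.
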
# Fetch a URL and crudely extract its readable text
def fetch_article_content(url):
    try:
        r = requests.get(url, timeout=10)
        soup = BeautifulSoup(r.text, 'html.parser')
        # Headline and paragraph tags cover most news layouts
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception as e:
        logging.error(f"Error fetching content from {url}: {e}")
        return ""

# Summarize the top news articles matching a query
def summarize_news(query, num_results=3):
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Search for news articles and aggregate their text
    logging.debug("Searching for news articles...")
    aggregated_content = ""
    try:
        # page_size must be an int; the Gradio slider may pass a float
        news_results = newsapi.get_everything(q=query, language='en', page_size=int(num_results))
        logging.debug(f"Search results: {news_results}")
        for article in news_results['articles']:
            url = article['url']
            logging.debug(f"Fetching content from URL: {url}")
            content = fetch_article_content(url)
            aggregated_content += content + " "
    except Exception as e:
        logging.error(f"Error fetching news articles: {e}")

    # Bail out early if nothing was retrieved; the summarizer fails on empty input
    if not aggregated_content.strip():
        return "No article content could be retrieved for this query."

    # Summarize the aggregated content
    try:
        # Split into sentences, then group sentences into chunks small
        # enough for the model's input limit
        sentences = nltk.sent_tokenize(aggregated_content)
        chunk_size = 25  # sentences per chunk; adjust to stay under the model's ~1024-token limit
        chunks = [sentences[i:i + chunk_size] for i in range(0, len(sentences), chunk_size)]

        # Summarize each chunk separately
        summaries = []
        for chunk in chunks:
            chunk_text = ' '.join(chunk)
            summary = summarizer(chunk_text, max_length=120, min_length=30, do_sample=False)
            summaries.append(summary[0]['summary_text'])

        # Combine the chunk summaries into the final result
        final_summary = ' '.join(summaries)
        logging.debug(f"Final summarized text: {final_summary}")
        return final_summary
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        return "An error occurred during summarization."

# Set up the Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Number of Results")
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles."
)

if __name__ == "__main__":
    iface.launch()
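
# Example (hypothetical invocation): exercise the summarizer without the UI,
# assuming NEWSAPI_KEY is set in the environment:
#   python -c "from app import summarize_news; print(summarize_news('climate policy', 2))"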