import logging
from bs4 import BeautifulSoup
import requests
import nltk
from transformers import pipeline
import gradio as gr
from newsapi import NewsApiClient  # Import NewsApiClient from newsapi library

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# Initialize the summarization pipeline from Hugging Face Transformers
# (the checkpoint below is the library's default for the summarization task;
# pinning it explicitly avoids the "no model was supplied" warning)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# Download the punkt tokenizer data used by nltk.sent_tokenize below
nltk.download('punkt')

# Initialize the News API client with your API key
newsapi = NewsApiClient(api_key='your_newsapi_key_here')
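# Alternatively, the key can be read from an environment variable so it is not
# hard-coded (the variable name NEWSAPI_KEY below is only an illustration):
# import os
# newsapi = NewsApiClient(api_key=os.environ.get("NEWSAPI_KEY", ""))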

# Function to fetch content from a given URL
def fetch_article_content(url):
    try:
        # Time out after 10 seconds so a single slow site cannot hang the request
        r = requests.get(url, timeout=10)
        soup = BeautifulSoup(r.text, 'html.parser')
        # Collect headline and paragraph text from the page
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception as e:
        logging.error(f"Error fetching content from {url}: {e}")
        return ""

# Function to summarize news articles based on a query
def summarize_news(query, num_results=3):
    num_results = int(num_results)  # Gradio sliders may pass floats
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Search for news articles
    logging.debug("Searching for news articles...")

    articles = []
    aggregated_content = ""
    try:
        news_results = newsapi.get_everything(q=query, language='en', page_size=num_results)
        logging.debug(f"Search results: {news_results}")
        
        for article in news_results['articles']:
            url = article['url']
            logging.debug(f"Fetching content from URL: {url}")
            content = fetch_article_content(url)
            aggregated_content += content + " "
    except Exception as e:
        logging.error(f"Error fetching news articles: {e}")

    # Summarize the aggregated content
    try:
        # Split the aggregated content into sentences, then group the sentences
        # into chunks that fit the model's input limit; summarizing one sentence
        # at a time would often yield "summaries" longer than the input.
        sentences = nltk.sent_tokenize(aggregated_content)
        chunks, current_chunk = [], ""
        for sentence in sentences:
            if len(current_chunk) + len(sentence) < 3000:  # rough character budget per chunk
                current_chunk += sentence + " "
            else:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + " "
        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        # Summarize each chunk and join the partial summaries into the final output
        summaries = []
        for chunk in chunks:
            summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)
            summaries.append(summary[0]['summary_text'])

        final_summary = " ".join(summaries)
        
        logging.debug(f"Final summarized text: {final_summary}")
        return final_summary
    
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        return "An error occurred during summarization."
        
# Setting up Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Number of Results")
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles."
)

if __name__ == "__main__":
    iface.launch()
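    # launch(share=True) would additionally create a temporary public link,
    # useful when testing the app from outside the local machine.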