import logging
from bs4 import BeautifulSoup
import requests
import nltk
from transformers import pipeline
import gradio as gr
from newsapi import NewsApiClient
import asyncio
# Configure logging
logging.basicConfig(level=logging.DEBUG)
# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")
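# Note: with no model name given, the pipeline falls back to a default summarization
# checkpoint (currently a DistilBART CNN model) whose input is limited to roughly
# 1024 tokens; pass an explicit model name if you need to pin this behaviour.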
# Initialize the NLTK sentence tokenizer
nltk.download('punkt')
# Initialize the News API client with your API key
newsapi = NewsApiClient(api_key='5ab7bb1aaceb41b8993db03477098aad')
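# Note: hard-coding an API key is fine for a quick demo, but for anything shared it is
# safer to read it from an environment variable, e.g.
# NewsApiClient(api_key=os.environ["NEWSAPI_KEY"]) together with `import os`
# (the variable name here is just an example).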
# Function to fetch content from a given URL
def fetch_article_content(url):
    try:
        # Timeout added so a slow or unresponsive site cannot hang the request indefinitely
        r = requests.get(url, timeout=10)
        soup = BeautifulSoup(r.text, 'html.parser')
        # Collect headline and paragraph text from the page
        results = soup.find_all(['h1', 'p'])
        text = [result.text for result in results]
        return ' '.join(text)
    except Exception as e:
        logging.error(f"Error fetching content from {url}: {e}")
        return ""
# Function to summarize news articles based on a query
async def summarize_news(query, num_results=3):
logging.debug(f"Query received: {query}")
logging.debug(f"Number of results requested: {num_results}")
# Search for news articles
logging.debug("Searching for news articles...")
articles = []
aggregated_content = ""
try:
news_results = newsapi.get_everything(q=query, language='en', page_size=num_results)
logging.debug(f"Search results: {news_results}")
for article in news_results['articles']:
url = article['url']
logging.debug(f"Fetching content from URL: {url}")
content = fetch_article_content(url)
aggregated_content += content + " "
except Exception as e:
logging.error(f"Error fetching news articles: {e}")
# Summarize the aggregated content
try:
# Chunk the aggregated content into chunks
sentences = nltk.sent_tokenize(aggregated_content)
chunk_size = 500 # Adjust chunk size as needed
chunks = [sentences[i:i + chunk_size] for i in range(0, len(sentences), chunk_size)]
# Summarize each chunk separately
summaries = []
for chunk in chunks:
chunk_text = ' '.join(chunk)
summary = summarizer(chunk_text, max_length=120, min_length=30, do_sample=False)
summaries.append(summary[0]['summary_text'])
# Combine all summaries
final_summary = ' '.join(summaries)
logging.debug(f"Final summarized text: {final_summary}")
return final_summary
except Exception as e:
logging.error(f"Error during summarization: {e}")
return "An error occurred during summarization."
# Setting up Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Number of Results")
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles."
)
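# Note: recent Gradio versions can call an async function like summarize_news
# directly, so no manual asyncio event-loop handling is needed here.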
if __name__ == "__main__":
    iface.launch()
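# Rough dependency list for this script (inferred from the imports above):
# gradio, transformers (with torch), nltk, beautifulsoup4, requests, newsapi-python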