"""Gradio app that searches NewsAPI for a query and summarizes the top articles.

The NewsAPI key is read from the ``NEWSAPI_KEY`` environment variable.
"""

import logging
import os

from bs4 import BeautifulSoup
import requests
import nltk
from transformers import pipeline
import gradio as gr

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# NewsAPI "everything" search endpoint, queried directly through `requests`.
NEWSAPI_ENDPOINT = "https://newsapi.org/v2/everything"
# Name of the environment variable that must hold the NewsAPI key.
NEWSAPI_KEY_ENV = "NEWSAPI_KEY"
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Heuristic character budget per summarization chunk; `truncation=True` on the
# summarizer call is the safety net if a chunk still exceeds the model limit.
MAX_CHUNK_CHARS = 2000
# Sentences at or below this length are treated as noise (menus, bylines, ...).
MIN_SENTENCE_CHARS = 10

# Initialize the summarization pipeline from Hugging Face Transformers
summarizer = pipeline("summarization")

# Initialize the NLTK sentence tokenizer
nltk.download('punkt')


def fetch_article_content(url):
    """Download *url* and return the text of its ``<h1>`` and ``<p>`` elements.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The heading and paragraph texts joined by single spaces, or an empty
        string if the page could not be fetched or parsed.
    """
    try:
        # A timeout keeps one unresponsive site from hanging the whole request.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Do not summarize the body of a 4xx/5xx error page as article text.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        elements = soup.find_all(['h1', 'p'])
        return ' '.join(element.text for element in elements)
    except Exception as e:
        # Deliberately broad: fetching is best-effort, a bad article is skipped.
        logging.error(f"Error fetching content from {url}: {e}")
        return ""


def _search_news(query, num_results):
    """Return the list of article dicts NewsAPI reports for *query*.

    Raises:
        RuntimeError: If the API key environment variable is not set.
        requests.RequestException: If the HTTP request fails.
    """
    api_key = os.environ.get(NEWSAPI_KEY_ENV)
    if not api_key:
        raise RuntimeError(f"Environment variable {NEWSAPI_KEY_ENV} is not set")
    response = requests.get(
        NEWSAPI_ENDPOINT,
        params={"q": query, "language": "en", "pageSize": num_results},
        # The key travels in a header so it never appears in logged URLs.
        headers={"X-Api-Key": api_key},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json().get("articles", [])


def _chunk_sentences(sentences, max_chars=MAX_CHUNK_CHARS):
    """Group consecutive sentences into chunks of at most about *max_chars*.

    A single sentence longer than *max_chars* becomes its own chunk.
    """
    chunks = []
    current = []
    current_len = 0
    for sentence in sentences:
        # +1 accounts for the joining space.
        if current and current_len + len(sentence) + 1 > max_chars:
            chunks.append(" ".join(current))
            current, current_len = [], 0
        current.append(sentence)
        current_len += len(sentence) + 1
    if current:
        chunks.append(" ".join(current))
    return chunks


def summarize_news(query, num_results=3):
    """Search for news matching *query* and return one consolidated summary.

    Args:
        query: Free-text search query.
        num_results: Maximum number of articles to fetch; coerced to an
            integer of at least 1 (the UI slider may deliver a float).

    Returns:
        The chunk summaries joined by spaces, or a short human-readable
        message when the query is empty, no article text could be retrieved,
        or summarization failed.
    """
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    query = (query or "").strip()
    if not query:
        return "Please enter a query."
    num_results = max(1, int(num_results))

    # Search for news articles
    logging.debug("Searching for news articles...")
    contents = []
    try:
        articles = _search_news(query, num_results)
        logging.debug("Search returned %d articles", len(articles))
        for article in articles:
            url = article.get('url')
            if not url:
                continue
            logging.debug(f"Fetching content from URL: {url}")
            content = fetch_article_content(url)
            if content:
                contents.append(content)
    except Exception as e:
        # Keep whatever was collected so far; a failed search yields no text.
        logging.error(f"Error fetching news articles: {e}")

    aggregated_content = " ".join(contents)
    if not aggregated_content.strip():
        return "No article content could be retrieved for this query."

    # Summarize the aggregated content
    try:
        sentences = [
            sentence
            for sentence in nltk.sent_tokenize(aggregated_content)
            if len(sentence) > MIN_SENTENCE_CHARS
        ]
        # Summarize multi-sentence chunks rather than single sentences: one
        # model call per chunk, and the output is actually shorter than the input.
        summaries = []
        for chunk in _chunk_sentences(sentences):
            result = summarizer(
                chunk,
                max_length=120,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(result[0]['summary_text'])

        # Join all summaries to form final output
        final_summary = " ".join(summaries)
        logging.debug(f"Final summarized text: {final_summary}")
        return final_summary
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        return "An error occurred during summarization."


# Setting up Gradio interface
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        # `value` is the current keyword for the initial slider position
        # (`default` is the older spelling); step=1 keeps the count integral.
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles.",
)

if __name__ == "__main__":
    logging.debug("Launching Gradio interface...")
    iface.launch()