import logging

import gradio as gr
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# Marker inserted after sentence-ending punctuation so the text can be split
# into sentences.  (str.split('') raises ValueError: "empty separator".)
_EOS = "<eos>"
# Word budget per chunk handed to the summarizer (keeps inputs within the
# model's length limit).
_MAX_CHUNK_WORDS = 350

# Lazily-created summarization pipeline, shared across calls so the model is
# loaded once instead of on every query.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        logging.debug("Initializing summarization pipeline...")
        _SUMMARIZER = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return _SUMMARIZER


def _fetch_article_text(url):
    """Download *url* and return the joined text of its <h1> and <p> tags.

    Raises requests.RequestException on network failure and
    requests.HTTPError on a non-2xx response.
    """
    # Timeout keeps one slow/hung site from stalling the whole query.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(tag.get_text() for tag in soup.find_all(["h1", "p"]))


def _chunk_text(article, max_words=_MAX_CHUNK_WORDS):
    """Split *article* into chunks of at most *max_words* words, breaking on
    sentence boundaries.  Returns a list of non-empty strings; empty input
    yields an empty list.
    """
    # BUG FIX: the original replaced '.' with '.' (a no-op) and then called
    # ARTICLE.split(''), which raises ValueError ("empty separator"); the
    # caller's broad except silently swallowed it, so no article was ever
    # summarized.  Insert an explicit end-of-sentence marker and split on it.
    for punct in (".", "?", "!"):
        article = article.replace(punct, punct + _EOS)
    sentences = [s for s in article.split(_EOS) if s.strip()]

    chunks = []
    current = []
    for sentence in sentences:
        words = sentence.split()
        if len(current) + len(words) <= max_words:
            current.extend(words)
        elif current:
            chunks.append(" ".join(current))
            current = words
        else:
            # A single sentence longer than the budget: emit it as its own
            # chunk rather than appending an empty string.
            chunks.append(" ".join(words))
    if current:
        chunks.append(" ".join(current))
    return chunks


def summarize_news(query, num_results=3):
    """Search the web for *query*, summarize the top *num_results* articles,
    and return the formatted result text.

    Parameters:
        query: search string entered by the user.
        num_results: number of search results to fetch.  Gradio sliders
            deliver floats (possibly 0), so the value is coerced to a
            positive int.

    Returns:
        A human-readable string of "URL / Summary" entries (may be empty if
        every article failed to download or summarize).
    """
    logging.debug("Query received: %s", query)
    num_results = max(1, int(num_results or 3))
    logging.debug("Number of results requested: %s", num_results)

    summarizer = _get_summarizer()

    logging.debug("Searching for news articles...")
    # search() returns a generator; materialize it so it can be logged and
    # safely iterated.
    urls = list(search(query, num_results=num_results))
    logging.debug("Search results: %s", urls)

    articles = []
    for url in urls:
        try:
            logging.debug("Fetching content from URL: %s", url)
            text = _fetch_article_text(url)

            logging.debug("Chunking the article text...")
            chunks = _chunk_text(text)
            logging.debug("Chunks created: %s", chunks)
            if not chunks:
                # Nothing extractable from the page; skip it.
                continue

            logging.debug("Summarizing the chunks...")
            summaries = [
                summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
                for chunk in chunks
            ]

            # Concatenate the per-chunk summaries and summarize once more
            # for cohesion.
            combined = " ".join(summaries)
            final_summary = summarizer(
                combined, max_length=300, min_length=80, do_sample=False
            )[0]["summary_text"]
            articles.append((url, final_summary))
            logging.debug("Final summary for URL %s: %s", url, final_summary)
        except Exception as e:
            # Best-effort: skip articles that fail to download or summarize,
            # but log the reason instead of hiding it.
            logging.error("Error processing URL %s: %s", url, e)
            continue

    logging.debug("Final summarized articles: %s", articles)
    return format_output(articles)


def format_output(articles):
    """Render (url, summary) pairs as a human-readable text block."""
    return "".join(f"URL: {url}\nSummary: {summary}\n\n" for url, summary in articles)


iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        # BUG FIX: the bare "slider" shorthand defaults to a 0-100 float
        # range; constrain it to a sensible whole number of articles.
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get summarized versions of the top news articles.",
)

if __name__ == "__main__":
    logging.debug("Launching Gradio interface...")
    iface.launch()