import logging

import gradio as gr
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.DEBUG)


def summarize_news(query, num_results=3):
    num_results = int(num_results)  # Gradio's "number" input delivers a float
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Initialize summarization pipeline with a specific model
    logging.debug("Initializing summarization pipeline...")
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

    # Search for news articles
    logging.debug("Searching for news articles...")
    search_results = list(search(query, num_results=num_results))
    logging.debug(f"Search results: {search_results}")

    articles = []
    for url in search_results:
        try:
            logging.debug(f"Fetching content from URL: {url}")
            # Fetch the content of the news article
            r = requests.get(url)
            soup = BeautifulSoup(r.text, "html.parser")
            results = soup.find_all(["h1", "p"])
            text = [result.text for result in results]
            ARTICLE = " ".join(text)

            # Chunk the article text: mark sentence boundaries with an
            # <eos> sentinel, then group sentences into chunks of at most
            # max_chunk words so each chunk fits the model's input limit.
            logging.debug("Chunking the article text...")
            max_chunk = 500
            ARTICLE = ARTICLE.replace(".", ".<eos>")
            ARTICLE = ARTICLE.replace("?", "?<eos>")
            ARTICLE = ARTICLE.replace("!", "!<eos>")
            sentences = ARTICLE.split("<eos>")

            current_chunk = 0
            chunks = []
            for sentence in sentences:
                if len(chunks) == current_chunk + 1:
                    if len(chunks[current_chunk]) + len(sentence.split(" ")) <= max_chunk:
                        chunks[current_chunk].extend(sentence.split(" "))
                    else:
                        current_chunk += 1
                        chunks.append(sentence.split(" "))
                else:
                    chunks.append(sentence.split(" "))

            for chunk_id in range(len(chunks)):
                chunks[chunk_id] = " ".join(chunks[chunk_id])
            logging.debug(f"Chunks created: {chunks}")

            # Summarize the chunks
            logging.debug("Summarizing the chunks...")
            summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
            summary_text = " ".join([summary["summary_text"] for summary in summaries])
            articles.append((url, summary_text))
            logging.debug(f"Summary for URL {url}: {summary_text}")
        except Exception as e:
            logging.error(f"Error processing URL {url}: {e}")
            continue

    logging.debug(f"Final summarized articles: {articles}")
    # Return a plain-text report, since the Gradio output component is "text"
    return format_output(articles)


def format_output(articles):
    formatted_text = ""
    for url, summary in articles:
        formatted_text += f"URL: {url}\nSummary: {summary}\n\n"
    return formatted_text


iface = gr.Interface(
    fn=summarize_news,
    inputs=["text", "number"],
    outputs="text",
    title="News Summarizer",
    description="Enter a query to get summarized versions of the top news articles.",
    examples=[
        ["Python programming", 3],
        ["Artificial Intelligence", 5],
    ],
)

if __name__ == "__main__":
    logging.debug("Launching Gradio interface...")
    iface.launch()