# News Summarizer — Gradio app: searches NewsAPI for a query, scrapes the
# top articles, and produces a consolidated abstractive summary.
import logging
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from newsapi import NewsApiClient
from transformers import pipeline
# Configure logging so each request/scrape/summarization step is traceable.
logging.basicConfig(level=logging.DEBUG)

# Initialize the News API client.
# SECURITY NOTE(review): an API key was committed in source here. Prefer the
# NEWSAPI_KEY environment variable; the original key remains as a fallback so
# existing deployments keep working, but it should be rotated and removed.
newsapi = NewsApiClient(api_key=os.environ.get('NEWSAPI_KEY', '5ab7bb1aaceb41b8993db03477098aad'))
def fetch_article_content(url):
    """Download *url* and return the concatenated text of its <h1> and <p> tags.

    Returns an empty string on any network or parsing failure so callers can
    safely aggregate content from multiple articles without special-casing.
    """
    try:
        # A timeout keeps one slow site from hanging the whole request, and
        # raise_for_status surfaces HTTP errors instead of scraping error pages.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        elements = soup.find_all(['h1', 'p'])
        return ' '.join(element.text for element in elements)
    except Exception as e:
        logging.error(f"Error fetching content from {url}: {e}")
        return ""
def _get_summarizer():
    """Lazily create and cache the HF summarization pipeline.

    The original code rebuilt the pipeline (reloading the model) on every
    request; caching it on the function object makes repeat calls fast
    without changing the first-call behavior.
    """
    if not hasattr(_get_summarizer, "_cached"):
        _get_summarizer._cached = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return _get_summarizer._cached


def _chunk_text(text, max_words=500):
    """Split *text* into chunks of at most *max_words* words, breaking at
    sentence boundaries (., ?, !) so no sentence is cut in half."""
    # Tag sentence ends with a marker, then split on it.
    text = text.replace('.', '.<eos>').replace('?', '?<eos>').replace('!', '!<eos>')
    chunks = []
    current = []
    for sentence in text.split('<eos>'):
        words = sentence.split(' ')
        # Start a new chunk when adding this sentence would exceed the limit.
        if current and len(current) + len(words) > max_words:
            chunks.append(current)
            current = words
        else:
            current.extend(words)
    if current:
        chunks.append(current)
    return [' '.join(chunk) for chunk in chunks]


def summarize_news(query, num_results=3):
    """Search NewsAPI for *query*, scrape the top articles, and return a
    consolidated summary.

    Parameters:
        query: search terms forwarded to NewsAPI.
        num_results: number of articles to fetch. Gradio sliders may pass a
            float, so the value is coerced to int before use (NewsAPI's
            page_size must be an integer).

    Returns:
        A single summary string, or a fixed error message if summarization
        fails (article-fetch failures are logged and skipped).
    """
    logging.debug(f"Query received: {query}")
    logging.debug(f"Number of results requested: {num_results}")

    # Initialize (or reuse) the summarization pipeline.
    logging.debug("Initializing summarization pipeline...")
    summarizer = _get_summarizer()

    # Search for news articles and aggregate their scraped text.
    logging.debug("Searching for news articles...")
    aggregated_content = ""
    try:
        news_results = newsapi.get_everything(q=query, language='en', page_size=int(num_results))
        logging.debug(f"Search results: {news_results}")
        for article in news_results['articles']:
            url = article['url']
            logging.debug(f"Fetching content from URL: {url}")
            aggregated_content += fetch_article_content(url) + " "
    except Exception as e:
        logging.error(f"Error fetching news articles: {e}")

    # Chunk the aggregated content so each piece fits the model's input limit.
    logging.debug("Chunking the aggregated content...")
    chunks = _chunk_text(aggregated_content)
    logging.debug(f"Chunks created: {chunks}")

    # Summarize each chunk, then re-summarize the concatenation for cohesion.
    logging.debug("Summarizing the chunks...")
    try:
        summaries = summarizer(chunks, max_length=120, min_length=30, do_sample=False)
        summary_text = " ".join(summary['summary_text'] for summary in summaries)
        logging.debug("Reprocessing the summary for cohesiveness and elaboration...")
        final_summary = summarizer(summary_text, max_length=150, min_length=60, do_sample=False)[0]['summary_text']
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        final_summary = "An error occurred during summarization."

    logging.debug(f"Final summarized text: {final_summary}")
    return final_summary
# Gradio UI: a query textbox plus a result-count slider feeding summarize_news.
# step=1 ensures the slider only yields whole numbers — NewsAPI's page_size
# parameter must be an integer.
iface = gr.Interface(
    fn=summarize_news,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results"),
    ],
    outputs="textbox",
    title="News Summarizer",
    description="Enter a query to get a consolidated summary of the top news articles.",
)
# Entry point: start the Gradio web server when run as a script.
if __name__ == "__main__":
    logging.debug("Launching Gradio interface...")
    iface.launch()