Spaces:

Shreyas94
/

World_News

Sleeping

World_News / app.py

Update app.py

3f6ed4f verified 9 months ago

1.62 kB

	import gradio as gr
	from transformers import pipeline
	from bs4 import BeautifulSoup
	import requests

	# Shared summarization pipeline, created lazily on the first request so the
	# model is not reloaded for every call.
	_summarizer = None


	def _get_summarizer():
	    """Return the shared summarization pipeline, building it on first use."""
	    global _summarizer
	    if _summarizer is None:
	        _summarizer = pipeline("summarization")
	    return _summarizer


	def _chunk_sentences(text, max_chunk=500):
	    """Group the sentences of *text* into chunks of about *max_chunk* words.

	    Sentences are delimited by '.', '?' and '!' (the punctuation is kept).
	    Whole sentences are never split: a single sentence longer than
	    *max_chunk* words becomes a chunk of its own.

	    Returns a list of strings; the list is empty when *text* has no words.
	    """
	    # Tag every sentence terminator so the text can be split on the tag
	    # while keeping the punctuation attached to its sentence.
	    for mark in ".?!":
	        text = text.replace(mark, mark + "<eos>")

	    chunks = []
	    current = []
	    for sentence in text.split("<eos>"):
	        # split() without arguments drops the empty tokens produced by
	        # leading/repeated whitespace, so they are not counted as words.
	        words = sentence.split()
	        if not words:
	            continue
	        # Flush the running chunk when this sentence would overflow it.
	        if current and len(current) + len(words) > max_chunk:
	            chunks.append(" ".join(current))
	            current = []
	        current.extend(words)
	    if current:
	        chunks.append(" ".join(current))
	    return chunks


	def summarize_blog_post(url):
	    """Fetch the page at *url* and return a summary of its text.

	    The text of every <h1> and <p> element is collected, grouped into
	    chunks of whole sentences, and each chunk is summarized; the per-chunk
	    summaries are joined with spaces.

	    Args:
	        url: Address of the page to summarize.

	    Returns:
	        The summary as a single string, or "" when the page contains no
	        <h1>/<p> text.

	    Raises:
	        ValueError: If *url* is not a non-blank string.
	        requests.RequestException: If the page cannot be fetched or the
	            server answers with an HTTP error status.
	    """
	    # Validate before touching the network or loading the model.
	    if not isinstance(url, str) or not url.strip():
	        raise ValueError("Please provide the URL of the page to summarize.")

	    # Get blog post content; the timeout keeps a stalled server from
	    # blocking the request forever.
	    r = requests.get(url.strip(), timeout=30)
	    r.raise_for_status()
	    soup = BeautifulSoup(r.text, 'html.parser')
	    article = ' '.join(result.text for result in soup.find_all(['h1', 'p']))

	    # Chunk text
	    chunks = _chunk_sentences(article)
	    if not chunks:
	        return ""

	    # Summarize text. truncation=True guards against a chunk whose token
	    # count exceeds the model's maximum input length.
	    summaries = _get_summarizer()(
	        chunks,
	        max_length=120,
	        min_length=30,
	        do_sample=False,
	        truncation=True,
	    )
	    return " ".join(summary['summary_text'] for summary in summaries)

	# Build the web UI: one text box in (the URL), one text box out (the summary).
	iface = gr.Interface(
	    summarize_blog_post,  # fn
	    "text",               # inputs
	    "text",               # outputs
	    title="Medium Blog Post Summarizer",
	    description="Enter the URL of a Medium blog post to get a summarized version of the content.",
	)

	# Start serving the interface.
	iface.launch()