# World_News / app.py
# (Repository page header captured with the file: "Shreyas94's picture",
# "Update app.py", commit 3f6ed4f verified, raw / history blame, 1.62 kB.)
import gradio as gr
from transformers import pipeline
from bs4 import BeautifulSoup
import requests
# Upper bound on the number of words grouped into one summarizer input.
_MAX_CHUNK_WORDS = 500
# Seconds to wait on the remote server before giving up on the download.
_REQUEST_TIMEOUT_SECONDS = 30
# Summarization pipeline, created on first use and then reused by later calls.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        # Building the pipeline loads model weights; doing it once instead of
        # on every request keeps repeat calls fast.
        _summarizer = pipeline("summarization")
    return _summarizer


def _chunk_text(text, max_words=_MAX_CHUNK_WORDS):
    """Group the sentences of *text* into chunks of at most *max_words* words.

    Sentences are never split: a single sentence longer than *max_words*
    becomes a chunk of its own. Blank sentences are dropped, so empty or
    whitespace-only text yields an empty list.
    """
    import re

    # Split only where sentence punctuation is followed by whitespace, so
    # decimals ("3.14") and URLs ("example.com") stay in one piece.
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())

    chunks = []
    current_words = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        # Start a new chunk when this sentence would overflow the current one.
        if current_words and len(current_words) + len(words) > max_words:
            chunks.append(" ".join(current_words))
            current_words = []
        current_words.extend(words)
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


def summarize_blog_post(url):
    """Download the page at *url* and return a summary of its text.

    The text of every ``<h1>`` and ``<p>`` element is collected, grouped into
    chunks of whole sentences, summarized chunk by chunk, and the partial
    summaries are joined with single spaces.

    Args:
        url: Address of the blog post to summarize.

    Returns:
        The combined summary text, or an empty string when the page has no
        ``<h1>``/``<p>`` text to summarize.

    Raises:
        ValueError: If *url* is empty or only whitespace.
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection failures and timeouts.
    """
    url = (url or "").strip()
    if not url:
        # Reject blank input before any network access or model loading.
        raise ValueError("A blog post URL is required.")

    # Without a timeout an unresponsive server would block the call forever.
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of summarizing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    article = ' '.join(tag.text for tag in soup.find_all(['h1', 'p']))

    chunks = _chunk_text(article)
    if not chunks:
        return ""

    # truncation=True guards against a chunk that is longer than the model's
    # input limit (e.g. one very long sentence).
    summaries = _get_summarizer()(
        chunks,
        max_length=120,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return " ".join(summary['summary_text'] for summary in summaries)
# Web UI: one text box in (the post URL), one text box out (the summary).
# fn, inputs and outputs are passed positionally in that order.
iface = gr.Interface(
    summarize_blog_post,
    "text",
    "text",
    description="Enter the URL of a Medium blog post to get a summarized version of the content.",
    title="Medium Blog Post Summarizer",
)

# Start serving the interface as soon as this file is executed.
iface.launch()