7jimmy's picture
Update app.py
26f56f5
raw
history blame
2.85 kB
from newspaper import Article
from newspaper import Config
import nltk
nltk.download('punkt')
from transformers import pipeline
import gradio as gr
from gradio.mix import Parallel, Series
# --- Demo: fetch one article with newspaper and print what was extracted ---

# Browser-like user agent sent with the download request.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
    'Gecko/20100101 Firefox/78.0'
)

# newspaper configuration shared by the demo download below.
config = Config()
config.browser_user_agent = USER_AGENT
config.request_timeout = 10

url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'
article = Article(url, config=config)
article.download()
article.parse()

# Pull the parsed fields into module-level names.
authors = ", ".join(article.authors)
title, date, text = article.title, article.publish_date, article.text
image, videos = article.top_image, article.movies
url = article.url  # rebinds `url` to the value reported by the parsed article

# One print call with a newline separator emits the same lines as
# printing each entry individually.
print(
    "Information about the article",
    "=" * 30,
    f"Title: {title}",
    f"Author(s): {authors}",
    f"Publish date: {date}",
    f"Image: {image}",
    f"Videos: {videos}",
    f"Article link: {url}",
    f"Content: {text[:100]}...",  # only the first 100 characters are shown
    sep="\n",
)

# Run the NLP pass, then read the keywords and summary afterwards.
article.nlp()
keywords = article.keywords
keywords.sort()  # in-place sort of the list held by the article object
print(keywords, f"Summary: \n{article.summary}", sep="\n")
# --- Side-by-side comparison of four hosted summarization models ---

# Load each model as its own Gradio interface, in the listed order.
io1, io2, io3, io4 = (
    gr.Interface.load(model_path)
    for model_path in (
        'huggingface/sshleifer/distilbart-cnn-12-6',
        "huggingface/facebook/bart-large-cnn",
        "huggingface/google/pegasus-xsum",
        "huggingface/sshleifer/distilbart-cnn-6-6",
    )
)

# A single 10-line text box is passed as the input for all four models.
iface = Parallel(
    io1,
    io2,
    io3,
    io4,
    inputs=gr.inputs.Textbox(lines=10, label="Text"),
    theme='huggingface',
)

# NOTE(review): when run as a plain script, launch() may block here, in which
# case the code after this call would not run until this server stops —
# confirm whether that is intended.
iface.launch()
def extract_article_text(url):
    """Download the article at ``url`` and return its parsed body text.

    The URL is typed into a multi-line text box, so surrounding whitespace
    and newlines are stripped before the request is made.

    Args:
        url: Address of the article to fetch.

    Returns:
        The ``text`` attribute of the parsed newspaper ``Article``.

    Raises:
        ValueError: If ``url`` is ``None``, empty, or only whitespace.
    """
    cleaned_url = (url or "").strip()
    if not cleaned_url:
        # Fail fast with a clear message instead of attempting a download
        # that cannot succeed.
        raise ValueError("A non-empty article URL is required.")

    # Local names keep this function self-contained and avoid shadowing
    # any module-level configuration.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
    request_config = Config()
    request_config.browser_user_agent = user_agent
    request_config.request_timeout = 10  # presumably seconds; see newspaper Config

    article = Article(cleaned_url, config=request_config)
    article.download()
    article.parse()
    return article.text
# --- News Summarizer: URL -> article text -> summary ---

# Stage 1: turn a URL into the article's text.
extractor = gr.Interface(fn=extract_article_text, inputs='text', outputs='text')

# Stage 2: hosted summarization model loaded by its hub path.
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

# Example rows for the UI; each row is a one-element list holding a URL.
sample_url = [
    [example_link]
    for example_link in (
        'https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/',
        'https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/',
        'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/',
    )
]

# Description text shown with the interface.
desc = '''
Let Hugging Face models summarize articles for you.
Note: Shorter articles generate faster summaries.
This summarizer uses bart-large-cnn model by Facebook
'''

# Chain the two stages and launch the combined interface.
iface = Series(
    extractor,
    summarizer,
    title='News Summarizer',
    description=desc,
    theme='huggingface',
    inputs=gr.inputs.Textbox(lines=2, label='URL'),
    outputs='text',
    examples=sample_url,
)
iface.launch()