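# Extract news articles with newspaper3k and summarize them with Hugging Face models via Gradio.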
from newspaper import Article, Config
import nltk

# newspaper's article.nlp() relies on NLTK's "punkt" tokenizer
nltk.download('punkt')

import gradio as gr
from gradio.mix import Parallel, Series

# Use a browser-like user agent so news sites are less likely to block the request
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

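# Configure newspaper3k with the custom user agent and a 10-second request timeout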
config = Config()
config.browser_user_agent = USER_AGENT
config.request_timeout = 10

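# Download and parse a sample article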
url = 'https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/'
article = Article(url, config=config)
article.download() 
article.parse() 

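# Metadata fields extracted by newspaper3k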
authors = ", ".join(article.authors)
title = article.title
date = article.publish_date
text = article.text
image = article.top_image
videos = article.movies
url = article.url
print("Information about the article")
print("=" * 30)
print(f"Title: {title}")
print(f"Author(s): {authors}")
print(f"Publish date: {date}")
print(f"Image: {image}")
print(f"Videos: {videos}")
print(f"Article link: {url}")
print(f"Content: {text[:100]}...")
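
# Run newspaper's built-in NLP step to extract keywords and a naive summary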
article.nlp()
keywords = article.keywords
keywords.sort()
print(keywords)
print(f"Summary: \n{article.summary}")
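
# Load four summarization models from the Hugging Face Hub as Gradio interfaces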
io1 = gr.Interface.load('huggingface/sshleifer/distilbart-cnn-12-6')
io2 = gr.Interface.load("huggingface/facebook/bart-large-cnn")
io3 = gr.Interface.load("huggingface/google/pegasus-xsum")  
io4 = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-6-6")                   

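# Run all four summarizers side by side on the same input text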
iface = Parallel(io1, io2, io3, io4,
                 theme='huggingface', 
                 inputs = gr.inputs.Textbox(lines = 10, label="Text"))

iface.launch()
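
# Second demo: take a URL, extract the article text, and summarize it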
def extract_article_text(url):
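  """Fetch an article from the given URL with newspaper3k and return its plain text."""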
  USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
  config = Config()
  config.browser_user_agent = USER_AGENT
  config.request_timeout = 10

  article = Article(url, config=config)
  article.download()
  article.parse()
  text = article.text
  return text

extractor = gr.Interface(extract_article_text, 'text', 'text')
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

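# Example article URLs shown in the demo UI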
sample_url = [['https://www.technologyreview.com/2021/07/22/1029973/deepmind-alphafold-protein-folding-biology-disease-drugs-proteome/'],
              ['https://www.technologyreview.com/2021/07/21/1029860/disability-rights-employment-discrimination-ai-hiring/'],
              ['https://www.technologyreview.com/2021/07/09/1028140/ai-voice-actors-sound-human/']]

desc = '''
        Let Hugging Face models summarize articles for you.
        Note: shorter articles generate faster summaries.
        This summarizer uses Facebook's bart-large-cnn model.
        '''

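# Chain extraction and summarization: URL -> article text -> summary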
iface = Series(extractor, summarizer, 
  inputs = gr.inputs.Textbox(
      lines = 2,
      label = 'URL'
  ),
  outputs = 'text',
  title = 'News Summarizer',
  theme = 'huggingface',
  description = desc,
  examples=sample_url)

iface.launch()