"""Gradio app: fetch an article from a URL and paraphrase it sentence by
sentence using the Vamsi/T5_Paraphrase_Paws seq2seq model."""

import gradio as gr
import nltk
from newspaper import Article
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Sentence-tokenizer data must be present before sent_tokenize is called.
nltk.download('punkt')

# Load the paraphrasing model once at startup; reused for every request.
MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def my_paraphrase(sentence):
    """Return a paraphrase of *sentence* generated by the T5 model.

    Note: sampling is enabled (do_sample=True), so repeated calls with the
    same input may return different paraphrases.
    """
    # The model was fine-tuned with this "paraphrase: " task prefix.
    prompt = "paraphrase: " + sentence + " "
    encoding = tokenizer.encode_plus(prompt, padding=True, return_tensors="pt")
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=256,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        early_stopping=True,
        num_return_sequences=1,
    )
    return tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )


def text(url):
    """Download the article at *url* and paraphrase it sentence by sentence.

    Returns the paraphrased sentences joined back into one string.
    """
    article = Article(url)
    article.download()
    article.parse()
    input_text = article.text
    # Paraphrase each sentence independently, then stitch the results together.
    return " ".join(my_paraphrase(sent) for sent in sent_tokenize(input_text))


def summarize(url):
    """Gradio handler: paraphrase the article behind *url*."""
    return text(url)


if __name__ == "__main__":
    # Launch the UI only when executed as a script, so importing this module
    # does not start a web server as a side effect.
    # NOTE(review): gr.inputs / gr.outputs are the deprecated Gradio 2.x
    # namespaces; on Gradio >= 3 these become gr.Textbox directly — confirm
    # against the installed Gradio version.
    gr.Interface(
        fn=summarize,
        inputs=gr.inputs.Textbox(lines=7, placeholder="Enter text here"),
        outputs=[gr.outputs.Textbox(label="Paraphrased Text")],
        examples=[["developed by python team"]],
    ).launch(inline=False)